Add typos checks (#770)

2025-02-25 23:51:15 +01:00
parent 8699a28be0
commit a1809ad3de
47 changed files with 114 additions and 82 deletions
--- a/lerobot/common/datasets/compute_stats.py
+++ b/lerobot/common/datasets/compute_stats.py
@@ -92,7 +92,7 @@ def compute_episode_stats(episode_data: dict[str, list[str] | np.ndarray], featu
            axes_to_reduce = (0, 2, 3)  # keep channel dim
            keepdims = True
        else:
-            ep_ft_array = data  # data is alreay a np.ndarray
+            ep_ft_array = data  # data is already a np.ndarray
            axes_to_reduce = 0  # compute stats over the first axis
            keepdims = data.ndim == 1  # keep as np.array

--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
@@ -226,7 +226,7 @@ class LeRobotDatasetMetadata:

    def add_task(self, task: str):
        """
-        Given a task in natural language, add it to the dictionnary of tasks.
+        Given a task in natural language, add it to the dictionary of tasks.
        """
        if task in self.task_to_task_index:
            raise ValueError(f"The task '{task}' already exists and can't be added twice.")
@@ -389,7 +389,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
                - info contains various information about the dataset like shapes, keys, fps etc.
                - stats stores the dataset statistics of the different modalities for normalization
                - tasks contains the prompts for each task of the dataset, which can be used for
-                  task-conditionned training.
+                  task-conditioned training.
            - hf_dataset (from datasets.Dataset), which will read any values from parquet files.
            - videos (optional) from which frames are loaded to be synchronous with data from parquet files.

@@ -848,7 +848,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
        episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
        episode_buffer["episode_index"] = np.full((episode_length,), episode_index)

-        # Add new tasks to the tasks dictionnary
+        # Add new tasks to the tasks dictionary
        for task in episode_tasks:
            task_index = self.meta.get_task_index(task)
            if task_index is None:
--- a/lerobot/common/datasets/push_dataset_to_hub/_download_raw.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/_download_raw.py
@@ -152,7 +152,7 @@ def download_raw(raw_dir: Path, repo_id: str):
            stacklevel=1,
        )

-    # Send warning if raw_dir isn't well formated
+    # Send warning if raw_dir isn't well formatted
    if raw_dir.parts[-2] != user_id or raw_dir.parts[-1] != dataset_id:
        warnings.warn(
            f"""`raw_dir` ({raw_dir}) doesn't contain a community or user id `/` the name of the dataset that
--- a/lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py
@@ -68,9 +68,9 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod
            modality_df,
            on="timestamp_utc",
            # "nearest" is the best option over "backward", since the latter can desynchronizes camera timestamps by
-            # matching timestamps that are too far appart, in order to fit the backward constraints. It's not the case for "nearest".
+            # matching timestamps that are too far apart, in order to fit the backward constraints. It's not the case for "nearest".
            # However, note that "nearest" might synchronize the reference camera with other cameras on slightly future timestamps.
-            # are too far appart.
+            # are too far apart.
            direction="nearest",
            tolerance=pd.Timedelta(f"{1 / fps} seconds"),
        )
@@ -126,7 +126,7 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod
    videos_dir.parent.mkdir(parents=True, exist_ok=True)
    videos_dir.symlink_to((raw_dir / "videos").absolute())

-    # sanity check the video paths are well formated
+    # sanity check the video paths are well formatted
    for key in df:
        if "observation.images." not in key:
            continue
@@ -143,7 +143,7 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod
            # it is the case for video_frame dictionary = [{"path": ..., "timestamp": ...}]
            data_dict[key] = [video_frame[0] for video_frame in df[key].values]

-            # sanity check the video path is well formated
+            # sanity check the video path is well formatted
            video_path = videos_dir.parent / data_dict[key][0]["path"]
            if not video_path.exists():
                raise ValueError(f"Video file not found in {video_path}")
--- a/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py
@@ -17,7 +17,7 @@
 For all datasets in the RLDS format.
 For https://github.com/google-deepmind/open_x_embodiment (OPENX) datasets.

-NOTE: You need to install tensorflow and tensorflow_datsets before running this script.
+NOTE: You need to install tensorflow and tensorflow_datasets before running this script.

 Example:
    python lerobot/scripts/push_dataset_to_hub.py \
--- a/lerobot/common/datasets/utils.py
+++ b/lerobot/common/datasets/utils.py
@@ -222,7 +222,7 @@ def load_episodes(local_dir: Path) -> dict:


 def write_episode_stats(episode_index: int, episode_stats: dict, local_dir: Path):
-    # We wrap episode_stats in a dictionnary since `episode_stats["episode_index"]`
+    # We wrap episode_stats in a dictionary since `episode_stats["episode_index"]`
    # is a dictionary of stats and not an integer.
    episode_stats = {"episode_index": episode_index, "stats": serialize_dict(episode_stats)}
    append_jsonlines(episode_stats, local_dir / EPISODES_STATS_PATH)
@@ -445,10 +445,10 @@ def get_episode_data_index(
    if episodes is not None:
        episode_lengths = {ep_idx: episode_lengths[ep_idx] for ep_idx in episodes}

-    cumulative_lenghts = list(accumulate(episode_lengths.values()))
+    cumulative_lengths = list(accumulate(episode_lengths.values()))
    return {
-        "from": torch.LongTensor([0] + cumulative_lenghts[:-1]),
-        "to": torch.LongTensor(cumulative_lenghts),
+        "from": torch.LongTensor([0] + cumulative_lengths[:-1]),
+        "to": torch.LongTensor(cumulative_lengths),
    }


--- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py
+++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py
@@ -31,6 +31,7 @@ from lerobot.common.robot_devices.robots.configs import AlohaRobotConfig

 LOCAL_DIR = Path("data/")

+# spellchecker:off
 ALOHA_MOBILE_INFO = {
    "robot_config": AlohaRobotConfig(),
    "license": "mit",
@@ -856,6 +857,7 @@ DATASETS = {
            }""").lstrip(),
    },
 }
+# spellchecker:on


 def batch_convert():
--- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py
+++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py
@@ -17,7 +17,7 @@
 """
 This script will help you convert any LeRobot dataset already pushed to the hub from codebase version 1.6 to
 2.0. You will be required to provide the 'tasks', which is a short but accurate description in plain English
-for each of the task performed in the dataset. This will allow to easily train models with task-conditionning.
+for each of the tasks performed in the dataset. This will allow you to easily train models with task-conditioning.

 We support 3 different scenarios for these tasks (see instructions below):
    1. Single task dataset: all episodes of your dataset have the same single task.
--- a/lerobot/common/datasets/video_utils.py
+++ b/lerobot/common/datasets/video_utils.py
@@ -73,7 +73,7 @@ def decode_video_frames_torchvision(
    last_ts = max(timestamps)

    # access closest key frame of the first requested frame
-    # Note: closest key frame timestamp is usally smaller than `first_ts` (e.g. key frame can be the first frame of the video)
+    # Note: closest key frame timestamp is usually smaller than `first_ts` (e.g. key frame can be the first frame of the video)
    # for details on what `seek` is doing see: https://pyav.basswood-io.com/docs/stable/api/container.html?highlight=inputcontainer#av.container.InputContainer.seek
    reader.seek(first_ts, keyframes_only=keyframes_only)