From ad115b6c27b095b3d1e7c291c3adf0b34d188e19 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 3 Oct 2024 20:00:44 +0200 Subject: [PATCH 01/59] WIP --- convert_dataset_16_to_20.py | 484 ++++++++++++++++++++++++++++++++++++ 1 file changed, 484 insertions(+) create mode 100644 convert_dataset_16_to_20.py diff --git a/convert_dataset_16_to_20.py b/convert_dataset_16_to_20.py new file mode 100644 index 00000000..fdb5f233 --- /dev/null +++ b/convert_dataset_16_to_20.py @@ -0,0 +1,484 @@ +""" +This script will help you convert any LeRobot dataset already pushed to the hub from codebase version 1.6 to +2.0. You will be required to provide the 'tasks', which is a short but accurate description in plain English +for each of the task performed in the dataset. This will allow to easily train models with task-conditionning. + +If your dataset contains a single task, you can provide it directly via the CLI with the '--task' option (see +examples below). + +If your dataset is a multi-task dataset, TODO + +In any case, keep in mind that there should only be one task per episode. Multi-task episodes are not +supported for now. + +Usage examples + +Single-task dataset: +```bash +python convert_dataset_16_to_20.py \ + --repo-id lerobot/aloha_sim_insertion_human_image \ + --task "Insert the peg into the socket." \ + --robot-config lerobot/configs/robot/aloha.yaml +``` + +```bash +python convert_dataset_16_to_20.py \ + --repo-id aliberts/koch_tutorial \ + --task "Pick the Lego block and drop it in the box on the right." \ + --robot-config lerobot/configs/robot/koch.yaml \ + --local-dir data +``` + +Multi-task dataset: +TODO +""" + +import argparse +import json +import math +import subprocess +from io import BytesIO +from pathlib import Path + +import pyarrow as pa +import pyarrow.compute as pc +import pyarrow.parquet as pq +import torch +from huggingface_hub import HfApi +from PIL import Image +from safetensors.torch import load_file + +from lerobot.common.utils.utils import init_hydra_config + +V1_6 = "v1.6" +V2_0 = "v2.0" + +PARQUET_PATH = "data/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" +VIDEO_PATH = "videos/{image_key}_episode_{episode_index:06d}.mp4" + + +def parse_robot_config(config_path: Path, config_overrides: list[str] | None = None) -> tuple[str, dict]: + robot_cfg = init_hydra_config(config_path, config_overrides) + if robot_cfg["robot_type"] in ["aloha", "koch"]: + state_names = [ + f"{arm}_{motor}" if len(robot_cfg["follower_arms"]) > 1 else motor + for arm in robot_cfg["follower_arms"] + for motor in robot_cfg["follower_arms"][arm]["motors"] + ] + action_names = [ + # f"{arm}_{motor}" for arm in ["left", "right"] for motor in robot_cfg["leader_arms"][arm]["motors"] + f"{arm}_{motor}" if len(robot_cfg["leader_arms"]) > 1 else motor + for arm in robot_cfg["leader_arms"] + for motor in robot_cfg["leader_arms"][arm]["motors"] + ] + # elif robot_cfg["robot_type"] == "stretch3": TODO + else: + raise NotImplementedError( + "Please provide robot_config={'robot_type': ..., 'names': ...} directly to convert_dataset()." 
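+ # Illustrative structure for a manually-provided robot_config (placeholder
+ # motor names, matching the dict returned just below):
+ # {
+ #     "robot_type": "stretch3",
+ #     "names": {
+ #         "observation.state": ["motor_0", "motor_1", ...],
+ #         "action": ["motor_0", "motor_1", ...],
+ #     },
+ # }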
+ ) + + return { + "robot_type": robot_cfg["robot_type"], + "names": { + "observation.state": state_names, + "action": action_names, + }, + } + + +def load_json(fpath: Path) -> dict: + with open(fpath) as f: + return json.load(f) + + +def write_json(data: dict, fpath: Path) -> None: + fpath.parent.mkdir(exist_ok=True, parents=True) + with open(fpath, "w") as f: + json.dump(data, f, indent=4) + + +def convert_stats_to_json(input_dir: Path, output_dir: Path) -> None: + safetensor_path = input_dir / "stats.safetensors" + stats = load_file(safetensor_path) + serializable_stats = {key: value.tolist() for key, value in stats.items()} + + json_path = output_dir / "stats.json" + json_path.parent.mkdir(exist_ok=True, parents=True) + with open(json_path, "w") as f: + json.dump(serializable_stats, f, indent=4) + + # Sanity check + with open(json_path) as f: + stats_json = json.load(f) + + stats_json = {key: torch.tensor(value) for key, value in stats_json.items()} + for key in stats: + torch.testing.assert_close(stats_json[key], stats[key]) + + +def get_keys(table: pa.Table) -> dict[str, list]: + table_metadata = json.loads(table.schema.metadata[b"huggingface"].decode("utf-8")) + sequence_keys, image_keys, video_keys = [], [], [] + for key, val in table_metadata["info"]["features"].items(): + if val["_type"] == "Sequence": + sequence_keys.append(key) + elif val["_type"] == "Image": + image_keys.append(key) + elif val["_type"] == "VideoFrame": + video_keys.append(key) + + return { + "sequence": sequence_keys, + "image": image_keys, + "video": video_keys, + } + + +def remove_hf_metadata_features(table: pa.Table, features: list[str]) -> pa.Table: + # HACK + schema = table.schema + # decode bytes dict + table_metadata = json.loads(schema.metadata[b"huggingface"].decode("utf-8")) + for key in features: + table_metadata["info"]["features"].pop(key) + + # re-encode bytes dict + table_metadata = {b"huggingface": json.dumps(table_metadata).encode("utf-8")} + new_schema = schema.with_metadata(table_metadata) + return table.replace_schema_metadata(new_schema.metadata) + + +def add_hf_metadata_features(table: pa.Table, features: dict[str, dict]) -> pa.Table: + # HACK + schema = table.schema + # decode bytes dict + table_metadata = json.loads(schema.metadata[b"huggingface"].decode("utf-8")) + for key, val in features.items(): + table_metadata["info"]["features"][key] = val + + # re-encode bytes dict + table_metadata = {b"huggingface": json.dumps(table_metadata).encode("utf-8")} + new_schema = schema.with_metadata(table_metadata) + return table.replace_schema_metadata(new_schema.metadata) + + +def remove_videoframe_from_table(table: pa.Table, image_columns: list) -> pa.Table: + table = table.drop(image_columns) + table = remove_hf_metadata_features(table, image_columns) + return table + + +def add_tasks(table: pa.Table, tasks_by_episodes: dict) -> pa.Table: + tasks_index = pa.array([tasks_by_episodes.get(key.as_py(), None) for key in table["episode_index"]]) + table = table.append_column("task_index", tasks_index) + hf_feature = {"task_index": {"dtype": "int64", "_type": "Value"}} + table = add_hf_metadata_features(table, hf_feature) + return table + + +def split_parquet_by_episodes( + table: pa.Table, keys: dict[str, list], total_episodes: int, episode_indices: list, output_dir: Path +) -> list: + (output_dir / "data").mkdir(exist_ok=True, parents=True) + if len(keys["video"]) > 0: + table = remove_videoframe_from_table(table, keys["video"]) + + episode_lengths = [] + for episode_index in sorted(episode_indices): 
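+ # e.g. with total_episodes=50, episode 3 is written to
+ # "data/train-00003-of-00050.parquet" (see PARQUET_PATH above).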
+ # Write each episode_index to a new parquet file + filtered_table = table.filter(pc.equal(table["episode_index"], episode_index)) + episode_lengths.insert(episode_index, len(filtered_table)) + output_file = output_dir / PARQUET_PATH.format( + episode_index=episode_index, total_episodes=total_episodes + ) + pq.write_table(filtered_table, output_file) + + return episode_lengths + + +def _get_audio_info(video_path: Path | str) -> dict: + ffprobe_audio_cmd = [ + "ffprobe", + "-v", + "error", + "-select_streams", + "a:0", + "-show_entries", + "stream=channels,codec_name,bit_rate,sample_rate,bit_depth,channel_layout,duration", + "-of", + "json", + str(video_path), + ] + result = subprocess.run(ffprobe_audio_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + if result.returncode != 0: + raise RuntimeError(f"Error running ffprobe: {result.stderr}") + + info = json.loads(result.stdout) + audio_stream_info = info["streams"][0] if info.get("streams") else None + if audio_stream_info is None: + return {"has_audio": False} + + # Return the information, defaulting to None if no audio stream is present + return { + "has_audio": True, + "audio.channels": audio_stream_info.get("channels", None), + "audio.codec": audio_stream_info.get("codec_name", None), + "audio.bit_rate": int(audio_stream_info["bit_rate"]) if audio_stream_info.get("bit_rate") else None, + "audio.sample_rate": int(audio_stream_info["sample_rate"]) + if audio_stream_info.get("sample_rate") + else None, + "audio.bit_depth": audio_stream_info.get("bit_depth", None), + "audio.channel_layout": audio_stream_info.get("channel_layout", None), + } + + +def _get_video_info(video_path: Path | str) -> dict: + ffprobe_video_cmd = [ + "ffprobe", + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=r_frame_rate,width,height,codec_name,nb_frames,duration,pix_fmt", + "-of", + "json", + str(video_path), + ] + result = subprocess.run(ffprobe_video_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + if result.returncode != 0: + raise RuntimeError(f"Error running ffprobe: {result.stderr}") + + info = json.loads(result.stdout) + video_stream_info = info["streams"][0] + + # Calculate fps from r_frame_rate + r_frame_rate = video_stream_info["r_frame_rate"] + num, denom = map(int, r_frame_rate.split("/")) + fps = num / denom + + video_info = { + "video.fps": fps, + "video.width": video_stream_info["width"], + "video.height": video_stream_info["height"], + "video.codec": video_stream_info["codec_name"], + "video.pix_fmt": video_stream_info["pix_fmt"], + **_get_audio_info(video_path), + } + + return video_info + + +def get_videos_info(repo_id: str, local_dir: Path, video_keys: list[str]) -> dict: + hub_api = HfApi() + videos_info_dict = { + "videos_path": VIDEO_PATH, + "has_audio": False, + "has_depth": False, + } + for vid_key in video_keys: + video_path = VIDEO_PATH.format(image_key=vid_key, episode_index=0) + video_path = hub_api.hf_hub_download( + repo_id=repo_id, repo_type="dataset", local_dir=local_dir, filename=video_path + ) + videos_info_dict[vid_key] = _get_video_info(video_path) + videos_info_dict["has_audio"] = ( + videos_info_dict["has_audio"] or videos_info_dict[vid_key]["has_audio"] + ) + + return videos_info_dict + + +def get_video_shapes(videos_info: dict, video_keys: list) -> dict: + video_shapes = {} + for img_key in video_keys: + video_shapes[img_key] = { + "width": videos_info[img_key]["video.width"], + "height": videos_info[img_key]["video.height"], + } + + return video_shapes + + +def 
get_image_shapes(table: pa.Table, image_keys: list) -> dict: + image_shapes = {} + for img_key in image_keys: + image_bytes = table[img_key][0].as_py() # Assuming first row + image = Image.open(BytesIO(image_bytes["bytes"])) + image_shapes[img_key] = { + "width": image.width, + "height": image.height, + } + + return image_shapes + + +def get_generic_motor_names(sequence_shapes: dict) -> dict: + return {key: [f"motor_{i}" for i in range(length)] for key, length in sequence_shapes.items()} + + +def convert_dataset( + repo_id: str, + local_dir: Path, + tasks: dict, + tasks_by_episodes: dict | None = None, + robot_config: dict | None = None, +): + v1_6_dir = local_dir / repo_id / V1_6 + v2_0_dir = local_dir / repo_id / V2_0 + v1_6_dir.mkdir(parents=True, exist_ok=True) + v2_0_dir.mkdir(parents=True, exist_ok=True) + + hub_api = HfApi() + hub_api.snapshot_download( + repo_id=repo_id, repo_type="dataset", revision=V1_6, local_dir=v1_6_dir, ignore_patterns="videos/" + ) + + metadata_v1_6 = load_json(v1_6_dir / "meta_data" / "info.json") + + table = pq.read_table(v1_6_dir / "data") + keys = get_keys(table) + + # Episodes + episode_indices = sorted(table["episode_index"].unique().to_pylist()) + total_episodes = len(episode_indices) + assert episode_indices == list(range(total_episodes)) + + # Tasks + if tasks_by_episodes is None: # Single task dataset + tasks_by_episodes = {ep_idx: 0 for ep_idx in episode_indices} + + assert set(tasks) == set(tasks_by_episodes.values()) + table = add_tasks(table, tasks_by_episodes) + write_json(tasks, v2_0_dir / "meta" / "tasks.json") + + # Split data into 1 parquet file by episode + episode_lengths = split_parquet_by_episodes(table, keys, total_episodes, episode_indices, v2_0_dir) + + # Shapes + sequence_shapes = {key: len(table[key][0]) for key in keys["sequence"]} + image_shapes = get_image_shapes(table, keys["image"]) if len(keys["image"]) > 0 else {} + if len(keys["video"]) > 0: + assert metadata_v1_6.get("video", False) + videos_info = get_videos_info(repo_id, v1_6_dir, video_keys=keys["video"]) + video_shapes = get_video_shapes(videos_info, keys["video"]) + for img_key in keys["video"]: + assert videos_info[img_key]["video.pix_fmt"] == metadata_v1_6["encoding"]["pix_fmt"] + assert math.isclose(videos_info[img_key]["video.fps"], metadata_v1_6["fps"], rel_tol=1e-3) + else: + assert len(keys["video"]) == 0 + videos_info = None + video_shapes = {} + + # Names + if robot_config is not None: + robot_type = robot_config["robot_type"] + names = robot_config["names"] + else: + robot_type = "unknown" + names = get_generic_motor_names(sequence_shapes) + + assert set(names) == set(keys["sequence"]) + for key in sequence_shapes: + assert len(names[key]) == sequence_shapes[key] + + # Episodes info + episodes = [ + {"index": ep_idx, "task": tasks_by_episodes[ep_idx], "length": episode_lengths[ep_idx]} + for ep_idx in episode_indices + ] + + # Assemble metadata v2.0 + metadata_v2_0 = { + "codebase_version": V2_0, + "data_path": PARQUET_PATH, + "robot_type": robot_type, + "total_episodes": total_episodes, + "total_tasks": len(tasks), + "fps": metadata_v1_6["fps"], + "image_keys": keys["video"] + keys["image"], + "keys": keys["sequence"], + "shapes": {**image_shapes, **video_shapes, **sequence_shapes}, + "names": names, + "videos": videos_info, + "episodes": episodes, + } + write_json(metadata_v2_0, v2_0_dir / "meta" / "info.json") + + convert_stats_to_json(v1_6_dir / "meta_data", v2_0_dir / "meta") + + # test_repo_id = f"aliberts/{repo_id.split('/')[1]}" + # if 
hub_api.repo_exists(test_repo_id, repo_type="dataset"): + # hub_api.delete_repo(test_repo_id, repo_type="dataset") + + # hub_api.create_repo(test_repo_id, repo_type="dataset", exist_ok=True) + # hub_api.upload_folder(repo_id=test_repo_id, folder_path=v2_0_dir, repo_type="dataset") + + # TODO: + # - [X] Add shapes + # - [X] Add keys + # - [X] Add paths + # - [X] convert stats.json + # - [X] Add task.json + # - [X] Add names + # - [X] Add robot_type + # - [/] Add sanity checks (encoding, shapes) + # - [ ] Handle multitask datasets + # - [ ] Push properly to branch v2.0 and delete v1.6 stuff from that branch + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--repo-id", + type=str, + required=True, + help="Repository identifier on Hugging Face: a community or a user name `/` the name of the dataset (e.g. `lerobot/pusht`, `cadene/aloha_sim_insertion_human`).", + ) + parser.add_argument( + "--task", + type=str, + required=True, + help="A short but accurate description of the task performed in the dataset.", + ) + parser.add_argument( + "--robot-config", + type=Path, + default=None, + help="Path to the robot's config yaml the dataset during conversion.", + ) + parser.add_argument( + "--robot-overrides", + type=str, + nargs="*", + help="Any key=value arguments to override the robot config values (use dots for.nested=overrides)", + ) + parser.add_argument( + "--local-dir", + type=Path, + default=None, + help="Local directory to store the dataset during conversion. Defaults to /tmp/{repo_id}", + ) + + args = parser.parse_args() + if args.local_dir is None: + args.local_dir = Path(f"/tmp/{args.repo_id}") + + tasks = {0: args.task} + del args.task + + if args.robot_config is not None: + robot_config = parse_robot_config(args.robot_config, args.robot_overrides) + else: + robot_config = None + del args.robot_config, args.robot_overrides + + convert_dataset(**vars(args), tasks=tasks, robot_config=robot_config) + + +if __name__ == "__main__": + from time import sleep + + sleep(1) + main() From 1016a983a10c9d41c8145d10217786b1f2eea57e Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 4 Oct 2024 14:26:50 +0200 Subject: [PATCH 02/59] Add upload folders --- convert_dataset_16_to_20.py | 39 ++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/convert_dataset_16_to_20.py b/convert_dataset_16_to_20.py index fdb5f233..c53f7595 100644 --- a/convert_dataset_16_to_20.py +++ b/convert_dataset_16_to_20.py @@ -34,6 +34,7 @@ TODO """ import argparse +import contextlib import json import math import subprocess @@ -45,10 +46,13 @@ import pyarrow.compute as pc import pyarrow.parquet as pq import torch from huggingface_hub import HfApi +from huggingface_hub.errors import EntryNotFoundError from PIL import Image from safetensors.torch import load_file +from lerobot.common.datasets.utils import create_branch from lerobot.common.utils.utils import init_hydra_config +from lerobot.scripts.push_dataset_to_hub import push_dataset_card_to_hub V1_6 = "v1.6" V2_0 = "v2.0" @@ -374,9 +378,11 @@ def convert_dataset( if robot_config is not None: robot_type = robot_config["robot_type"] names = robot_config["names"] + repo_tags = [robot_type] else: robot_type = "unknown" names = get_generic_motor_names(sequence_shapes) + repo_tags = None assert set(names) == set(keys["sequence"]) for key in sequence_shapes: @@ -396,6 +402,7 @@ def convert_dataset( "total_episodes": total_episodes, "total_tasks": len(tasks), "fps": metadata_v1_6["fps"], + "splits": 
{"train": f"0:{total_episodes}"}, "image_keys": keys["video"] + keys["image"], "keys": keys["sequence"], "shapes": {**image_shapes, **video_shapes, **sequence_shapes}, @@ -404,15 +411,32 @@ def convert_dataset( "episodes": episodes, } write_json(metadata_v2_0, v2_0_dir / "meta" / "info.json") - convert_stats_to_json(v1_6_dir / "meta_data", v2_0_dir / "meta") - # test_repo_id = f"aliberts/{repo_id.split('/')[1]}" - # if hub_api.repo_exists(test_repo_id, repo_type="dataset"): - # hub_api.delete_repo(test_repo_id, repo_type="dataset") + with contextlib.suppress(EntryNotFoundError): + hub_api.delete_folder(repo_id=repo_id, path_in_repo="data", repo_type="dataset", revision="main") - # hub_api.create_repo(test_repo_id, repo_type="dataset", exist_ok=True) - # hub_api.upload_folder(repo_id=test_repo_id, folder_path=v2_0_dir, repo_type="dataset") + with contextlib.suppress(EntryNotFoundError): + hub_api.delete_folder(repo_id=repo_id, path_in_repo="meta_data", repo_type="dataset", revision="main") + + hub_api.upload_folder( + repo_id=repo_id, + path_in_repo="data", + folder_path=v2_0_dir / "data", + repo_type="dataset", + revision="main", + ) + hub_api.upload_folder( + repo_id=repo_id, + path_in_repo="meta", + folder_path=v2_0_dir / "meta", + repo_type="dataset", + revision="main", + ) + metadata_v2_0.pop("episodes") + card_text = f"```json\n{json.dumps(metadata_v2_0, indent=4)}\n```" + push_dataset_card_to_hub(repo_id=repo_id, revision="main", tags=repo_tags, text=card_text) + create_branch(repo_id=repo_id, branch=V2_0, repo_type="dataset") # TODO: # - [X] Add shapes @@ -422,9 +446,10 @@ def convert_dataset( # - [X] Add task.json # - [X] Add names # - [X] Add robot_type + # - [X] Add splits + # - [X] Push properly to branch v2.0 and delete v1.6 stuff from that branch # - [/] Add sanity checks (encoding, shapes) # - [ ] Handle multitask datasets - # - [ ] Push properly to branch v2.0 and delete v1.6 stuff from that branch def main(): From 07e113ce21d710dae4adc0a64fbd1ec1b3d06c5b Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 4 Oct 2024 14:36:11 +0200 Subject: [PATCH 03/59] Add info.json link --- convert_dataset_16_to_20.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/convert_dataset_16_to_20.py b/convert_dataset_16_to_20.py index c53f7595..f2878605 100644 --- a/convert_dataset_16_to_20.py +++ b/convert_dataset_16_to_20.py @@ -433,8 +433,9 @@ def convert_dataset( repo_type="dataset", revision="main", ) + metadata_v2_0.pop("episodes") - card_text = f"```json\n{json.dumps(metadata_v2_0, indent=4)}\n```" + card_text = f"[meta/info.json](meta/info.json)\n```json\n{json.dumps(metadata_v2_0, indent=4)}\n```" push_dataset_card_to_hub(repo_id=repo_id, revision="main", tags=repo_tags, text=card_text) create_branch(repo_id=repo_id, branch=V2_0, repo_type="dataset") From 21ba4b5263048eeab3252170274a7de17b843bed Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sun, 6 Oct 2024 11:16:49 +0200 Subject: [PATCH 04/59] Add pixel channels --- convert_dataset_16_to_20.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/convert_dataset_16_to_20.py b/convert_dataset_16_to_20.py index f2878605..abcd8ff0 100644 --- a/convert_dataset_16_to_20.py +++ b/convert_dataset_16_to_20.py @@ -261,10 +261,13 @@ def _get_video_info(video_path: Path | str) -> dict: num, denom = map(int, r_frame_rate.split("/")) fps = num / denom + pixel_channels = get_video_pixel_channels(video_stream_info["pix_fmt"]) + video_info = { "video.fps": fps, "video.width": 
video_stream_info["width"], "video.height": video_stream_info["height"], + "video.channels": pixel_channels, "video.codec": video_stream_info["codec_name"], "video.pix_fmt": video_stream_info["pix_fmt"], **_get_audio_info(video_path), @@ -293,12 +296,38 @@ def get_videos_info(repo_id: str, local_dir: Path, video_keys: list[str]) -> dic return videos_info_dict +def get_video_pixel_channels(pix_fmt: str) -> int: + if "gray" in pix_fmt or "depth" in pix_fmt or "monochrome" in pix_fmt: + return 1 + elif "rgba" in pix_fmt or "yuva" in pix_fmt: + return 4 + elif "rgb" in pix_fmt or "yuv" in pix_fmt: + return 3 + else: + raise ValueError("Unknown format") + + +def get_image_pixel_channels(image: Image): + if image.mode == "L": + return 1 # Grayscale + elif image.mode == "LA": + return 2 # Grayscale + Alpha + elif image.mode == "RGB": + return 3 # RGB + elif image.mode == "RGBA": + return 4 # RGBA + else: + raise ValueError("Unknown format") + + def get_video_shapes(videos_info: dict, video_keys: list) -> dict: video_shapes = {} for img_key in video_keys: + channels = get_video_pixel_channels(videos_info[img_key]["video.pix_fmt"]) video_shapes[img_key] = { "width": videos_info[img_key]["video.width"], "height": videos_info[img_key]["video.height"], + "channels": channels, } return video_shapes @@ -309,9 +338,11 @@ def get_image_shapes(table: pa.Table, image_keys: list) -> dict: for img_key in image_keys: image_bytes = table[img_key][0].as_py() # Assuming first row image = Image.open(BytesIO(image_bytes["bytes"])) + channels = get_image_pixel_channels(image) image_shapes[img_key] = { "width": image.width, "height": image.height, + "channels": channels, } return image_shapes From 2d75b93ba0e554dd18d9a3f87bb433256bd0fbb7 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 8 Oct 2024 15:31:37 +0200 Subject: [PATCH 05/59] Update info.json format --- convert_dataset_16_to_20.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/convert_dataset_16_to_20.py b/convert_dataset_16_to_20.py index abcd8ff0..1a4f1520 100644 --- a/convert_dataset_16_to_20.py +++ b/convert_dataset_16_to_20.py @@ -18,7 +18,8 @@ Single-task dataset: python convert_dataset_16_to_20.py \ --repo-id lerobot/aloha_sim_insertion_human_image \ --task "Insert the peg into the socket." 
\ - --robot-config lerobot/configs/robot/aloha.yaml + --robot-config lerobot/configs/robot/aloha.yaml \ + --local-dir data ``` ```bash @@ -50,7 +51,7 @@ from huggingface_hub.errors import EntryNotFoundError from PIL import Image from safetensors.torch import load_file -from lerobot.common.datasets.utils import create_branch +from lerobot.common.datasets.utils import create_branch, flatten_dict, unflatten_dict from lerobot.common.utils.utils import init_hydra_config from lerobot.scripts.push_dataset_to_hub import push_dataset_card_to_hub @@ -58,7 +59,7 @@ V1_6 = "v1.6" V2_0 = "v2.0" PARQUET_PATH = "data/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" -VIDEO_PATH = "videos/{image_key}_episode_{episode_index:06d}.mp4" +VIDEO_PATH = "videos/{video_key}_episode_{episode_index:06d}.mp4" def parse_robot_config(config_path: Path, config_overrides: list[str] | None = None) -> tuple[str, dict]: @@ -104,17 +105,19 @@ def write_json(data: dict, fpath: Path) -> None: def convert_stats_to_json(input_dir: Path, output_dir: Path) -> None: safetensor_path = input_dir / "stats.safetensors" stats = load_file(safetensor_path) - serializable_stats = {key: value.tolist() for key, value in stats.items()} + serialized_stats = {key: value.tolist() for key, value in stats.items()} + serialized_stats = unflatten_dict(serialized_stats) json_path = output_dir / "stats.json" json_path.parent.mkdir(exist_ok=True, parents=True) with open(json_path, "w") as f: - json.dump(serializable_stats, f, indent=4) + json.dump(serialized_stats, f, indent=4) # Sanity check with open(json_path) as f: stats_json = json.load(f) + stats_json = flatten_dict(stats_json) stats_json = {key: torch.tensor(value) for key, value in stats_json.items()} for key in stats: torch.testing.assert_close(stats_json[key], stats[key]) @@ -270,6 +273,7 @@ def _get_video_info(video_path: Path | str) -> dict: "video.channels": pixel_channels, "video.codec": video_stream_info["codec_name"], "video.pix_fmt": video_stream_info["pix_fmt"], + "video.is_depth_map": False, **_get_audio_info(video_path), } @@ -278,20 +282,13 @@ def _get_video_info(video_path: Path | str) -> dict: def get_videos_info(repo_id: str, local_dir: Path, video_keys: list[str]) -> dict: hub_api = HfApi() - videos_info_dict = { - "videos_path": VIDEO_PATH, - "has_audio": False, - "has_depth": False, - } + videos_info_dict = {"videos_path": VIDEO_PATH} for vid_key in video_keys: - video_path = VIDEO_PATH.format(image_key=vid_key, episode_index=0) + video_path = VIDEO_PATH.format(video_key=vid_key, episode_index=0) video_path = hub_api.hf_hub_download( repo_id=repo_id, repo_type="dataset", local_dir=local_dir, filename=video_path ) videos_info_dict[vid_key] = _get_video_info(video_path) - videos_info_dict["has_audio"] = ( - videos_info_dict["has_audio"] or videos_info_dict[vid_key]["has_audio"] - ) return videos_info_dict @@ -359,8 +356,8 @@ def convert_dataset( tasks_by_episodes: dict | None = None, robot_config: dict | None = None, ): - v1_6_dir = local_dir / repo_id / V1_6 - v2_0_dir = local_dir / repo_id / V2_0 + v1_6_dir = local_dir / V1_6 / repo_id + v2_0_dir = local_dir / V2_0 / repo_id v1_6_dir.mkdir(parents=True, exist_ok=True) v2_0_dir.mkdir(parents=True, exist_ok=True) @@ -434,9 +431,10 @@ def convert_dataset( "total_tasks": len(tasks), "fps": metadata_v1_6["fps"], "splits": {"train": f"0:{total_episodes}"}, - "image_keys": keys["video"] + keys["image"], "keys": keys["sequence"], - "shapes": {**image_shapes, **video_shapes, **sequence_shapes}, + "video_keys": keys["video"], 
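+ # Note: "video_keys" lists camera streams stored as mp4 files, while "image_keys"
+ # lists cameras whose frames are stored directly inside the parquet files
+ # (e.g. ["observation.images.laptop"] and [] for a video-only dataset).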
+ "image_keys": keys["image"], + "shapes": {**sequence_shapes, **video_shapes, **image_shapes}, "names": names, "videos": videos_info, "episodes": episodes, From 096824b5ff3af40a4f7ae322da145e08e1203269 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 9 Oct 2024 14:33:26 +0200 Subject: [PATCH 06/59] Rework LeRobotDataset.__init__ --- lerobot/common/datasets/lerobot_dataset.py | 140 +++++++++++++----- lerobot/common/datasets/utils.py | 163 +++++++++++---------- 2 files changed, 189 insertions(+), 114 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index eb76f78d..35e9c762 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -15,6 +15,7 @@ # limitations under the License. import logging import os +from itertools import accumulate from pathlib import Path from typing import Callable @@ -24,27 +25,27 @@ import torch.utils from lerobot.common.datasets.compute_stats import aggregate_stats from lerobot.common.datasets.utils import ( - calculate_episode_data_index, - load_episode_data_index, + download_episodes, + get_hub_safe_version, load_hf_dataset, load_info, load_previous_and_future_frames, load_stats, - load_videos, - reset_episode_index, + load_tasks, ) from lerobot.common.datasets.video_utils import VideoFrame, load_from_videos # For maintainers, see lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md -CODEBASE_VERSION = "v1.6" -DATA_DIR = Path(os.environ["DATA_DIR"]) if "DATA_DIR" in os.environ else None +CODEBASE_VERSION = "v2.0" +LEROBOT_HOME = Path(os.getenv("LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser() class LeRobotDataset(torch.utils.data.Dataset): def __init__( self, repo_id: str, - root: Path | None = DATA_DIR, + root: Path | None = None, + episodes: list[int] | None = None, split: str = "train", image_transforms: Callable | None = None, delta_timestamps: dict[list[float]] | None = None, @@ -52,24 +53,64 @@ class LeRobotDataset(torch.utils.data.Dataset): ): super().__init__() self.repo_id = repo_id - self.root = root + self.root = root if root is not None else LEROBOT_HOME / repo_id self.split = split self.image_transforms = image_transforms self.delta_timestamps = delta_timestamps - # load data from hub or locally when root is provided - # TODO(rcadene, aliberts): implement faster transfer - # https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads - self.hf_dataset = load_hf_dataset(repo_id, CODEBASE_VERSION, root, split) - if split == "train": - self.episode_data_index = load_episode_data_index(repo_id, CODEBASE_VERSION, root) - else: - self.episode_data_index = calculate_episode_data_index(self.hf_dataset) - self.hf_dataset = reset_episode_index(self.hf_dataset) - self.stats = load_stats(repo_id, CODEBASE_VERSION, root) - self.info = load_info(repo_id, CODEBASE_VERSION, root) - if self.video: - self.videos_dir = load_videos(repo_id, CODEBASE_VERSION, root) - self.video_backend = video_backend if video_backend is not None else "pyav" + self.episodes = episodes + self.video_backend = video_backend if video_backend is not None else "pyav" + + # Load metadata + self.root.mkdir(exist_ok=True, parents=True) + self._version = get_hub_safe_version(repo_id, CODEBASE_VERSION) + self.info = load_info(repo_id, self._version, self.root) + self.stats = load_stats(repo_id, self._version, self.root) + self.tasks = load_tasks(repo_id, self._version, self.root) + + # Load actual data + download_episodes( + repo_id, + 
self._version, + self.root, + self.data_path, + self.video_keys, + self.num_episodes, + self.episodes, + self.videos_path, + ) + self.hf_dataset = load_hf_dataset(self.root, self.data_path, self.total_episodes, self.episodes) + self.episode_data_index = self.get_episode_data_index() + + # TODO(aliberts): + # - [ ] Update __get_item__ + # - [ ] Add self.consolidate() for: + # - [ ] Sanity checks (episodes num, shapes, files, etc.) + # - [ ] Update episode_index (arg update=True) + # - [ ] Update info.json (arg update=True) + + # TODO(aliberts): remove (deprecated) + # if split == "train": + # self.episode_data_index = load_episode_data_index(self.episodes, self.episode_list) + # else: + # self.episode_data_index = calculate_episode_data_index(self.hf_dataset) + # self.hf_dataset = reset_episode_index(self.hf_dataset) + # if self.video: + # self.videos_dir = load_videos(repo_id, CODEBASE_VERSION, root) + + @property + def data_path(self) -> str: + """Formattable string for the parquet files.""" + return self.info["data_path"] + + @property + def videos_path(self) -> str | None: + """Formattable string for the video files.""" + return self.info["videos"]["videos_path"] if len(self.video_keys) > 0 else None + + @property + def episode_dicts(self) -> list[dict]: + """List of dictionary containing information for each episode, indexed by episode_index.""" + return self.info["episodes"] @property def fps(self) -> int: @@ -77,24 +118,24 @@ class LeRobotDataset(torch.utils.data.Dataset): return self.info["fps"] @property - def video(self) -> bool: - """Returns True if this dataset loads video frames from mp4 files. - Returns False if it only loads images from png files. - """ - return self.info.get("video", False) + def keys(self) -> list[str]: + """Keys to access non-image data (state, actions etc.).""" + return self.info["keys"] @property - def features(self) -> datasets.Features: - return self.hf_dataset.features + def image_keys(self) -> list[str]: + """Keys to access visual modalities stored as images.""" + return self.info["image_keys"] + + @property + def video_keys(self) -> list[str]: + """Keys to access visual modalities stored as videos.""" + return self.info["video_keys"] @property def camera_keys(self) -> list[str]: - """Keys to access image and video stream from cameras.""" - keys = [] - for key, feats in self.hf_dataset.features.items(): - if isinstance(feats, (datasets.Image, VideoFrame)): - keys.append(key) - return keys + """Keys to access image and video streams from cameras.""" + return self.image_keys + self.video_keys @property def video_frame_keys(self) -> list[str]: @@ -117,8 +158,13 @@ class LeRobotDataset(torch.utils.data.Dataset): @property def num_episodes(self) -> int: - """Number of episodes.""" - return len(self.hf_dataset.unique("episode_index")) + """Number of episodes selected.""" + return len(self.episodes) if self.episodes is not None else self.total_episodes + + @property + def total_episodes(self) -> int: + """Total number of episodes available.""" + return self.info["total_episodes"] @property def tolerance_s(self) -> float: @@ -129,6 +175,22 @@ class LeRobotDataset(torch.utils.data.Dataset): # 1e-4 to account for possible numerical error return 1 / self.fps - 1e-4 + @property + def shapes(self) -> dict: + """Shapes for the different features.""" + self.info.get("shapes") + + def get_episode_data_index(self) -> dict[str, torch.Tensor]: + episode_lengths = {ep_idx: ep_dict["length"] for ep_idx, ep_dict in enumerate(self.episode_dicts)} + if self.episodes is not 
None: + episode_lengths = {ep_idx: episode_lengths[ep_idx] for ep_idx in self.episodes} + + cumulative_lenghts = list(accumulate(episode_lengths.values())) + return { + "from": torch.LongTensor([0] + cumulative_lenghts[:-1]), + "to": torch.LongTensor(cumulative_lenghts), + } + def __len__(self): return self.num_samples @@ -147,7 +209,7 @@ class LeRobotDataset(torch.utils.data.Dataset): if self.video: item = load_from_videos( item, - self.video_frame_keys, + self.video_keys, self.videos_dir, self.tolerance_s, self.video_backend, @@ -225,7 +287,7 @@ class MultiLeRobotDataset(torch.utils.data.Dataset): def __init__( self, repo_ids: list[str], - root: Path | None = DATA_DIR, + root: Path | None = LEROBOT_HOME, split: str = "train", image_transforms: Callable | None = None, delta_timestamps: dict[list[float]] | None = None, diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index d6aef15f..fd76ccd1 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import json -import re import warnings from functools import cache from pathlib import Path @@ -22,10 +21,9 @@ from typing import Dict import datasets import torch -from datasets import load_dataset, load_from_disk +from datasets import load_dataset from huggingface_hub import DatasetCard, HfApi, hf_hub_download, snapshot_download from PIL import Image as PILImage -from safetensors.torch import load_file from torchvision import transforms DATASET_CARD_TEMPLATE = """ @@ -96,7 +94,14 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): @cache -def get_hf_dataset_safe_version(repo_id: str, version: str) -> str: +def get_hub_safe_version(repo_id: str, version: str) -> str: + num_version = float(version.strip("v")) + if num_version < 2: + raise ValueError( + f"""The dataset you requested ({repo_id}) is in {version} format. We introduced a new + format with v2.0 that is not backward compatible. 
Please use our conversion script + first (convert_dataset_16_to_20.py) to convert your dataset to this new format.""" + ) api = HfApi() dataset_info = api.list_repo_refs(repo_id, repo_type="dataset") branches = [b.name for b in dataset_info.branches] @@ -116,56 +121,27 @@ def get_hf_dataset_safe_version(repo_id: str, version: str) -> str: return version -def load_hf_dataset(repo_id: str, version: str, root: Path, split: str) -> datasets.Dataset: +def load_hf_dataset( + local_dir: Path, + data_path: str, + total_episodes: int, + episodes: list[int] | None = None, + split: str = "train", +) -> datasets.Dataset: """hf_dataset contains all the observations, states, actions, rewards, etc.""" - if root is not None: - hf_dataset = load_from_disk(str(Path(root) / repo_id / "train")) - # TODO(rcadene): clean this which enables getting a subset of dataset - if split != "train": - if "%" in split: - raise NotImplementedError(f"We dont support splitting based on percentage for now ({split}).") - match_from = re.search(r"train\[(\d+):\]", split) - match_to = re.search(r"train\[:(\d+)\]", split) - if match_from: - from_frame_index = int(match_from.group(1)) - hf_dataset = hf_dataset.select(range(from_frame_index, len(hf_dataset))) - elif match_to: - to_frame_index = int(match_to.group(1)) - hf_dataset = hf_dataset.select(range(to_frame_index)) - else: - raise ValueError( - f'`split` ({split}) should either be "train", "train[INT:]", or "train[:INT]"' - ) + if episodes is None: + path = str(local_dir / "data") + hf_dataset = load_dataset("parquet", data_dir=path, split=split) else: - safe_version = get_hf_dataset_safe_version(repo_id, version) - hf_dataset = load_dataset(repo_id, revision=safe_version, split=split) + files = [data_path.format(episode_index=ep_idx, total_episodes=total_episodes) for ep_idx in episodes] + files = [str(local_dir / fpath) for fpath in files] + hf_dataset = load_dataset("parquet", data_files=files, split=split) hf_dataset.set_transform(hf_transform_to_torch) return hf_dataset -def load_episode_data_index(repo_id, version, root) -> dict[str, torch.Tensor]: - """episode_data_index contains the range of indices for each episode - - Example: - ```python - from_id = episode_data_index["from"][episode_id].item() - to_id = episode_data_index["to"][episode_id].item() - episode_frames = [dataset[i] for i in range(from_id, to_id)] - ``` - """ - if root is not None: - path = Path(root) / repo_id / "meta_data" / "episode_data_index.safetensors" - else: - safe_version = get_hf_dataset_safe_version(repo_id, version) - path = hf_hub_download( - repo_id, "meta_data/episode_data_index.safetensors", repo_type="dataset", revision=safe_version - ) - - return load_file(path) - - -def load_stats(repo_id, version, root) -> dict[str, dict[str, torch.Tensor]]: +def load_stats(repo_id: str, version: str, local_dir: Path) -> dict[str, dict[str, torch.Tensor]]: """stats contains the statistics per modality computed over the full dataset, such as max, min, mean, std Example: @@ -173,47 +149,84 @@ def load_stats(repo_id, version, root) -> dict[str, dict[str, torch.Tensor]]: normalized_action = (action - stats["action"]["mean"]) / stats["action"]["std"] ``` """ - if root is not None: - path = Path(root) / repo_id / "meta_data" / "stats.safetensors" - else: - safe_version = get_hf_dataset_safe_version(repo_id, version) - path = hf_hub_download( - repo_id, "meta_data/stats.safetensors", repo_type="dataset", revision=safe_version - ) + fpath = hf_hub_download( + repo_id, filename="meta/stats.json", 
local_dir=local_dir, repo_type="dataset", revision=version + ) + with open(fpath) as f: + stats = json.load(f) - stats = load_file(path) + stats = flatten_dict(stats) + stats = {key: torch.tensor(value) for key, value in stats.items()} return unflatten_dict(stats) -def load_info(repo_id, version, root) -> dict: - """info contains useful information regarding the dataset that are not stored elsewhere +def load_info(repo_id: str, version: str, local_dir: Path) -> dict: + """info contains structural information about the dataset. It should be the reference and + act as the 'source of thruth' for what's inside the dataset. Example: ```python print("frame per second used to collect the video", info["fps"]) ``` """ - if root is not None: - path = Path(root) / repo_id / "meta_data" / "info.json" - else: - safe_version = get_hf_dataset_safe_version(repo_id, version) - path = hf_hub_download(repo_id, "meta_data/info.json", repo_type="dataset", revision=safe_version) - - with open(path) as f: - info = json.load(f) - return info + fpath = hf_hub_download( + repo_id, filename="meta/info.json", local_dir=local_dir, repo_type="dataset", revision=version + ) + with open(fpath) as f: + return json.load(f) -def load_videos(repo_id, version, root) -> Path: - if root is not None: - path = Path(root) / repo_id / "videos" - else: - # TODO(rcadene): we download the whole repo here. see if we can avoid this - safe_version = get_hf_dataset_safe_version(repo_id, version) - repo_dir = snapshot_download(repo_id, repo_type="dataset", revision=safe_version) - path = Path(repo_dir) / "videos" +def load_tasks(repo_id: str, version: str, local_dir: Path) -> dict: + """tasks contains all the tasks of the dataset, indexed by their task_index. - return path + Example: + ```json + { + "0": "Pick the Lego block and drop it in the box on the right." + } + ``` + """ + fpath = hf_hub_download( + repo_id, filename="meta/tasks.json", local_dir=local_dir, repo_type="dataset", revision=version + ) + with open(fpath) as f: + return json.load(f) + + +def download_episodes( + repo_id: str, + version: str, + local_dir: Path, + data_path: str, + video_keys: list, + total_episodes: int, + episodes: list[int] | None = None, + videos_path: str | None = None, +) -> None: + """Downloads the dataset from the given 'repo_id' at the provided 'version'. If 'episodes' is given, this + will only download those episodes (selected by their episode_index). If 'episodes' is None, the whole + dataset will be downloaded. Thanks to the behavior of snapshot_download, if the files are already present + in 'local_dir', they won't be downloaded again. + + Note: Currently, if you're running this code offline but you already have the files in 'local_dir', + snapshot_download will still fail. This behavior will be fixed in an upcoming update of huggingface_hub. 
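+
+ Example (illustrative repo and video key, assuming a 50-episode dataset):
+ ```python
+ download_episodes(
+     repo_id="lerobot/pusht",
+     version="v2.0",
+     local_dir=Path("/tmp/lerobot/pusht"),
+     data_path="data/train-{episode_index:05d}-of-{total_episodes:05d}.parquet",
+     video_keys=["observation.images.top"],
+     total_episodes=50,
+     episodes=[0, 2],
+     videos_path="videos/{video_key}_episode_{episode_index:06d}.mp4",
+ )
+ # Only downloads data/train-00000-of-00050.parquet, data/train-00002-of-00050.parquet
+ # and the two matching observation.images.top mp4 files.
+ ```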
+ """ + # TODO(rcadene, aliberts): implement faster transfer + # https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads + files = None + if episodes is not None: + files = [data_path.format(episode_index=ep_idx, total_episodes=total_episodes) for ep_idx in episodes] + if len(video_keys) > 0: + video_files = [ + videos_path.format(video_key=vid_key, episode_index=ep_idx) + for vid_key in video_keys + for ep_idx in episodes + ] + files += video_files + + snapshot_download( + repo_id, repo_type="dataset", revision=version, local_dir=local_dir, allow_patterns=files + ) def load_previous_and_future_frames( From b417cebc4e0c2dd8cc087d17684ed25902c91854 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 10 Oct 2024 21:32:14 +0200 Subject: [PATCH 07/59] Update LeRobotDataset.__get_item__ --- lerobot/common/datasets/lerobot_dataset.py | 191 +++++++++++++++------ lerobot/common/datasets/utils.py | 130 ++++++++++---- lerobot/common/datasets/video_utils.py | 39 +---- 3 files changed, 232 insertions(+), 128 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 35e9c762..b91eb75f 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -15,25 +15,27 @@ # limitations under the License. import logging import os -from itertools import accumulate from pathlib import Path from typing import Callable import datasets import torch import torch.utils +from huggingface_hub import snapshot_download from lerobot.common.datasets.compute_stats import aggregate_stats from lerobot.common.datasets.utils import ( - download_episodes, + check_delta_timestamps, + check_timestamps_sync, + get_delta_indices, + get_episode_data_index, get_hub_safe_version, load_hf_dataset, load_info, - load_previous_and_future_frames, load_stats, load_tasks, ) -from lerobot.common.datasets.video_utils import VideoFrame, load_from_videos +from lerobot.common.datasets.video_utils import VideoFrame, decode_video_frames_torchvision # For maintainers, see lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md CODEBASE_VERSION = "v2.0" @@ -49,6 +51,7 @@ class LeRobotDataset(torch.utils.data.Dataset): split: str = "train", image_transforms: Callable | None = None, delta_timestamps: dict[list[float]] | None = None, + tolerance_s: float = 1e-4, video_backend: str | None = None, ): super().__init__() @@ -58,7 +61,9 @@ class LeRobotDataset(torch.utils.data.Dataset): self.image_transforms = image_transforms self.delta_timestamps = delta_timestamps self.episodes = episodes + self.tolerance_s = tolerance_s self.video_backend = video_backend if video_backend is not None else "pyav" + self.delta_indices = None # Load metadata self.root.mkdir(exist_ok=True, parents=True) @@ -68,34 +73,60 @@ class LeRobotDataset(torch.utils.data.Dataset): self.tasks = load_tasks(repo_id, self._version, self.root) # Load actual data - download_episodes( - repo_id, - self._version, - self.root, - self.data_path, - self.video_keys, - self.num_episodes, - self.episodes, - self.videos_path, - ) + self.download_episodes() self.hf_dataset = load_hf_dataset(self.root, self.data_path, self.total_episodes, self.episodes) - self.episode_data_index = self.get_episode_data_index() + self.episode_data_index = get_episode_data_index(self.episodes, self.episode_dicts) + + # Check timestamps + check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) + + # Setup delta_indices + if self.delta_timestamps is 
not None: + check_delta_timestamps(self.delta_timestamps, self.fps, self.tolerance_s) + self.delta_indices = get_delta_indices(self.delta_timestamps, self.fps) # TODO(aliberts): - # - [ ] Update __get_item__ + # - [X] Move delta_timestamp logic outside __get_item__ + # - [X] Update __get_item__ + # - [ ] Add self.add_frame() # - [ ] Add self.consolidate() for: + # - [X] Check timestamps sync # - [ ] Sanity checks (episodes num, shapes, files, etc.) # - [ ] Update episode_index (arg update=True) # - [ ] Update info.json (arg update=True) - # TODO(aliberts): remove (deprecated) - # if split == "train": - # self.episode_data_index = load_episode_data_index(self.episodes, self.episode_list) - # else: - # self.episode_data_index = calculate_episode_data_index(self.hf_dataset) - # self.hf_dataset = reset_episode_index(self.hf_dataset) - # if self.video: - # self.videos_dir = load_videos(repo_id, CODEBASE_VERSION, root) + def download_episodes(self) -> None: + """Downloads the dataset from the given 'repo_id' at the provided version. If 'episodes' is given, this + will only download those episodes (selected by their episode_index). If 'episodes' is None, the whole + dataset will be downloaded. Thanks to the behavior of snapshot_download, if the files are already present + in 'local_dir', they won't be downloaded again. + + Note: Currently, if you're running this code offline but you already have the files in 'local_dir', + snapshot_download will still fail. This behavior will be fixed in an upcoming update of huggingface_hub. + """ + # TODO(rcadene, aliberts): implement faster transfer + # https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads + files = None + if self.episodes is not None: + files = [ + self.data_path.format(episode_index=ep_idx, total_episodes=self.total_episodes) + for ep_idx in self.episodes + ] + if len(self.video_keys) > 0: + video_files = [ + self.videos_path.format(video_key=vid_key, episode_index=ep_idx) + for vid_key in self.video_keys + for ep_idx in self.episodes + ] + files += video_files + + snapshot_download( + self.repo_id, + repo_type="dataset", + revision=self._version, + local_dir=self.root, + allow_patterns=files, + ) @property def data_path(self) -> str: @@ -134,17 +165,20 @@ class LeRobotDataset(torch.utils.data.Dataset): @property def camera_keys(self) -> list[str]: - """Keys to access image and video streams from cameras.""" + """Keys to access image and video streams from cameras (regardless of their storage method).""" return self.image_keys + self.video_keys @property def video_frame_keys(self) -> list[str]: - """Keys to access video frames that requires to be decoded into images. + """ + DEPRECATED, USE 'video_keys' INSTEAD + Keys to access video frames that requires to be decoded into images. Note: It is empty if the dataset contains images only, or equal to `self.cameras` if the dataset contains videos only, or can even be a subset of `self.cameras` in a case of a mixed image/video dataset. """ + # TODO(aliberts): remove video_frame_keys = [] for key, feats in self.hf_dataset.features.items(): if isinstance(feats, VideoFrame): @@ -166,54 +200,97 @@ class LeRobotDataset(torch.utils.data.Dataset): """Total number of episodes available.""" return self.info["total_episodes"] - @property - def tolerance_s(self) -> float: - """Tolerance in seconds used to discard loaded frames when their timestamps - are not close enough from the requested frames. 
It is only used when `delta_timestamps` - is provided or when loading video frames from mp4 files. - """ - # 1e-4 to account for possible numerical error - return 1 / self.fps - 1e-4 + # @property + # def tolerance_s(self) -> float: + # """Tolerance in seconds used to discard loaded frames when their timestamps + # are not close enough from the requested frames. It is used at the init of the dataset to make sure + # that each timestamps is separated to the next by 1/fps +/- tolerance. It is only used when + # `delta_timestamps` is provided or when loading video frames from mp4 files. + # """ + # # 1e-4 to account for possible numerical error + # return 1e-4 @property def shapes(self) -> dict: """Shapes for the different features.""" self.info.get("shapes") - def get_episode_data_index(self) -> dict[str, torch.Tensor]: - episode_lengths = {ep_idx: ep_dict["length"] for ep_idx, ep_dict in enumerate(self.episode_dicts)} + def current_episode_index(self, idx: int) -> int: + episode_index = self.hf_dataset["episode_index"][idx] if self.episodes is not None: - episode_lengths = {ep_idx: episode_lengths[ep_idx] for ep_idx in self.episodes} + # get episode_index from selected episodes + episode_index = self.episodes.index(episode_index) - cumulative_lenghts = list(accumulate(episode_lengths.values())) + return episode_index + + def episode_length(self, episode_index) -> int: + """Number of samples/frames for given episode.""" + return self.info["episodes"][episode_index]["length"] + + def _get_query_indices(self, idx: int, ep_idx: int) -> dict[str, list[int]]: + # Pad values outside of current episode range + ep_start = self.episode_data_index["from"][ep_idx] + ep_end = self.episode_data_index["to"][ep_idx] return { - "from": torch.LongTensor([0] + cumulative_lenghts[:-1]), - "to": torch.LongTensor(cumulative_lenghts), + key: [max(ep_start.item(), min(ep_end.item() - 1, idx + delta)) for delta in delta_idx] + for key, delta_idx in self.delta_indices.items() } + def _get_query_timestamps( + self, query_indices: dict[str, list[int]], current_ts: float + ) -> dict[str, list[float]]: + query_timestamps = {} + for key in self.video_keys: + if key in query_indices: + timestamps = self.hf_dataset.select(query_indices[key])["timestamp"] + query_timestamps[key] = torch.stack(timestamps).tolist() + else: + query_timestamps[key] = [current_ts] + + return query_timestamps + + def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict: + return { + key: torch.stack(self.hf_dataset.select(q_idx)[key]) + for key, q_idx in query_indices.items() + if key not in self.video_keys + } + + def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict: + """Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function + in the main process (e.g. by using a second Dataloader with num_workers=0). It will result in a + Segmentation Fault. This probably happens because a memory reference to the video loader is created in + the main process and a subprocess fails to access it. 
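+
+ Example (illustrative video key and timestamps):
+ ```python
+ frames = self._query_videos({"observation.images.laptop": [1.0, 1.2]}, ep_idx=0)
+ frames["observation.images.laptop"]  # float32 tensor, one decoded frame per requested timestamp
+ ```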
+ """ + item = {} + for vid_key, query_ts in query_timestamps.items(): + video_path = self.root / self.videos_path.format(video_key=vid_key, episode_index=ep_idx) + frames = decode_video_frames_torchvision( + video_path, query_ts, self.tolerance_s, self.video_backend + ) + item[vid_key] = frames + + return item + def __len__(self): return self.num_samples - def __getitem__(self, idx): + def __getitem__(self, idx) -> dict: item = self.hf_dataset[idx] + ep_idx = item["episode_index"].item() - if self.delta_timestamps is not None: - item = load_previous_and_future_frames( - item, - self.hf_dataset, - self.episode_data_index, - self.delta_timestamps, - self.tolerance_s, - ) + if self.delta_indices is not None: + current_ep_idx = self.episodes.index(ep_idx) if self.episodes is not None else ep_idx + query_indices = self._get_query_indices(idx, current_ep_idx) + query_result = self._query_hf_dataset(query_indices) + for key, val in query_result.items(): + item[key] = val - if self.video: - item = load_from_videos( - item, - self.video_keys, - self.videos_dir, - self.tolerance_s, - self.video_backend, - ) + if len(self.video_keys) > 0: + current_ts = item["timestamp"].item() + query_timestamps = self._get_query_timestamps(query_indices, current_ts) + video_frames = self._query_videos(query_timestamps, ep_idx) + item = {**video_frames, **item} if self.image_transforms is not None: for cam in self.camera_keys: diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index fd76ccd1..9b70d4f6 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -16,13 +16,15 @@ import json import warnings from functools import cache +from itertools import accumulate from pathlib import Path +from pprint import pformat from typing import Dict import datasets import torch from datasets import load_dataset -from huggingface_hub import DatasetCard, HfApi, hf_hub_download, snapshot_download +from huggingface_hub import DatasetCard, HfApi, hf_hub_download from PIL import Image as PILImage from torchvision import transforms @@ -193,40 +195,102 @@ def load_tasks(repo_id: str, version: str, local_dir: Path) -> dict: return json.load(f) -def download_episodes( - repo_id: str, - version: str, - local_dir: Path, - data_path: str, - video_keys: list, - total_episodes: int, - episodes: list[int] | None = None, - videos_path: str | None = None, -) -> None: - """Downloads the dataset from the given 'repo_id' at the provided 'version'. If 'episodes' is given, this - will only download those episodes (selected by their episode_index). If 'episodes' is None, the whole - dataset will be downloaded. Thanks to the behavior of snapshot_download, if the files are already present - in 'local_dir', they won't be downloaded again. - - Note: Currently, if you're running this code offline but you already have the files in 'local_dir', - snapshot_download will still fail. This behavior will be fixed in an upcoming update of huggingface_hub. 
- """ - # TODO(rcadene, aliberts): implement faster transfer - # https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads - files = None +def get_episode_data_index(episodes: list, episode_dicts: list[dict]) -> dict[str, torch.Tensor]: + episode_lengths = {ep_idx: ep_dict["length"] for ep_idx, ep_dict in enumerate(episode_dicts)} if episodes is not None: - files = [data_path.format(episode_index=ep_idx, total_episodes=total_episodes) for ep_idx in episodes] - if len(video_keys) > 0: - video_files = [ - videos_path.format(video_key=vid_key, episode_index=ep_idx) - for vid_key in video_keys - for ep_idx in episodes - ] - files += video_files + episode_lengths = {ep_idx: episode_lengths[ep_idx] for ep_idx in episodes} - snapshot_download( - repo_id, repo_type="dataset", revision=version, local_dir=local_dir, allow_patterns=files - ) + cumulative_lenghts = list(accumulate(episode_lengths.values())) + return { + "from": torch.LongTensor([0] + cumulative_lenghts[:-1]), + "to": torch.LongTensor(cumulative_lenghts), + } + + +def check_timestamps_sync( + hf_dataset: datasets.Dataset, + episode_data_index: dict[str, torch.Tensor], + fps: int, + tolerance_s: float, + raise_value_error: bool = True, +) -> bool: + """ + This check is to make sure that each timestamps is separated to the next by 1/fps +/- tolerance to + account for possible numerical error. + """ + timestamps = torch.stack(hf_dataset["timestamp"]) + # timestamps[2] += tolerance_s # TODO delete + # timestamps[-2] += tolerance_s/2 # TODO delete + diffs = torch.diff(timestamps) + within_tolerance = torch.abs(diffs - 1 / fps) <= tolerance_s + + # We mask differences between the timestamp at the end of an episode + # and the one the start of the next episode since these are expected + # to be outside tolerance. + mask = torch.ones(len(diffs), dtype=torch.bool) + ignored_diffs = episode_data_index["to"][:-1] - 1 + mask[ignored_diffs] = False + filtered_within_tolerance = within_tolerance[mask] + + if not torch.all(filtered_within_tolerance): + # Track original indices before masking + original_indices = torch.arange(len(diffs)) + filtered_indices = original_indices[mask] + outside_tolerance_filtered_indices = torch.nonzero(~filtered_within_tolerance).squeeze() + outside_tolerance_indices = filtered_indices[outside_tolerance_filtered_indices] + episode_indices = torch.stack(hf_dataset["episode_index"]) + + outside_tolerances = [] + for idx in outside_tolerance_indices: + entry = { + "timestamps": [timestamps[idx], timestamps[idx + 1]], + "diff": diffs[idx], + "episode_index": episode_indices[idx].item(), + } + outside_tolerances.append(entry) + + if raise_value_error: + raise ValueError( + f"""One or several timestamps unexpectedly violate the tolerance inside episode range. + This might be due to synchronization issues with timestamps during data collection. + \n{pformat(outside_tolerances)}""" + ) + return False + + return True + + +def check_delta_timestamps( + delta_timestamps: dict[str, list[float]], fps: int, tolerance_s: float, raise_value_error: bool = True +) -> bool: + outside_tolerance = {} + for key, delta_ts in delta_timestamps.items(): + abs_delta_ts = torch.abs(torch.tensor(delta_ts)) + within_tolerance = (abs_delta_ts % (1 / fps)) <= tolerance_s + if not torch.all(within_tolerance): + outside_tolerance[key] = torch.tensor(delta_ts)[~within_tolerance] + + if len(outside_tolerance) > 0: + if raise_value_error: + raise ValueError( + f""" + The following delta_timestamps are found outside of tolerance range. 
+ Please make sure they are multiples of 1/{fps} +/- tolerance and adjust + their values accordingly. + \n{pformat(outside_tolerance)} + """ + ) + return False + + return True + + +def get_delta_indices(delta_timestamps: dict[str, list[float]], fps: int) -> dict[str, list[int]]: + delta_indices = {} + for key, delta_ts in delta_timestamps.items(): + delta_indices[key] = (torch.tensor(delta_ts) * fps).long().tolist() + + return delta_indices def load_previous_and_future_frames( diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 4d4ac6b0..6a606415 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -27,45 +27,8 @@ import torchvision from datasets.features.features import register_feature -def load_from_videos( - item: dict[str, torch.Tensor], - video_frame_keys: list[str], - videos_dir: Path, - tolerance_s: float, - backend: str = "pyav", -): - """Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function - in the main process (e.g. by using a second Dataloader with num_workers=0). It will result in a Segmentation Fault. - This probably happens because a memory reference to the video loader is created in the main process and a - subprocess fails to access it. - """ - # since video path already contains "videos" (e.g. videos_dir="data/videos", path="videos/episode_0.mp4") - data_dir = videos_dir.parent - - for key in video_frame_keys: - if isinstance(item[key], list): - # load multiple frames at once (expected when delta_timestamps is not None) - timestamps = [frame["timestamp"] for frame in item[key]] - paths = [frame["path"] for frame in item[key]] - if len(set(paths)) > 1: - raise NotImplementedError("All video paths are expected to be the same for now.") - video_path = data_dir / paths[0] - - frames = decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend) - item[key] = frames - else: - # load one frame - timestamps = [item[key]["timestamp"]] - video_path = data_dir / item[key]["path"] - - frames = decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend) - item[key] = frames[0] - - return item - - def decode_video_frames_torchvision( - video_path: str, + video_path: Path | str, timestamps: list[float], tolerance_s: float, backend: str = "pyav", From 6d2bc11365d3ac9f0ebd04e6aea2e49e50400027 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 11 Oct 2024 10:59:38 +0200 Subject: [PATCH 08/59] Add doc, scrap video_frame_keys attribute --- lerobot/common/datasets/lerobot_dataset.py | 107 +++++++++++++++------ lerobot/common/datasets/utils.py | 4 + lerobot/scripts/push_dataset_to_hub.py | 2 +- lerobot/scripts/visualize_dataset_html.py | 3 +- 4 files changed, 85 insertions(+), 31 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index b91eb75f..52d3377c 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -54,6 +54,83 @@ class LeRobotDataset(torch.utils.data.Dataset): tolerance_s: float = 1e-4, video_backend: str | None = None, ): + """LeRobotDataset encapsulates 3 main things: + - metadata: + - info contains various information about the dataset like shapes, keys, fps etc. + - stats stores the dataset statistics of the different modalities for normalization + - tasks contains the prompts for each task of the dataset, which can be used for + task-conditioned training.
+ - hf_dataset (from datasets.Dataset), which will read any values from parquet files. + - (optional) videos from which frames are loaded to be synchronous with data from parquet files. + + 3 use modes are available for this class, depending on 3 different use cases: + + 1. Your dataset already exists on the Hugging Face Hub at the address + https://huggingface.co/datasets/{repo_id} and is not on your local disk in the 'root' folder: + Instantiating this class with this 'repo_id' will download the dataset from that address and load + it, provided your dataset is compliant with codebase_version v2.0. If your dataset has been created + before this new format, you will be prompted to convert it using our conversion script from v1.6 + to v2.0, which you can find at [TODO(aliberts): move conversion script & add location here]. + + 2. Your dataset already exists on your local disk in the 'root' folder: + This is typically the case when you recorded your dataset locally and you may or may not have + pushed it to the hub yet. Instantiating this class with 'root' will load your dataset directly + from disk. This can happen while you're offline (no internet connection). + + 3. Your dataset doesn't already exist (either on local disk or on the Hub): + [TODO(aliberts): add classmethod for this case?] + + + In terms of files, a typical LeRobotDataset looks like this from its root path: + . + ├── README.md + ├── data + │ ├── train-00000-of-00050.parquet + │ ├── train-00001-of-00050.parquet + │ ├── train-00002-of-00050.parquet + │ ... + ├── meta + │ ├── info.json + │ ├── stats.json + │ └── tasks.json + └── videos (optional) + ├── observation.images.laptop_episode_000000.mp4 + ├── observation.images.laptop_episode_000001.mp4 + ├── observation.images.laptop_episode_000002.mp4 + ... + ├── observation.images.phone_episode_000000.mp4 + ├── observation.images.phone_episode_000001.mp4 + ├── observation.images.phone_episode_000002.mp4 + ... + + Note that this file-based structure is designed to be as versatile as possible. The files are split by + episode, which allows more granular control over which episodes one wants to use and download. The + structure of the dataset is entirely described in the info.json file, which can be easily downloaded + or viewed directly on the hub before downloading any actual data. The types of files used are very + simple and do not need complex tools to be read: only .parquet, .json and .mp4 files are used (and .md + for the README). + + Args: + repo_id (str): This is the repo id that will be used to fetch the dataset. Locally, the dataset + will be stored under root/repo_id. + root (Path | None, optional): Local directory to use for downloading/writing files. You can also + set the LEROBOT_HOME environment variable to point to a different location. Defaults to + '~/.cache/huggingface/lerobot'. + episodes (list[int] | None, optional): If specified, this will only load episodes specified by + their episode_index in this list. Defaults to None. + split (str, optional): _description_. Defaults to "train". + image_transforms (Callable | None, optional): You can pass standard v2 image transforms from + torchvision.transforms.v2 here which will be applied to visual modalities (whether they come + from videos or images). Defaults to None. + delta_timestamps (dict[list[float]] | None, optional): _description_. Defaults to None. + tolerance_s (float, optional): Tolerance in seconds used to ensure data timestamps are actually in + sync with the fps value.
It is used at the init of the dataset to make sure that each + timestamp is separated from the next by 1/fps +/- tolerance_s. This also applies to frames + decoded from video files. It is also used to check that `delta_timestamps` (when provided) are + multiples of 1/fps. Defaults to 1e-4. + video_backend (str | None, optional): Video backend to use for decoding videos. There is currently + a single option, which is the pyav decoder used by Torchvision. Defaults to pyav. + """ super().__init__() self.repo_id = repo_id self.root = root if root is not None else LEROBOT_HOME / repo_id @@ -88,6 +165,7 @@ class LeRobotDataset(torch.utils.data.Dataset): # TODO(aliberts): # - [X] Move delta_timestamp logic outside __get_item__ # - [X] Update __get_item__ + # - [/] Add doc # - [ ] Add self.add_frame() # - [ ] Add self.consolidate() for: # - [X] Check timestamps sync @@ -168,23 +246,6 @@ class LeRobotDataset(torch.utils.data.Dataset): """Keys to access image and video streams from cameras (regardless of their storage method).""" return self.image_keys + self.video_keys - @property - def video_frame_keys(self) -> list[str]: - """ - DEPRECATED, USE 'video_keys' INSTEAD - Keys to access video frames that requires to be decoded into images. - - Note: It is empty if the dataset contains images only, - or equal to `self.cameras` if the dataset contains videos only, - or can even be a subset of `self.cameras` in a case of a mixed image/video dataset. - """ - # TODO(aliberts): remove - video_frame_keys = [] - for key, feats in self.hf_dataset.features.items(): - if isinstance(feats, VideoFrame): - video_frame_keys.append(key) - return video_frame_keys - @property def num_samples(self) -> int: """Number of samples/frames.""" @@ -200,16 +261,6 @@ class LeRobotDataset(torch.utils.data.Dataset): """Total number of episodes available.""" return self.info["total_episodes"] - # @property - # def tolerance_s(self) -> float: - # """Tolerance in seconds used to discard loaded frames when their timestamps - # are not close enough from the requested frames. It is used at the init of the dataset to make sure - # that each timestamps is separated to the next by 1/fps +/- tolerance. It is only used when - # `delta_timestamps` is provided or when loading video frames from mp4 files. - # """ - # # 1e-4 to account for possible numerical error - # return 1e-4 - @property def shapes(self) -> dict: """Shapes for the different features.""" @@ -308,7 +359,7 @@ class LeRobotDataset(torch.utils.data.Dataset): f" Type: {'video (.mp4)' if self.video else 'image (.png)'},\n" f" Recorded Frames per Second: {self.fps},\n" f" Camera Keys: {self.camera_keys},\n" - f" Video Frame Keys: {self.video_frame_keys if self.video else 'N/A'},\n" + f" Video Frame Keys: {self.camera_keys if self.video else 'N/A'},\n" f" Transformations: {self.image_transforms},\n" f" Codebase Version: {self.info.get('codebase_version', '< v1.6')},\n" f")" diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 9b70d4f6..b20b63fe 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -263,6 +263,10 @@ def check_timestamps_sync( def check_delta_timestamps( delta_timestamps: dict[str, list[float]], fps: int, tolerance_s: float, raise_value_error: bool = True ) -> bool: + """This will check if all the values in delta_timestamps are multiples of 1/fps +/- tolerance. + This is to ensure that these delta_timestamps added to any timestamp from a dataset will themselves be + actual timestamps from the dataset.
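+
+    Example (illustrative values only, not from any particular dataset; assumes fps=30 and the default
+    raise_value_error=True):
+        check_delta_timestamps({"action": [-1 / 30, 0, 1 / 30]}, fps=30, tolerance_s=1e-4)  # returns True
+        check_delta_timestamps({"action": [0.02]}, fps=30, tolerance_s=1e-4)  # raises ValueError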
+ """ outside_tolerance = {} for key, delta_ts in delta_timestamps.items(): abs_delta_ts = torch.abs(torch.tensor(delta_ts)) diff --git a/lerobot/scripts/push_dataset_to_hub.py b/lerobot/scripts/push_dataset_to_hub.py index adc4c72a..6eac4d0e 100644 --- a/lerobot/scripts/push_dataset_to_hub.py +++ b/lerobot/scripts/push_dataset_to_hub.py @@ -260,7 +260,7 @@ def push_dataset_to_hub( episode_index = 0 tests_videos_dir = tests_data_dir / repo_id / "videos" tests_videos_dir.mkdir(parents=True, exist_ok=True) - for key in lerobot_dataset.video_frame_keys: + for key in lerobot_dataset.camera_keys: fname = f"{key}_episode_{episode_index:06d}.mp4" shutil.copy(videos_dir / fname, tests_videos_dir / fname) diff --git a/lerobot/scripts/visualize_dataset_html.py b/lerobot/scripts/visualize_dataset_html.py index c035e562..d9d153a0 100644 --- a/lerobot/scripts/visualize_dataset_html.py +++ b/lerobot/scripts/visualize_dataset_html.py @@ -171,8 +171,7 @@ def get_episode_video_paths(dataset: LeRobotDataset, ep_index: int) -> list[str] # get first frame of episode (hack to get video_path of the episode) first_frame_idx = dataset.episode_data_index["from"][ep_index].item() return [ - dataset.hf_dataset.select_columns(key)[first_frame_idx][key]["path"] - for key in dataset.video_frame_keys + dataset.hf_dataset.select_columns(key)[first_frame_idx][key]["path"] for key in dataset.camera_keys ] From 7f680886b0d288bcc9992f62874a09a0984c295c Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 11 Oct 2024 11:03:11 +0200 Subject: [PATCH 09/59] Add huggingface-hub patch for offline snapshot_download with local_dir --- lerobot/common/datasets/lerobot_dataset.py | 3 - poetry.lock | 1511 +++++++++++--------- pyproject.toml | 2 +- 3 files changed, 824 insertions(+), 692 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 52d3377c..b283a185 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -178,9 +178,6 @@ class LeRobotDataset(torch.utils.data.Dataset): will only download those episodes (selected by their episode_index). If 'episodes' is None, the whole dataset will be downloaded. Thanks to the behavior of snapshot_download, if the files are already present in 'local_dir', they won't be downloaded again. - - Note: Currently, if you're running this code offline but you already have the files in 'local_dir', - snapshot_download will still fail. This behavior will be fixed in an upcoming update of huggingface_hub. """ # TODO(rcadene, aliberts): implement faster transfer # https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads diff --git a/poetry.lock b/poetry.lock index 43089048..b4d491ae 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "absl-py" @@ -13,113 +13,113 @@ files = [ [[package]] name = "aiohappyeyeballs" -version = "2.4.0" +version = "2.4.3" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.8" files = [ - {file = "aiohappyeyeballs-2.4.0-py3-none-any.whl", hash = "sha256:7ce92076e249169a13c2f49320d1967425eaf1f407522d707d59cac7628d62bd"}, - {file = "aiohappyeyeballs-2.4.0.tar.gz", hash = "sha256:55a1714f084e63d49639800f95716da97a1f173d46a16dfcfda0016abb93b6b2"}, + {file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"}, + {file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"}, ] [[package]] name = "aiohttp" -version = "3.10.6" +version = "3.10.10" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" files = [ - {file = "aiohttp-3.10.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:682836fc672972cc3101cc9e30d49c5f7e8f1d010478d46119fe725a4545acfd"}, - {file = "aiohttp-3.10.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:289fa8a20018d0d5aa9e4b35d899bd51bcb80f0d5f365d9a23e30dac3b79159b"}, - {file = "aiohttp-3.10.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8617c96a20dd57e7e9d398ff9d04f3d11c4d28b1767273a5b1a018ada5a654d3"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdbeff1b062751c2a2a55b171f7050fb7073633c699299d042e962aacdbe1a07"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ea35d849cdd4a9268f910bff4497baebbc1aa3f2f625fd8ccd9ac99c860c621"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:473961b3252f3b949bb84873d6e268fb6d8aa0ccc6eb7404fa58c76a326bb8e1"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d2665c5df629eb2f981dab244c01bfa6cdc185f4ffa026639286c4d56fafb54"}, - {file = "aiohttp-3.10.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25d92f794f1332f656e3765841fc2b7ad5c26c3f3d01e8949eeb3495691cf9f4"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9bd6b2033993d5ae80883bb29b83fb2b432270bbe067c2f53cc73bb57c46065f"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d7f408c43f5e75ea1edc152fb375e8f46ef916f545fb66d4aebcbcfad05e2796"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:cf8b8560aa965f87bf9c13bf9fed7025993a155ca0ce8422da74bf46d18c2f5f"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14477c4e52e2f17437b99893fd220ffe7d7ee41df5ebf931a92b8ca82e6fd094"}, - {file = "aiohttp-3.10.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fb138fbf9f53928e779650f5ed26d0ea1ed8b2cab67f0ea5d63afa09fdc07593"}, - {file = "aiohttp-3.10.6-cp310-cp310-win32.whl", hash = "sha256:9843d683b8756971797be171ead21511d2215a2d6e3c899c6e3107fbbe826791"}, - {file = "aiohttp-3.10.6-cp310-cp310-win_amd64.whl", hash = "sha256:f8b8e49fe02f744d38352daca1dbef462c3874900bd8166516f6ea8e82b5aacf"}, - {file = "aiohttp-3.10.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f52e54fd776ad0da1006708762213b079b154644db54bcfc62f06eaa5b896402"}, - {file = "aiohttp-3.10.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:995ab1a238fd0d19dc65f2d222e5eb064e409665c6426a3e51d5101c1979ee84"}, - {file = "aiohttp-3.10.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0749c4d5a08a802dd66ecdf59b2df4d76b900004017468a7bb736c3b5a3dd902"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e05b39158f2af0e2438cc2075cfc271f4ace0c3cc4a81ec95b27a0432e161951"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a9f196c970db2dcde4f24317e06615363349dc357cf4d7a3b0716c20ac6d7bcd"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:47647c8af04a70e07a2462931b0eba63146a13affa697afb4ecbab9d03a480ce"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669c0efe7e99f6d94d63274c06344bd0e9c8daf184ce5602a29bc39e00a18720"}, - {file = "aiohttp-3.10.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9721cdd83a994225352ca84cd537760d41a9da3c0eacb3ff534747ab8fba6d0"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0b82c8ebed66ce182893e7c0b6b60ba2ace45b1df104feb52380edae266a4850"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:b169f8e755e541b72e714b89a831b315bbe70db44e33fead28516c9e13d5f931"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:0be3115753baf8b4153e64f9aa7bf6c0c64af57979aa900c31f496301b374570"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e1f80cd17d81a404b6e70ef22bfe1870bafc511728397634ad5f5efc8698df56"}, - {file = "aiohttp-3.10.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6419728b08fb6380c66a470d2319cafcec554c81780e2114b7e150329b9a9a7f"}, - {file = "aiohttp-3.10.6-cp311-cp311-win32.whl", hash = "sha256:bd294dcdc1afdc510bb51d35444003f14e327572877d016d576ac3b9a5888a27"}, - {file = "aiohttp-3.10.6-cp311-cp311-win_amd64.whl", hash = "sha256:bf861da9a43d282d6dd9dcd64c23a0fccf2c5aa5cd7c32024513c8c79fb69de3"}, - {file = "aiohttp-3.10.6-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:2708baccdc62f4b1251e59c2aac725936a900081f079b88843dabcab0feeeb27"}, - {file = "aiohttp-3.10.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7475da7a5e2ccf1a1c86c8fee241e277f4874c96564d06f726d8df8e77683ef7"}, - {file = "aiohttp-3.10.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:02108326574ff60267b7b35b17ac5c0bbd0008ccb942ce4c48b657bb90f0b8aa"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:029a019627b37fa9eac5c75cc54a6bb722c4ebbf5a54d8c8c0fb4dd8facf2702"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a637d387db6fdad95e293fab5433b775fd104ae6348d2388beaaa60d08b38c4"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dc1a16f3fc1944c61290d33c88dc3f09ba62d159b284c38c5331868425aca426"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81b292f37969f9cc54f4643f0be7dacabf3612b3b4a65413661cf6c350226787"}, - {file = "aiohttp-3.10.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0754690a3a26e819173a34093798c155bafb21c3c640bff13be1afa1e9d421f9"}, - {file = "aiohttp-3.10.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:164ecd32e65467d86843dbb121a6666c3deb23b460e3f8aefdcaacae79eb718a"}, - {file = "aiohttp-3.10.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:438c5863feb761f7ca3270d48c292c334814459f61cc12bab5ba5b702d7c9e56"}, - {file = "aiohttp-3.10.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ba18573bb1de1063d222f41de64a0d3741223982dcea863b3f74646faf618ec7"}, - {file = "aiohttp-3.10.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:c82a94ddec996413a905f622f3da02c4359952aab8d817c01cf9915419525e95"}, - {file = "aiohttp-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:92351aa5363fc3c1f872ca763f86730ced32b01607f0c9662b1fa711087968d0"}, - {file = "aiohttp-3.10.6-cp312-cp312-win32.whl", hash = "sha256:3e15e33bfc73fa97c228f72e05e8795e163a693fd5323549f49367c76a6e5883"}, - {file = "aiohttp-3.10.6-cp312-cp312-win_amd64.whl", hash = "sha256:fe517113fe4d35d9072b826c3e147d63c5f808ca8167d450b4f96c520c8a1d8d"}, - {file = "aiohttp-3.10.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:482f74057ea13d387a7549d7a7ecb60e45146d15f3e58a2d93a0ad2d5a8457cd"}, - {file = "aiohttp-3.10.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:03fa40d1450ee5196e843315ddf74a51afc7e83d489dbfc380eecefea74158b1"}, - {file = "aiohttp-3.10.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1e52e59ed5f4cc3a3acfe2a610f8891f216f486de54d95d6600a2c9ba1581f4d"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2b3935a22c9e41a8000d90588bed96cf395ef572dbb409be44c6219c61d900d"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bef1480ee50f75abcfcb4b11c12de1005968ca9d0172aec4a5057ba9f2b644f"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:671745ea7db19693ce867359d503772177f0b20fa8f6ee1e74e00449f4c4151d"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b50b367308ca8c12e0b50cba5773bc9abe64c428d3fd2bbf5cd25aab37c77bf"}, - {file = "aiohttp-3.10.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a504d7cdb431a777d05a124fd0b21efb94498efa743103ea01b1e3136d2e4fb"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:66bc81361131763660b969132a22edce2c4d184978ba39614e8f8f95db5c95f8"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:27cf19a38506e2e9f12fc17e55f118f04897b0a78537055d93a9de4bf3022e3d"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3468b39f977a11271517c6925b226720e148311039a380cc9117b1e2258a721f"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9d26da22a793dfd424be1050712a70c0afd96345245c29aced1e35dbace03413"}, - {file = "aiohttp-3.10.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:844d48ff9173d0b941abed8b2ea6a412f82b56d9ab1edb918c74000c15839362"}, - {file = "aiohttp-3.10.6-cp313-cp313-win32.whl", hash = "sha256:2dd56e3c43660ed3bea67fd4c5025f1ac1f9ecf6f0b991a6e5efe2e678c490c5"}, - {file = "aiohttp-3.10.6-cp313-cp313-win_amd64.whl", hash = "sha256:c91781d969fbced1993537f45efe1213bd6fccb4b37bfae2a026e20d6fbed206"}, - {file = "aiohttp-3.10.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:4407a80bca3e694f2d2a523058e20e1f9f98a416619e04f6dc09dc910352ac8b"}, - {file = "aiohttp-3.10.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:1cb045ec5961f51af3e2c08cd6fe523f07cc6e345033adee711c49b7b91bb954"}, - {file = "aiohttp-3.10.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4fabdcdc781a36b8fd7b2ca9dea8172f29a99e11d00ca0f83ffeb50958da84a1"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a9f42efcc2681790595ab3d03c0e52d01edc23a0973ea09f0dc8d295e12b8e"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cca776a440795db437d82c07455761c85bbcf3956221c3c23b8c93176c278ce7"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5582de171f0898139cf51dd9fcdc79b848e28d9abd68e837f0803fc9f30807b1"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:370e2d47575c53c817ee42a18acc34aad8da4dbdaac0a6c836d58878955f1477"}, - {file = "aiohttp-3.10.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:444d1704e2af6b30766debed9be8a795958029e552fe77551355badb1944012c"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:40271a2a375812967401c9ca8077de9368e09a43a964f4dce0ff603301ec9358"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:f3af26f86863fad12e25395805bb0babbd49d512806af91ec9708a272b696248"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:4752df44df48fd42b80f51d6a97553b482cda1274d9dc5df214a3a1aa5d8f018"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:2cd5290ab66cfca2f90045db2cc6434c1f4f9fbf97c9f1c316e785033782e7d2"}, - {file = "aiohttp-3.10.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3427031064b0d5c95647e6369c4aa3c556402f324a3e18107cb09517abe5f962"}, - {file = "aiohttp-3.10.6-cp38-cp38-win32.whl", hash = "sha256:614fc21e86adc28e4165a6391f851a6da6e9cbd7bb232d0df7718b453a89ee98"}, - {file = "aiohttp-3.10.6-cp38-cp38-win_amd64.whl", hash = "sha256:58c5d7318a136a3874c78717dd6de57519bc64f6363c5827c2b1cb775bea71dd"}, - {file = "aiohttp-3.10.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5db26bbca8e7968c4c977a0c640e0b9ce7224e1f4dcafa57870dc6ee28e27de6"}, - {file = "aiohttp-3.10.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3fb4216e3ec0dbc01db5ba802f02ed78ad8f07121be54eb9e918448cc3f61b7c"}, - {file = "aiohttp-3.10.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a976ef488f26e224079deb3d424f29144c6d5ba4ded313198169a8af8f47fb82"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a86610174de8a85a920e956e2d4f9945e7da89f29a00e95ac62a4a414c4ef4e"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:217791c6a399cc4f2e6577bb44344cba1f5714a2aebf6a0bea04cfa956658284"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba3662d41abe2eab0eeec7ee56f33ef4e0b34858f38abf24377687f9e1fb00a5"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4dfa5ad4bce9ca30a76117fbaa1c1decf41ebb6c18a4e098df44298941566f9"}, - {file = "aiohttp-3.10.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0009258e97502936d3bd5bf2ced15769629097d0abb81e6495fba1047824fe0"}, - {file = "aiohttp-3.10.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0a75d5c9fb4f06c41d029ae70ad943c3a844c40c0a769d12be4b99b04f473d3d"}, - {file = 
"aiohttp-3.10.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:8198b7c002aae2b40b2d16bfe724b9a90bcbc9b78b2566fc96131ef4e382574d"}, - {file = "aiohttp-3.10.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4611db8c907f90fe86be112efdc2398cd7b4c8eeded5a4f0314b70fdea8feab0"}, - {file = "aiohttp-3.10.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ff99ae06eef85c7a565854826114ced72765832ee16c7e3e766c5e4c5b98d20e"}, - {file = "aiohttp-3.10.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7641920bdcc7cd2d3ddfb8bb9133a6c9536b09dbd49490b79e125180b2d25b93"}, - {file = "aiohttp-3.10.6-cp39-cp39-win32.whl", hash = "sha256:e2e7d5591ea868d5ec82b90bbeb366a198715672841d46281b623e23079593db"}, - {file = "aiohttp-3.10.6-cp39-cp39-win_amd64.whl", hash = "sha256:b504c08c45623bf5c7ca41be380156d925f00199b3970efd758aef4a77645feb"}, - {file = "aiohttp-3.10.6.tar.gz", hash = "sha256:d2578ef941be0c2ba58f6f421a703527d08427237ed45ecb091fed6f83305336"}, + {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be7443669ae9c016b71f402e43208e13ddf00912f47f623ee5994e12fc7d4b3f"}, + {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b06b7843929e41a94ea09eb1ce3927865387e3e23ebe108e0d0d09b08d25be9"}, + {file = "aiohttp-3.10.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:333cf6cf8e65f6a1e06e9eb3e643a0c515bb850d470902274239fea02033e9a8"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:274cfa632350225ce3fdeb318c23b4a10ec25c0e2c880eff951a3842cf358ac1"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9e5e4a85bdb56d224f412d9c98ae4cbd032cc4f3161818f692cd81766eee65a"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b606353da03edcc71130b52388d25f9a30a126e04caef1fd637e31683033abd"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab5a5a0c7a7991d90446a198689c0535be89bbd6b410a1f9a66688f0880ec026"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:578a4b875af3e0daaf1ac6fa983d93e0bbfec3ead753b6d6f33d467100cdc67b"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8105fd8a890df77b76dd3054cddf01a879fc13e8af576805d667e0fa0224c35d"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3bcd391d083f636c06a68715e69467963d1f9600f85ef556ea82e9ef25f043f7"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fbc6264158392bad9df19537e872d476f7c57adf718944cc1e4495cbabf38e2a"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e48d5021a84d341bcaf95c8460b152cfbad770d28e5fe14a768988c461b821bc"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2609e9ab08474702cc67b7702dbb8a80e392c54613ebe80db7e8dbdb79837c68"}, + {file = "aiohttp-3.10.10-cp310-cp310-win32.whl", hash = "sha256:84afcdea18eda514c25bc68b9af2a2b1adea7c08899175a51fe7c4fb6d551257"}, + {file = "aiohttp-3.10.10-cp310-cp310-win_amd64.whl", hash = "sha256:9c72109213eb9d3874f7ac8c0c5fa90e072d678e117d9061c06e30c85b4cf0e6"}, + {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c30a0eafc89d28e7f959281b58198a9fa5e99405f716c0289b7892ca345fe45f"}, + {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:258c5dd01afc10015866114e210fb7365f0d02d9d059c3c3415382ab633fcbcb"}, + {file = "aiohttp-3.10.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:15ecd889a709b0080f02721255b3f80bb261c2293d3c748151274dfea93ac871"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3935f82f6f4a3820270842e90456ebad3af15810cf65932bd24da4463bc0a4c"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:413251f6fcf552a33c981c4709a6bba37b12710982fec8e558ae944bfb2abd38"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1720b4f14c78a3089562b8875b53e36b51c97c51adc53325a69b79b4b48ebcb"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:679abe5d3858b33c2cf74faec299fda60ea9de62916e8b67e625d65bf069a3b7"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:79019094f87c9fb44f8d769e41dbb664d6e8fcfd62f665ccce36762deaa0e911"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fe2fb38c2ed905a2582948e2de560675e9dfbee94c6d5ccdb1301c6d0a5bf092"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a3f00003de6eba42d6e94fabb4125600d6e484846dbf90ea8e48a800430cc142"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1bbb122c557a16fafc10354b9d99ebf2f2808a660d78202f10ba9d50786384b9"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:30ca7c3b94708a9d7ae76ff281b2f47d8eaf2579cd05971b5dc681db8caac6e1"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:df9270660711670e68803107d55c2b5949c2e0f2e4896da176e1ecfc068b974a"}, + {file = "aiohttp-3.10.10-cp311-cp311-win32.whl", hash = "sha256:aafc8ee9b742ce75044ae9a4d3e60e3d918d15a4c2e08a6c3c3e38fa59b92d94"}, + {file = "aiohttp-3.10.10-cp311-cp311-win_amd64.whl", hash = "sha256:362f641f9071e5f3ee6f8e7d37d5ed0d95aae656adf4ef578313ee585b585959"}, + {file = "aiohttp-3.10.10-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9294bbb581f92770e6ed5c19559e1e99255e4ca604a22c5c6397b2f9dd3ee42c"}, + {file = "aiohttp-3.10.10-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a8fa23fe62c436ccf23ff930149c047f060c7126eae3ccea005f0483f27b2e28"}, + {file = "aiohttp-3.10.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c6a5b8c7926ba5d8545c7dd22961a107526562da31a7a32fa2456baf040939f"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:007ec22fbc573e5eb2fb7dec4198ef8f6bf2fe4ce20020798b2eb5d0abda6138"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9627cc1a10c8c409b5822a92d57a77f383b554463d1884008e051c32ab1b3742"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50edbcad60d8f0e3eccc68da67f37268b5144ecc34d59f27a02f9611c1d4eec7"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a45d85cf20b5e0d0aa5a8dca27cce8eddef3292bc29d72dcad1641f4ed50aa16"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b00807e2605f16e1e198f33a53ce3c4523114059b0c09c337209ae55e3823a8"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:f2d4324a98062be0525d16f768a03e0bbb3b9fe301ceee99611dc9a7953124e6"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:438cd072f75bb6612f2aca29f8bd7cdf6e35e8f160bc312e49fbecab77c99e3a"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:baa42524a82f75303f714108fea528ccacf0386af429b69fff141ffef1c534f9"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a7d8d14fe962153fc681f6366bdec33d4356f98a3e3567782aac1b6e0e40109a"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c1277cd707c465cd09572a774559a3cc7c7a28802eb3a2a9472588f062097205"}, + {file = "aiohttp-3.10.10-cp312-cp312-win32.whl", hash = "sha256:59bb3c54aa420521dc4ce3cc2c3fe2ad82adf7b09403fa1f48ae45c0cbde6628"}, + {file = "aiohttp-3.10.10-cp312-cp312-win_amd64.whl", hash = "sha256:0e1b370d8007c4ae31ee6db7f9a2fe801a42b146cec80a86766e7ad5c4a259cf"}, + {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ad7593bb24b2ab09e65e8a1d385606f0f47c65b5a2ae6c551db67d6653e78c28"}, + {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1eb89d3d29adaf533588f209768a9c02e44e4baf832b08118749c5fad191781d"}, + {file = "aiohttp-3.10.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3fe407bf93533a6fa82dece0e74dbcaaf5d684e5a51862887f9eaebe6372cd79"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aed5155f819873d23520919e16703fc8925e509abbb1a1491b0087d1cd969e"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f05e9727ce409358baa615dbeb9b969db94324a79b5a5cea45d39bdb01d82e6"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dffb610a30d643983aeb185ce134f97f290f8935f0abccdd32c77bed9388b42"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa6658732517ddabe22c9036479eabce6036655ba87a0224c612e1ae6af2087e"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:741a46d58677d8c733175d7e5aa618d277cd9d880301a380fd296975a9cdd7bc"}, + {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e00e3505cd80440f6c98c6d69269dcc2a119f86ad0a9fd70bccc59504bebd68a"}, + {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ffe595f10566f8276b76dc3a11ae4bb7eba1aac8ddd75811736a15b0d5311414"}, + {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdfcf6443637c148c4e1a20c48c566aa694fa5e288d34b20fcdc58507882fed3"}, + {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d183cf9c797a5291e8301790ed6d053480ed94070637bfaad914dd38b0981f67"}, + {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:77abf6665ae54000b98b3c742bc6ea1d1fb31c394bcabf8b5d2c1ac3ebfe7f3b"}, + {file = "aiohttp-3.10.10-cp313-cp313-win32.whl", hash = "sha256:4470c73c12cd9109db8277287d11f9dd98f77fc54155fc71a7738a83ffcc8ea8"}, + {file = "aiohttp-3.10.10-cp313-cp313-win_amd64.whl", hash = "sha256:486f7aabfa292719a2753c016cc3a8f8172965cabb3ea2e7f7436c7f5a22a151"}, + {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1b66ccafef7336a1e1f0e389901f60c1d920102315a56df85e49552308fc0486"}, + {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:acd48d5b80ee80f9432a165c0ac8cbf9253eaddb6113269a5e18699b33958dbb"}, + {file = "aiohttp-3.10.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3455522392fb15ff549d92fbf4b73b559d5e43dc522588f7eb3e54c3f38beee7"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45c3b868724137f713a38376fef8120c166d1eadd50da1855c112fe97954aed8"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:da1dee8948d2137bb51fbb8a53cce6b1bcc86003c6b42565f008438b806cccd8"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5ce2ce7c997e1971b7184ee37deb6ea9922ef5163c6ee5aa3c274b05f9e12fa"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28529e08fde6f12eba8677f5a8608500ed33c086f974de68cc65ab218713a59d"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f7db54c7914cc99d901d93a34704833568d86c20925b2762f9fa779f9cd2e70f"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:03a42ac7895406220124c88911ebee31ba8b2d24c98507f4a8bf826b2937c7f2"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:7e338c0523d024fad378b376a79faff37fafb3c001872a618cde1d322400a572"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:038f514fe39e235e9fef6717fbf944057bfa24f9b3db9ee551a7ecf584b5b480"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:64f6c17757251e2b8d885d728b6433d9d970573586a78b78ba8929b0f41d045a"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:93429602396f3383a797a2a70e5f1de5df8e35535d7806c9f91df06f297e109b"}, + {file = "aiohttp-3.10.10-cp38-cp38-win32.whl", hash = "sha256:c823bc3971c44ab93e611ab1a46b1eafeae474c0c844aff4b7474287b75fe49c"}, + {file = "aiohttp-3.10.10-cp38-cp38-win_amd64.whl", hash = "sha256:54ca74df1be3c7ca1cf7f4c971c79c2daf48d9aa65dea1a662ae18926f5bc8ce"}, + {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:01948b1d570f83ee7bbf5a60ea2375a89dfb09fd419170e7f5af029510033d24"}, + {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9fc1500fd2a952c5c8e3b29aaf7e3cc6e27e9cfc0a8819b3bce48cc1b849e4cc"}, + {file = "aiohttp-3.10.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f614ab0c76397661b90b6851a030004dac502e48260ea10f2441abd2207fbcc7"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00819de9e45d42584bed046314c40ea7e9aea95411b38971082cad449392b08c"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05646ebe6b94cc93407b3bf34b9eb26c20722384d068eb7339de802154d61bc5"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:998f3bd3cfc95e9424a6acd7840cbdd39e45bc09ef87533c006f94ac47296090"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9010c31cd6fa59438da4e58a7f19e4753f7f264300cd152e7f90d4602449762"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ea7ffc6d6d6f8a11e6f40091a1040995cdff02cfc9ba4c2f30a516cb2633554"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:ef9c33cc5cbca35808f6c74be11eb7f5f6b14d2311be84a15b594bd3e58b5527"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ce0cdc074d540265bfeb31336e678b4e37316849d13b308607efa527e981f5c2"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:597a079284b7ee65ee102bc3a6ea226a37d2b96d0418cc9047490f231dc09fe8"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:7789050d9e5d0c309c706953e5e8876e38662d57d45f936902e176d19f1c58ab"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e7f8b04d83483577fd9200461b057c9f14ced334dcb053090cea1da9c8321a91"}, + {file = "aiohttp-3.10.10-cp39-cp39-win32.whl", hash = "sha256:c02a30b904282777d872266b87b20ed8cc0d1501855e27f831320f471d54d983"}, + {file = "aiohttp-3.10.10-cp39-cp39-win_amd64.whl", hash = "sha256:edfe3341033a6b53a5c522c802deb2079eee5cbfbb0af032a55064bd65c73a23"}, + {file = "aiohttp-3.10.10.tar.gz", hash = "sha256:0631dd7c9f0822cc61c88586ca76d5b5ada26538097d0f1df510b082bad3411a"}, ] [package.dependencies] @@ -519,101 +519,116 @@ files = [ [[package]] name = "charset-normalizer" -version = "3.3.2" +version = "3.4.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7.0" files = [ - {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, - {file = 
"charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = 
"sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, - {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-win32.whl", hash = "sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-win32.whl", hash = "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27"}, + {file = 
"charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-win32.whl", hash = "sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dd4eda173a9fcccb5f2e2bd2a9f423d180194b1bf17cf59e3269899235b2a114"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9e3c4c9e1ed40ea53acf11e2a386383c3304212c965773704e4603d589343ed"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92a7e36b000bf022ef3dbb9c46bfe2d52c047d5e3f3343f43204263c5addc250"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54b6a92d009cbe2fb11054ba694bc9e284dad30a26757b1e372a1fdddaf21920"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ffd9493de4c922f2a38c2bf62b831dcec90ac673ed1ca182fe11b4d8e9f2a64"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35c404d74c2926d0287fbd63ed5d27eb911eb9e4a3bb2c6d294f3cfd4a9e0c23"}, + {file = 
"charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4796efc4faf6b53a18e3d46343535caed491776a22af773f366534056c4e1fbc"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7fdd52961feb4c96507aa649550ec2a0d527c086d284749b2f582f2d40a2e0d"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:92db3c28b5b2a273346bebb24857fda45601aef6ae1c011c0a997106581e8a88"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ab973df98fc99ab39080bfb0eb3a925181454d7c3ac8a1e695fddfae696d9e90"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4b67fdab07fdd3c10bb21edab3cbfe8cf5696f453afce75d815d9d7223fbe88b"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:aa41e526a5d4a9dfcfbab0716c7e8a1b215abd3f3df5a45cf18a12721d31cb5d"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-win32.whl", hash = "sha256:f19c1585933c82098c2a520f8ec1227f20e339e33aca8fa6f956f6691b784e67"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:707b82d19e65c9bd28b81dde95249b07bf9f5b90ebe1ef17d9b57473f8a64b7b"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dbe03226baf438ac4fda9e2d0715022fd579cb641c4cf639fa40d53b2fe6f3e2"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd9a8bd8900e65504a305bf8ae6fa9fbc66de94178c420791d0293702fce2df7"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8831399554b92b72af5932cdbbd4ddc55c55f631bb13ff8fe4e6536a06c5c51"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a14969b8691f7998e74663b77b4c36c0337cb1df552da83d5c9004a93afdb574"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcaf7c1524c0542ee2fc82cc8ec337f7a9f7edee2532421ab200d2b920fc97cf"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425c5f215d0eecee9a56cdb703203dda90423247421bf0d67125add85d0c4455"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:d5b054862739d276e09928de37c79ddeec42a6e1bfc55863be96a36ba22926f6"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:f3e73a4255342d4eb26ef6df01e3962e73aa29baa3124a8e824c5d3364a65748"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:2f6c34da58ea9c1a9515621f4d9ac379871a8f21168ba1b5e09d74250de5ad62"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:f09cb5a7bbe1ecae6e87901a2eb23e0256bb524a79ccc53eb0b7629fbe7677c4"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-win32.whl", hash = "sha256:9c98230f5042f4945f957d006edccc2af1e03ed5e37ce7c373f00a5a4daa6149"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:62f60aebecfc7f4b82e3f639a7d1433a20ec32824db2199a11ad4f5e146ef5ee"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:af73657b7a68211996527dbfeffbb0864e043d270580c5aef06dc4b659a4b578"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cab5d0b79d987c67f3b9e9c53f54a61360422a5a0bc075f43cab5621d530c3b6"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9289fd5dddcf57bab41d044f1756550f9e7cf0c8e373b8cdf0ce8773dc4bd417"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b493a043635eb376e50eedf7818f2f322eabbaa974e948bd8bdd29eb7ef2a51"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fa2566ca27d67c86569e8c85297aaf413ffab85a8960500f12ea34ff98e4c41"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8e538f46104c815be19c975572d74afb53f29650ea2025bbfaef359d2de2f7f"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fd30dc99682dc2c603c2b315bded2799019cea829f8bf57dc6b61efde6611c8"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2006769bd1640bdf4d5641c69a3d63b71b81445473cac5ded39740a226fa88ab"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dc15e99b2d8a656f8e666854404f1ba54765871104e50c8e9813af8a7db07f12"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ab2e5bef076f5a235c3774b4f4028a680432cded7cad37bba0fd90d64b187d19"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:4ec9dd88a5b71abfc74e9df5ebe7921c35cbb3b641181a531ca65cdb5e8e4dea"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:43193c5cda5d612f247172016c4bb71251c784d7a4d9314677186a838ad34858"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:aa693779a8b50cd97570e5a0f343538a8dbd3e496fa5dcb87e29406ad0299654"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-win32.whl", hash = "sha256:7706f5850360ac01d80c89bcef1640683cc12ed87f42579dab6c5d3ed6888613"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:c3e446d253bd88f6377260d07c895816ebf33ffffd56c1c792b13bff9c3e1ade"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:980b4f289d1d90ca5efcf07958d3eb38ed9c0b7676bf2831a54d4f66f9c27dfa"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f28f891ccd15c514a0981f3b9db9aa23d62fe1a99997512b0491d2ed323d229a"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8aacce6e2e1edcb6ac625fb0f8c3a9570ccc7bfba1f63419b3769ccf6a00ed0"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7af3717683bea4c87acd8c0d3d5b44d56120b26fd3f8a692bdd2d5260c620a"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ff2ed8194587faf56555927b3aa10e6fb69d931e33953943bc4f837dfee2242"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e91f541a85298cf35433bf66f3fab2a4a2cff05c127eeca4af174f6d497f0d4b"}, + {file = 
"charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309a7de0a0ff3040acaebb35ec45d18db4b28232f21998851cfa709eeff49d62"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:285e96d9d53422efc0d7a17c60e59f37fbf3dfa942073f666db4ac71e8d726d0"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5d447056e2ca60382d460a604b6302d8db69476fd2015c81e7c35417cfabe4cd"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:20587d20f557fe189b7947d8e7ec5afa110ccf72a3128d61a2a387c3313f46be"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:130272c698667a982a5d0e626851ceff662565379baf0ff2cc58067b81d4f11d"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ab22fbd9765e6954bc0bcff24c25ff71dcbfdb185fcdaca49e81bac68fe724d3"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7782afc9b6b42200f7362858f9e73b1f8316afb276d316336c0ec3bd73312742"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-win32.whl", hash = "sha256:2de62e8801ddfff069cd5c504ce3bc9672b23266597d4e4f50eda28846c322f2"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:95c3c157765b031331dd4db3c775e58deaee050a3042fcad72cbc4189d7c8dca"}, + {file = "charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079"}, + {file = "charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e"}, ] [[package]] @@ -672,28 +687,28 @@ plotting = ["matplotlib"] [[package]] name = "cmake" -version = "3.30.3" +version = "3.30.4" description = "CMake is an open-source, cross-platform family of tools designed to build, test and package software" optional = false python-versions = ">=3.7" files = [ - {file = "cmake-3.30.3-py3-none-macosx_11_0_universal2.macosx_11_0_arm64.macosx_10_10_x86_64.whl", hash = "sha256:8cc4c67432cca5e7a24a74eb102bc0472581a71231e58c224e544373dcb147a7"}, - {file = "cmake-3.30.3-py3-none-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1ca7e29f5952634274d33ec1cb0cd9ddb79cb0b09cc3887b55d24c9852eed9d0"}, - {file = "cmake-3.30.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:30c2cdf8a863573a5fd7bf39159fbb96e75ac1955e481d35e5295ac601ea23af"}, - {file = "cmake-3.30.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81e5dc3103a4c6594d3efdf652e21e21d610e264f0c489ebefa3db04b1cdd2bc"}, - {file = "cmake-3.30.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc5fba153bd0255adb246f27358d98db597a62264b61970d32038f9c7f355a70"}, - {file = "cmake-3.30.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a5ac1157eaa1e95bd67f11bd6ebc6f85b42ce6f2aac7b93d28dd84a5230be55b"}, - {file = "cmake-3.30.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba26cb3c19f5b4cb83787394647a5dafbd2922a6de4af39409d7d287536a617f"}, - {file = "cmake-3.30.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e294e3f424175b085809f713dd7ee36edd36b6b8a579911ef90359d8f884658"}, - {file = "cmake-3.30.3-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:1616e2806c4c85e21fd0b6e92a61d41cb47479b5305bfa6f0c00baacfd029d7d"}, - {file = "cmake-3.30.3-py3-none-musllinux_1_1_i686.whl", hash = 
"sha256:c98cf8980ed75dd15be9948da559a51ce4cd0f017fc44969a72dcd37f507fa61"}, - {file = "cmake-3.30.3-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:870ebf590fb2f7cc58c8aa5b4dc32b50d4ca9c2fb9f1e46cd0426a995a2ef71e"}, - {file = "cmake-3.30.3-py3-none-musllinux_1_1_s390x.whl", hash = "sha256:592cfcf280570713b8743bf8a8dec3753e0b82a7791d7d79f5ddb4f2be8b48b8"}, - {file = "cmake-3.30.3-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:e0fd7746f8895ec54e20c5d5dcc76a42256483e1f4736050264a180a13f9f8ef"}, - {file = "cmake-3.30.3-py3-none-win32.whl", hash = "sha256:ca990748d1a1d778a1a31cc1e33dcb01f2ed6fb0a752e945ff9e2d5435cff191"}, - {file = "cmake-3.30.3-py3-none-win_amd64.whl", hash = "sha256:3b41b0fbf3b449dd387c71444c9eb7f23e9a8061554bbf8fd8157ee355427220"}, - {file = "cmake-3.30.3-py3-none-win_arm64.whl", hash = "sha256:a9e14118824992313bd0e2b3b86d9c85d7883c39b784199ea755fc32aeeb9e81"}, - {file = "cmake-3.30.3.tar.gz", hash = "sha256:c015d02e5f25973b66b66a060d3ad8c1c382cf38ba7b09712770d9de50b67b80"}, + {file = "cmake-3.30.4-py3-none-macosx_10_10_universal2.whl", hash = "sha256:8a1a30125213c3d44b81a1af0085ad1dcd77abc61bcdf330556e83898428198a"}, + {file = "cmake-3.30.4-py3-none-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9f69b3706ae93fa48762871bdc7cb759fbbbadb04452e5eab820537c35fabcb6"}, + {file = "cmake-3.30.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:969af8432a17168e5b88e2efba11e5e14b7ca38aa638975b7ce1b19044c5183f"}, + {file = "cmake-3.30.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a5929e21af39a3adf4058aea54aa2197198e06315ebad541dda0baf20e2b32b"}, + {file = "cmake-3.30.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9808d3744e57c6fd71d93e2ca95142d67578a13a8867f7e8b000f343799899f"}, + {file = "cmake-3.30.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a223c62cfeebcb7b90f715c16bb2e83ee37e8c3e676efde83b094d62c278ec2"}, + {file = "cmake-3.30.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08466455fbac67287a4868819ae0e0ab16d60c02eb209ae5e6d70e0e35d0e601"}, + {file = "cmake-3.30.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b8a4b0e638ddbabd16cad8b053b5a66733ddaf652dc3d46d55b3887314022fe"}, + {file = "cmake-3.30.4-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:a8f3160cc2b362c0ba03d70300a36bca5a58e1f82c345f4f54a4da7f59b7b2b4"}, + {file = "cmake-3.30.4-py3-none-musllinux_1_1_i686.whl", hash = "sha256:13bd1afa2e9988973f18c2425823081a044929e80685731601f093ff673d2db7"}, + {file = "cmake-3.30.4-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:d2ab1018a42e03cf2e843f9565bc2ff7465a1a66c1cbfaba30d494a5e26f763e"}, + {file = "cmake-3.30.4-py3-none-musllinux_1_1_s390x.whl", hash = "sha256:2d6367a438c11f0863c9cdea843acd09514e94534ce0d115bc8f7905aaff243d"}, + {file = "cmake-3.30.4-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:e4cc37735bdc7ba058abdddd3f94ac9dc32cae0f94ae68661565b39f64a9a22f"}, + {file = "cmake-3.30.4-py3-none-win32.whl", hash = "sha256:a08e9a987be5da69941f4a26dd7614fcbb5039394821fbcce9716c20a1571c0c"}, + {file = "cmake-3.30.4-py3-none-win_amd64.whl", hash = "sha256:2d128d0831924788c1e87d6ca9abe4594e2ccde718712b0fa2c8c3a99b0d1282"}, + {file = "cmake-3.30.4-py3-none-win_arm64.whl", hash = "sha256:2825874fb84bd9d05c40b1a4347366d9949c9f6bac7a9ace97ac7faf9d573b8b"}, + {file = "cmake-3.30.4.tar.gz", hash = "sha256:fedd88495e742a1316078c283c2b4c2eeac4c34eca3234401d28f09ee58a320f"}, ] [package.extras] @@ 
-828,83 +843,73 @@ test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist" [[package]] name = "coverage" -version = "7.6.1" +version = "7.6.2" description = "Code coverage measurement for Python" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "coverage-7.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b06079abebbc0e89e6163b8e8f0e16270124c154dc6e4a47b413dd538859af16"}, - {file = "coverage-7.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cf4b19715bccd7ee27b6b120e7e9dd56037b9c0681dcc1adc9ba9db3d417fa36"}, - {file = "coverage-7.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61c0abb4c85b095a784ef23fdd4aede7a2628478e7baba7c5e3deba61070a02"}, - {file = "coverage-7.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd21f6ae3f08b41004dfb433fa895d858f3f5979e7762d052b12aef444e29afc"}, - {file = "coverage-7.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f59d57baca39b32db42b83b2a7ba6f47ad9c394ec2076b084c3f029b7afca23"}, - {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a1ac0ae2b8bd743b88ed0502544847c3053d7171a3cff9228af618a068ed9c34"}, - {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e6a08c0be454c3b3beb105c0596ebdc2371fab6bb90c0c0297f4e58fd7e1012c"}, - {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f5796e664fe802da4f57a168c85359a8fbf3eab5e55cd4e4569fbacecc903959"}, - {file = "coverage-7.6.1-cp310-cp310-win32.whl", hash = "sha256:7bb65125fcbef8d989fa1dd0e8a060999497629ca5b0efbca209588a73356232"}, - {file = "coverage-7.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:3115a95daa9bdba70aea750db7b96b37259a81a709223c8448fa97727d546fe0"}, - {file = "coverage-7.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7dea0889685db8550f839fa202744652e87c60015029ce3f60e006f8c4462c93"}, - {file = "coverage-7.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed37bd3c3b063412f7620464a9ac1314d33100329f39799255fb8d3027da50d3"}, - {file = "coverage-7.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d85f5e9a5f8b73e2350097c3756ef7e785f55bd71205defa0bfdaf96c31616ff"}, - {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bc572be474cafb617672c43fe989d6e48d3c83af02ce8de73fff1c6bb3c198d"}, - {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0420b573964c760df9e9e86d1a9a622d0d27f417e1a949a8a66dd7bcee7bc6"}, - {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1f4aa8219db826ce6be7099d559f8ec311549bfc4046f7f9fe9b5cea5c581c56"}, - {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fc5a77d0c516700ebad189b587de289a20a78324bc54baee03dd486f0855d234"}, - {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b48f312cca9621272ae49008c7f613337c53fadca647d6384cc129d2996d1133"}, - {file = "coverage-7.6.1-cp311-cp311-win32.whl", hash = "sha256:1125ca0e5fd475cbbba3bb67ae20bd2c23a98fac4e32412883f9bcbaa81c314c"}, - {file = "coverage-7.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:8ae539519c4c040c5ffd0632784e21b2f03fc1340752af711f33e5be83a9d6c6"}, - {file = "coverage-7.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:95cae0efeb032af8458fc27d191f85d1717b1d4e49f7cb226cf526ff28179778"}, - {file = "coverage-7.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5621a9175cf9d0b0c84c2ef2b12e9f5f5071357c4d2ea6ca1cf01814f45d2391"}, - {file = "coverage-7.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:260933720fdcd75340e7dbe9060655aff3af1f0c5d20f46b57f262ab6c86a5e8"}, - {file = "coverage-7.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07e2ca0ad381b91350c0ed49d52699b625aab2b44b65e1b4e02fa9df0e92ad2d"}, - {file = "coverage-7.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44fee9975f04b33331cb8eb272827111efc8930cfd582e0320613263ca849ca"}, - {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877abb17e6339d96bf08e7a622d05095e72b71f8afd8a9fefc82cf30ed944163"}, - {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e0cadcf6733c09154b461f1ca72d5416635e5e4ec4e536192180d34ec160f8a"}, - {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3c02d12f837d9683e5ab2f3d9844dc57655b92c74e286c262e0fc54213c216d"}, - {file = "coverage-7.6.1-cp312-cp312-win32.whl", hash = "sha256:e05882b70b87a18d937ca6768ff33cc3f72847cbc4de4491c8e73880766718e5"}, - {file = "coverage-7.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:b5d7b556859dd85f3a541db6a4e0167b86e7273e1cdc973e5b175166bb634fdb"}, - {file = "coverage-7.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a4acd025ecc06185ba2b801f2de85546e0b8ac787cf9d3b06e7e2a69f925b106"}, - {file = "coverage-7.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a6d3adcf24b624a7b778533480e32434a39ad8fa30c315208f6d3e5542aeb6e9"}, - {file = "coverage-7.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0c212c49b6c10e6951362f7c6df3329f04c2b1c28499563d4035d964ab8e08c"}, - {file = "coverage-7.6.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e81d7a3e58882450ec4186ca59a3f20a5d4440f25b1cff6f0902ad890e6748a"}, - {file = "coverage-7.6.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78b260de9790fd81e69401c2dc8b17da47c8038176a79092a89cb2b7d945d060"}, - {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a78d169acd38300060b28d600344a803628c3fd585c912cacc9ea8790fe96862"}, - {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2c09f4ce52cb99dd7505cd0fc8e0e37c77b87f46bc9c1eb03fe3bc9991085388"}, - {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6878ef48d4227aace338d88c48738a4258213cd7b74fd9a3d4d7582bb1d8a155"}, - {file = "coverage-7.6.1-cp313-cp313-win32.whl", hash = "sha256:44df346d5215a8c0e360307d46ffaabe0f5d3502c8a1cefd700b34baf31d411a"}, - {file = "coverage-7.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:8284cf8c0dd272a247bc154eb6c95548722dce90d098c17a883ed36e67cdb129"}, - {file = "coverage-7.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d3296782ca4eab572a1a4eca686d8bfb00226300dcefdf43faa25b5242ab8a3e"}, - {file = "coverage-7.6.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:502753043567491d3ff6d08629270127e0c31d4184c4c8d98f92c26f65019962"}, - {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6a89ecca80709d4076b95f89f308544ec8f7b4727e8a547913a35f16717856cb"}, - {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a318d68e92e80af8b00fa99609796fdbcdfef3629c77c6283566c6f02c6d6704"}, - {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13b0a73a0896988f053e4fbb7de6d93388e6dd292b0d87ee51d106f2c11b465b"}, - {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4421712dbfc5562150f7554f13dde997a2e932a6b5f352edcce948a815efee6f"}, - {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:166811d20dfea725e2e4baa71fffd6c968a958577848d2131f39b60043400223"}, - {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:225667980479a17db1048cb2bf8bfb39b8e5be8f164b8f6628b64f78a72cf9d3"}, - {file = "coverage-7.6.1-cp313-cp313t-win32.whl", hash = "sha256:170d444ab405852903b7d04ea9ae9b98f98ab6d7e63e1115e82620807519797f"}, - {file = "coverage-7.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b9f222de8cded79c49bf184bdbc06630d4c58eec9459b939b4a690c82ed05657"}, - {file = "coverage-7.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6db04803b6c7291985a761004e9060b2bca08da6d04f26a7f2294b8623a0c1a0"}, - {file = "coverage-7.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f1adfc8ac319e1a348af294106bc6a8458a0f1633cc62a1446aebc30c5fa186a"}, - {file = "coverage-7.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a95324a9de9650a729239daea117df21f4b9868ce32e63f8b650ebe6cef5595b"}, - {file = "coverage-7.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b43c03669dc4618ec25270b06ecd3ee4fa94c7f9b3c14bae6571ca00ef98b0d3"}, - {file = "coverage-7.6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8929543a7192c13d177b770008bc4e8119f2e1f881d563fc6b6305d2d0ebe9de"}, - {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:a09ece4a69cf399510c8ab25e0950d9cf2b42f7b3cb0374f95d2e2ff594478a6"}, - {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:9054a0754de38d9dbd01a46621636689124d666bad1936d76c0341f7d71bf569"}, - {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0dbde0f4aa9a16fa4d754356a8f2e36296ff4d83994b2c9d8398aa32f222f989"}, - {file = "coverage-7.6.1-cp38-cp38-win32.whl", hash = "sha256:da511e6ad4f7323ee5702e6633085fb76c2f893aaf8ce4c51a0ba4fc07580ea7"}, - {file = "coverage-7.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:3f1156e3e8f2872197af3840d8ad307a9dd18e615dc64d9ee41696f287c57ad8"}, - {file = "coverage-7.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abd5fd0db5f4dc9289408aaf34908072f805ff7792632250dcb36dc591d24255"}, - {file = "coverage-7.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:547f45fa1a93154bd82050a7f3cddbc1a7a4dd2a9bf5cb7d06f4ae29fe94eaf8"}, - {file = "coverage-7.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:645786266c8f18a931b65bfcefdbf6952dd0dea98feee39bd188607a9d307ed2"}, - {file = "coverage-7.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e0b2df163b8ed01d515807af24f63de04bebcecbd6c3bfeff88385789fdf75a"}, - {file = "coverage-7.6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:609b06f178fe8e9f89ef676532760ec0b4deea15e9969bf754b37f7c40326dbc"}, - {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:702855feff378050ae4f741045e19a32d57d19f3e0676d589df0575008ea5004"}, - {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2bdb062ea438f22d99cba0d7829c2ef0af1d768d1e4a4f528087224c90b132cb"}, - {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9c56863d44bd1c4fe2abb8a4d6f5371d197f1ac0ebdee542f07f35895fc07f36"}, - {file = "coverage-7.6.1-cp39-cp39-win32.whl", hash = "sha256:6e2cd258d7d927d09493c8df1ce9174ad01b381d4729a9d8d4e38670ca24774c"}, - {file = "coverage-7.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:06a737c882bd26d0d6ee7269b20b12f14a8704807a01056c80bb881a4b2ce6ca"}, - {file = "coverage-7.6.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:e9a6e0eb86070e8ccaedfbd9d38fec54864f3125ab95419970575b42af7541df"}, - {file = "coverage-7.6.1.tar.gz", hash = "sha256:953510dfb7b12ab69d20135a0662397f077c59b1e6379a768e97c59d852ee51d"}, + {file = "coverage-7.6.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c9df1950fb92d49970cce38100d7e7293c84ed3606eaa16ea0b6bc27175bb667"}, + {file = "coverage-7.6.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:24500f4b0e03aab60ce575c85365beab64b44d4db837021e08339f61d1fbfe52"}, + {file = "coverage-7.6.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a663b180b6669c400b4630a24cc776f23a992d38ce7ae72ede2a397ce6b0f170"}, + {file = "coverage-7.6.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfde025e2793a22efe8c21f807d276bd1d6a4bcc5ba6f19dbdfc4e7a12160909"}, + {file = "coverage-7.6.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:087932079c065d7b8ebadd3a0160656c55954144af6439886c8bcf78bbbcde7f"}, + {file = "coverage-7.6.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9c6b0c1cafd96213a0327cf680acb39f70e452caf8e9a25aeb05316db9c07f89"}, + {file = "coverage-7.6.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:6e85830eed5b5263ffa0c62428e43cb844296f3b4461f09e4bdb0d44ec190bc2"}, + {file = "coverage-7.6.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:62ab4231c01e156ece1b3a187c87173f31cbeee83a5e1f6dff17f288dca93345"}, + {file = "coverage-7.6.2-cp310-cp310-win32.whl", hash = "sha256:7b80fbb0da3aebde102a37ef0138aeedff45997e22f8962e5f16ae1742852676"}, + {file = "coverage-7.6.2-cp310-cp310-win_amd64.whl", hash = "sha256:d20c3d1f31f14d6962a4e2f549c21d31e670b90f777ef4171be540fb7fb70f02"}, + {file = "coverage-7.6.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bb21bac7783c1bf6f4bbe68b1e0ff0d20e7e7732cfb7995bc8d96e23aa90fc7b"}, + {file = "coverage-7.6.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a7b2e437fbd8fae5bc7716b9c7ff97aecc95f0b4d56e4ca08b3c8d8adcaadb84"}, + {file = "coverage-7.6.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:536f77f2bf5797983652d1d55f1a7272a29afcc89e3ae51caa99b2db4e89d658"}, + {file = "coverage-7.6.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f361296ca7054f0936b02525646b2731b32c8074ba6defab524b79b2b7eeac72"}, + {file = "coverage-7.6.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7926d8d034e06b479797c199747dd774d5e86179f2ce44294423327a88d66ca7"}, + {file = 
"coverage-7.6.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0bbae11c138585c89fb4e991faefb174a80112e1a7557d507aaa07675c62e66b"}, + {file = "coverage-7.6.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fcad7d5d2bbfeae1026b395036a8aa5abf67e8038ae7e6a25c7d0f88b10a8e6a"}, + {file = "coverage-7.6.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f01e53575f27097d75d42de33b1b289c74b16891ce576d767ad8c48d17aeb5e0"}, + {file = "coverage-7.6.2-cp311-cp311-win32.whl", hash = "sha256:7781f4f70c9b0b39e1b129b10c7d43a4e0c91f90c60435e6da8288efc2b73438"}, + {file = "coverage-7.6.2-cp311-cp311-win_amd64.whl", hash = "sha256:9bcd51eeca35a80e76dc5794a9dd7cb04b97f0e8af620d54711793bfc1fbba4b"}, + {file = "coverage-7.6.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ebc94fadbd4a3f4215993326a6a00e47d79889391f5659bf310f55fe5d9f581c"}, + {file = "coverage-7.6.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9681516288e3dcf0aa7c26231178cc0be6cac9705cac06709f2353c5b406cfea"}, + {file = "coverage-7.6.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d9c5d13927d77af4fbe453953810db766f75401e764727e73a6ee4f82527b3e"}, + {file = "coverage-7.6.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b92f9ca04b3e719d69b02dc4a69debb795af84cb7afd09c5eb5d54b4a1ae2191"}, + {file = "coverage-7.6.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ff2ef83d6d0b527b5c9dad73819b24a2f76fdddcfd6c4e7a4d7e73ecb0656b4"}, + {file = "coverage-7.6.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:47ccb6e99a3031ffbbd6e7cc041e70770b4fe405370c66a54dbf26a500ded80b"}, + {file = "coverage-7.6.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a867d26f06bcd047ef716175b2696b315cb7571ccb951006d61ca80bbc356e9e"}, + {file = "coverage-7.6.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cdfcf2e914e2ba653101157458afd0ad92a16731eeba9a611b5cbb3e7124e74b"}, + {file = "coverage-7.6.2-cp312-cp312-win32.whl", hash = "sha256:f9035695dadfb397bee9eeaf1dc7fbeda483bf7664a7397a629846800ce6e276"}, + {file = "coverage-7.6.2-cp312-cp312-win_amd64.whl", hash = "sha256:5ed69befa9a9fc796fe015a7040c9398722d6b97df73a6b608e9e275fa0932b0"}, + {file = "coverage-7.6.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eea60c79d36a8f39475b1af887663bc3ae4f31289cd216f514ce18d5938df40"}, + {file = "coverage-7.6.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa68a6cdbe1bc6793a9dbfc38302c11599bbe1837392ae9b1d238b9ef3dafcf1"}, + {file = "coverage-7.6.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ec528ae69f0a139690fad6deac8a7d33629fa61ccce693fdd07ddf7e9931fba"}, + {file = "coverage-7.6.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed5ac02126f74d190fa2cc14a9eb2a5d9837d5863920fa472b02eb1595cdc925"}, + {file = "coverage-7.6.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21c0ea0d4db8a36b275cb6fb2437a3715697a4ba3cb7b918d3525cc75f726304"}, + {file = "coverage-7.6.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:35a51598f29b2a19e26d0908bd196f771a9b1c5d9a07bf20be0adf28f1ad4f77"}, + {file = "coverage-7.6.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c9192925acc33e146864b8cf037e2ed32a91fdf7644ae875f5d46cd2ef086a5f"}, + {file = "coverage-7.6.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:bf4eeecc9e10f5403ec06138978235af79c9a79af494eb6b1d60a50b49ed2869"}, + {file = "coverage-7.6.2-cp313-cp313-win32.whl", hash = "sha256:e4ee15b267d2dad3e8759ca441ad450c334f3733304c55210c2a44516e8d5530"}, + {file = "coverage-7.6.2-cp313-cp313-win_amd64.whl", hash = "sha256:c71965d1ced48bf97aab79fad56df82c566b4c498ffc09c2094605727c4b7e36"}, + {file = "coverage-7.6.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7571e8bbecc6ac066256f9de40365ff833553e2e0c0c004f4482facb131820ef"}, + {file = "coverage-7.6.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:078a87519057dacb5d77e333f740708ec2a8f768655f1db07f8dfd28d7a005f0"}, + {file = "coverage-7.6.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e5e92e3e84a8718d2de36cd8387459cba9a4508337b8c5f450ce42b87a9e760"}, + {file = "coverage-7.6.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ebabdf1c76593a09ee18c1a06cd3022919861365219ea3aca0247ededf6facd6"}, + {file = "coverage-7.6.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12179eb0575b8900912711688e45474f04ab3934aaa7b624dea7b3c511ecc90f"}, + {file = "coverage-7.6.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:39d3b964abfe1519b9d313ab28abf1d02faea26cd14b27f5283849bf59479ff5"}, + {file = "coverage-7.6.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:84c4315577f7cd511d6250ffd0f695c825efe729f4205c0340f7004eda51191f"}, + {file = "coverage-7.6.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ff797320dcbff57caa6b2301c3913784a010e13b1f6cf4ab3f563f3c5e7919db"}, + {file = "coverage-7.6.2-cp313-cp313t-win32.whl", hash = "sha256:2b636a301e53964550e2f3094484fa5a96e699db318d65398cfba438c5c92171"}, + {file = "coverage-7.6.2-cp313-cp313t-win_amd64.whl", hash = "sha256:d03a060ac1a08e10589c27d509bbdb35b65f2d7f3f8d81cf2fa199877c7bc58a"}, + {file = "coverage-7.6.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c37faddc8acd826cfc5e2392531aba734b229741d3daec7f4c777a8f0d4993e5"}, + {file = "coverage-7.6.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab31fdd643f162c467cfe6a86e9cb5f1965b632e5e65c072d90854ff486d02cf"}, + {file = "coverage-7.6.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97df87e1a20deb75ac7d920c812e9326096aa00a9a4b6d07679b4f1f14b06c90"}, + {file = "coverage-7.6.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:343056c5e0737487a5291f5691f4dfeb25b3e3c8699b4d36b92bb0e586219d14"}, + {file = "coverage-7.6.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad4ef1c56b47b6b9024b939d503ab487231df1f722065a48f4fc61832130b90e"}, + {file = "coverage-7.6.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7fca4a92c8a7a73dee6946471bce6d1443d94155694b893b79e19ca2a540d86e"}, + {file = "coverage-7.6.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69f251804e052fc46d29d0e7348cdc5fcbfc4861dc4a1ebedef7e78d241ad39e"}, + {file = "coverage-7.6.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e8ea055b3ea046c0f66217af65bc193bbbeca1c8661dc5fd42698db5795d2627"}, + {file = "coverage-7.6.2-cp39-cp39-win32.whl", hash = "sha256:6c2ba1e0c24d8fae8f2cf0aeb2fc0a2a7f69b6d20bd8d3749fd6b36ecef5edf0"}, + {file = "coverage-7.6.2-cp39-cp39-win_amd64.whl", hash = "sha256:2186369a654a15628e9c1c9921409a6b3eda833e4b91f3ca2a7d9f77abb4987c"}, + {file = 
"coverage-7.6.2-pp39.pp310-none-any.whl", hash = "sha256:667952739daafe9616db19fbedbdb87917eee253ac4f31d70c7587f7ab531b4e"}, + {file = "coverage-7.6.2.tar.gz", hash = "sha256:a5f81e68aa62bc0cfca04f7b19eaa8f9c826b53fc82ab9e2121976dc74f131f3"}, ] [package.dependencies] @@ -996,13 +1001,13 @@ files = [ [[package]] name = "datasets" -version = "3.0.0" +version = "3.0.1" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.8.0" files = [ - {file = "datasets-3.0.0-py3-none-any.whl", hash = "sha256:c23fefb6c953dcb1cd5f6deb6c502729c733ef98791e0c3f2d80c7ca2d9a01dd"}, - {file = "datasets-3.0.0.tar.gz", hash = "sha256:592317eb137f0fc5aac068ff283ba13c3c66d10c9c034d44bc8aa584126cf3e2"}, + {file = "datasets-3.0.1-py3-none-any.whl", hash = "sha256:db080aab41c8cc68645117a0f172e5c6789cbc672f066de0aa5a08fc3eebc686"}, + {file = "datasets-3.0.1.tar.gz", hash = "sha256:40d63b09e76a3066c32e746d6fdc36fd3f29ed2acd49bf5b1a2100da32936511"}, ] [package.dependencies] @@ -1024,47 +1029,51 @@ xxhash = "*" [package.extras] audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "transformers (>=4.42.0)", "zstandard"] +dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "torchdata", "transformers", "transformers (>=4.42.0)", "zstandard"] docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] quality = ["ruff (>=0.3.0)"] s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers (>=4.42.0)", "zstandard"] -tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", 
"pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "transformers (>=4.42.0)", "zstandard"] +tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"] +tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=9.4.0)"] [[package]] name = "debugpy" -version = "1.8.6" +version = "1.8.7" description = "An implementation of the Debug Adapter Protocol for Python" optional = true python-versions = ">=3.8" files = [ - {file = "debugpy-1.8.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:30f467c5345d9dfdcc0afdb10e018e47f092e383447500f125b4e013236bf14b"}, - {file = "debugpy-1.8.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d73d8c52614432f4215d0fe79a7e595d0dd162b5c15233762565be2f014803b"}, - {file = "debugpy-1.8.6-cp310-cp310-win32.whl", hash = "sha256:e3e182cd98eac20ee23a00653503315085b29ab44ed66269482349d307b08df9"}, - {file = "debugpy-1.8.6-cp310-cp310-win_amd64.whl", hash = "sha256:e3a82da039cfe717b6fb1886cbbe5c4a3f15d7df4765af857f4307585121c2dd"}, - {file = "debugpy-1.8.6-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:67479a94cf5fd2c2d88f9615e087fcb4fec169ec780464a3f2ba4a9a2bb79955"}, - {file = "debugpy-1.8.6-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb8653f6cbf1dd0a305ac1aa66ec246002145074ea57933978346ea5afdf70b"}, - {file = "debugpy-1.8.6-cp311-cp311-win32.whl", hash = "sha256:cdaf0b9691879da2d13fa39b61c01887c34558d1ff6e5c30e2eb698f5384cd43"}, - {file = "debugpy-1.8.6-cp311-cp311-win_amd64.whl", hash = "sha256:43996632bee7435583952155c06881074b9a742a86cee74e701d87ca532fe833"}, - {file = "debugpy-1.8.6-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:db891b141fc6ee4b5fc6d1cc8035ec329cabc64bdd2ae672b4550c87d4ecb128"}, - {file = "debugpy-1.8.6-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:567419081ff67da766c898ccf21e79f1adad0e321381b0dfc7a9c8f7a9347972"}, - {file = "debugpy-1.8.6-cp312-cp312-win32.whl", hash = "sha256:c9834dfd701a1f6bf0f7f0b8b1573970ae99ebbeee68314116e0ccc5c78eea3c"}, - {file = "debugpy-1.8.6-cp312-cp312-win_amd64.whl", hash = "sha256:e4ce0570aa4aca87137890d23b86faeadf184924ad892d20c54237bcaab75d8f"}, - {file = "debugpy-1.8.6-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:df5dc9eb4ca050273b8e374a4cd967c43be1327eeb42bfe2f58b3cdfe7c68dcb"}, - {file = 
"debugpy-1.8.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a85707c6a84b0c5b3db92a2df685b5230dd8fb8c108298ba4f11dba157a615a"}, - {file = "debugpy-1.8.6-cp38-cp38-win32.whl", hash = "sha256:538c6cdcdcdad310bbefd96d7850be1cd46e703079cc9e67d42a9ca776cdc8a8"}, - {file = "debugpy-1.8.6-cp38-cp38-win_amd64.whl", hash = "sha256:22140bc02c66cda6053b6eb56dfe01bbe22a4447846581ba1dd6df2c9f97982d"}, - {file = "debugpy-1.8.6-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:c1cef65cffbc96e7b392d9178dbfd524ab0750da6c0023c027ddcac968fd1caa"}, - {file = "debugpy-1.8.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1e60bd06bb3cc5c0e957df748d1fab501e01416c43a7bdc756d2a992ea1b881"}, - {file = "debugpy-1.8.6-cp39-cp39-win32.whl", hash = "sha256:f7158252803d0752ed5398d291dee4c553bb12d14547c0e1843ab74ee9c31123"}, - {file = "debugpy-1.8.6-cp39-cp39-win_amd64.whl", hash = "sha256:3358aa619a073b620cd0d51d8a6176590af24abcc3fe2e479929a154bf591b51"}, - {file = "debugpy-1.8.6-py2.py3-none-any.whl", hash = "sha256:b48892df4d810eff21d3ef37274f4c60d32cdcafc462ad5647239036b0f0649f"}, - {file = "debugpy-1.8.6.zip", hash = "sha256:c931a9371a86784cee25dec8d65bc2dc7a21f3f1552e3833d9ef8f919d22280a"}, + {file = "debugpy-1.8.7-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:95fe04a573b8b22896c404365e03f4eda0ce0ba135b7667a1e57bd079793b96b"}, + {file = "debugpy-1.8.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:628a11f4b295ffb4141d8242a9bb52b77ad4a63a2ad19217a93be0f77f2c28c9"}, + {file = "debugpy-1.8.7-cp310-cp310-win32.whl", hash = "sha256:85ce9c1d0eebf622f86cc68618ad64bf66c4fc3197d88f74bb695a416837dd55"}, + {file = "debugpy-1.8.7-cp310-cp310-win_amd64.whl", hash = "sha256:29e1571c276d643757ea126d014abda081eb5ea4c851628b33de0c2b6245b037"}, + {file = "debugpy-1.8.7-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:caf528ff9e7308b74a1749c183d6808ffbedbb9fb6af78b033c28974d9b8831f"}, + {file = "debugpy-1.8.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cba1d078cf2e1e0b8402e6bda528bf8fda7ccd158c3dba6c012b7897747c41a0"}, + {file = "debugpy-1.8.7-cp311-cp311-win32.whl", hash = "sha256:171899588bcd412151e593bd40d9907133a7622cd6ecdbdb75f89d1551df13c2"}, + {file = "debugpy-1.8.7-cp311-cp311-win_amd64.whl", hash = "sha256:6e1c4ffb0c79f66e89dfd97944f335880f0d50ad29525dc792785384923e2211"}, + {file = "debugpy-1.8.7-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:4d27d842311353ede0ad572600c62e4bcd74f458ee01ab0dd3a1a4457e7e3706"}, + {file = "debugpy-1.8.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:703c1fd62ae0356e194f3e7b7a92acd931f71fe81c4b3be2c17a7b8a4b546ec2"}, + {file = "debugpy-1.8.7-cp312-cp312-win32.whl", hash = "sha256:2f729228430ef191c1e4df72a75ac94e9bf77413ce5f3f900018712c9da0aaca"}, + {file = "debugpy-1.8.7-cp312-cp312-win_amd64.whl", hash = "sha256:45c30aaefb3e1975e8a0258f5bbd26cd40cde9bfe71e9e5a7ac82e79bad64e39"}, + {file = "debugpy-1.8.7-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:d050a1ec7e925f514f0f6594a1e522580317da31fbda1af71d1530d6ea1f2b40"}, + {file = "debugpy-1.8.7-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f4349a28e3228a42958f8ddaa6333d6f8282d5edaea456070e48609c5983b7"}, + {file 
= "debugpy-1.8.7-cp313-cp313-win32.whl", hash = "sha256:11ad72eb9ddb436afb8337891a986302e14944f0f755fd94e90d0d71e9100bba"}, + {file = "debugpy-1.8.7-cp313-cp313-win_amd64.whl", hash = "sha256:2efb84d6789352d7950b03d7f866e6d180284bc02c7e12cb37b489b7083d81aa"}, + {file = "debugpy-1.8.7-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:4b908291a1d051ef3331484de8e959ef3e66f12b5e610c203b5b75d2725613a7"}, + {file = "debugpy-1.8.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da8df5b89a41f1fd31503b179d0a84a5fdb752dddd5b5388dbd1ae23cda31ce9"}, + {file = "debugpy-1.8.7-cp38-cp38-win32.whl", hash = "sha256:b12515e04720e9e5c2216cc7086d0edadf25d7ab7e3564ec8b4521cf111b4f8c"}, + {file = "debugpy-1.8.7-cp38-cp38-win_amd64.whl", hash = "sha256:93176e7672551cb5281577cdb62c63aadc87ec036f0c6a486f0ded337c504596"}, + {file = "debugpy-1.8.7-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:90d93e4f2db442f8222dec5ec55ccfc8005821028982f1968ebf551d32b28907"}, + {file = "debugpy-1.8.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6db2a370e2700557a976eaadb16243ec9c91bd46f1b3bb15376d7aaa7632c81"}, + {file = "debugpy-1.8.7-cp39-cp39-win32.whl", hash = "sha256:a6cf2510740e0c0b4a40330640e4b454f928c7b99b0c9dbf48b11efba08a8cda"}, + {file = "debugpy-1.8.7-cp39-cp39-win_amd64.whl", hash = "sha256:6a9d9d6d31846d8e34f52987ee0f1a904c7baa4912bf4843ab39dadf9b8f3e0d"}, + {file = "debugpy-1.8.7-py2.py3-none-any.whl", hash = "sha256:57b00de1c8d2c84a61b90880f7e5b6deaf4c312ecbde3a0e8912f2a56c4ac9ae"}, + {file = "debugpy-1.8.7.zip", hash = "sha256:18b8f731ed3e2e1df8e9cdaa23fb1fc9c24e570cd0081625308ec51c82efe42e"}, ] [[package]] @@ -1154,13 +1163,13 @@ profile = ["gprof2dot (>=2022.7.29)"] [[package]] name = "distlib" -version = "0.3.8" +version = "0.3.9" description = "Distribution utilities" optional = true python-versions = "*" files = [ - {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, - {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, + {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, + {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, ] [[package]] @@ -1800,7 +1809,7 @@ pyarrow = ">=12.0.0" type = "git" url = "https://github.com/dora-rs/dora-lerobot.git" reference = "HEAD" -resolved_reference = "3a5dbc4d36d9973439470128f309ef7511d23465" +resolved_reference = "7844fbdb97d467a4672be3eb102ebca96211e95b" subdirectory = "gym_dora" [[package]] @@ -1848,13 +1857,13 @@ test = ["pytest (>=8.1.0)", "pytest-cov (>=5.0.0)"] [[package]] name = "gymnasium" -version = "0.29.1" +version = "1.0.0" description = "A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)." 
optional = false python-versions = ">=3.8" files = [ - {file = "gymnasium-0.29.1-py3-none-any.whl", hash = "sha256:61c3384b5575985bb7f85e43213bcb40f36fcdff388cae6bc229304c71f2843e"}, - {file = "gymnasium-0.29.1.tar.gz", hash = "sha256:1a532752efcb7590478b1cc7aa04f608eb7a2fdad5570cd217b66b6a35274bb1"}, + {file = "gymnasium-1.0.0-py3-none-any.whl", hash = "sha256:b6f40e1e24c5bd419361e1a5b86a9117d2499baecc3a660d44dfff4c465393ad"}, + {file = "gymnasium-1.0.0.tar.gz", hash = "sha256:9d2b66f30c1b34fe3c2ce7fae65ecf365d0e9982d2b3d860235e773328a3b403"}, ] [package.dependencies] @@ -1864,34 +1873,34 @@ numpy = ">=1.21.0" typing-extensions = ">=4.3.0" [package.extras] -accept-rom-license = ["autorom[accept-rom-license] (>=0.4.2,<0.5.0)"] -all = ["box2d-py (==2.3.5)", "cython (<3)", "imageio (>=2.14.1)", "jax (>=0.4.0)", "jaxlib (>=0.4.0)", "lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (>=2.3.3)", "mujoco-py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (>=2.1.3)", "shimmy[atari] (>=0.1.0,<1.0)", "swig (==4.*)", "torch (>=1.0.0)"] -atari = ["shimmy[atari] (>=0.1.0,<1.0)"] +all = ["ale-py (>=0.9)", "box2d-py (==2.3.5)", "cython (<3)", "flax (>=0.5.0)", "imageio (>=2.14.1)", "jax (>=0.4.0)", "jaxlib (>=0.4.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (>=2.1.5)", "mujoco-py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (>=2.1.3)", "swig (==4.*)", "torch (>=1.0.0)"] +atari = ["ale-py (>=0.9)"] box2d = ["box2d-py (==2.3.5)", "pygame (>=2.1.3)", "swig (==4.*)"] classic-control = ["pygame (>=2.1.3)", "pygame (>=2.1.3)"] -jax = ["jax (>=0.4.0)", "jaxlib (>=0.4.0)"] -mujoco = ["imageio (>=2.14.1)", "mujoco (>=2.3.3)"] +jax = ["flax (>=0.5.0)", "jax (>=0.4.0)", "jaxlib (>=0.4.0)"] +mujoco = ["imageio (>=2.14.1)", "mujoco (>=2.1.5)"] mujoco-py = ["cython (<3)", "cython (<3)", "mujoco-py (>=2.1,<2.2)", "mujoco-py (>=2.1,<2.2)"] -other = ["lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "opencv-python (>=3.0)", "torch (>=1.0.0)"] -testing = ["pytest (==7.1.3)", "scipy (>=1.7.3)"] +other = ["matplotlib (>=3.0)", "moviepy (>=1.0.0)", "opencv-python (>=3.0)"] +testing = ["dill (>=0.3.7)", "pytest (==7.1.3)", "scipy (>=1.7.3)"] +torch = ["torch (>=1.0.0)"] toy-text = ["pygame (>=2.1.3)", "pygame (>=2.1.3)"] [[package]] name = "gymnasium-robotics" -version = "1.2.4" +version = "1.3.0" description = "Robotics environments for the Gymnasium repo." 
optional = true python-versions = ">=3.8" files = [ - {file = "gymnasium-robotics-1.2.4.tar.gz", hash = "sha256:d304192b066f8b800599dfbe3d9d90bba9b761ee884472bdc4d05968a8bc61cb"}, - {file = "gymnasium_robotics-1.2.4-py3-none-any.whl", hash = "sha256:c2cb23e087ca0280ae6802837eb7b3a6d14e5bd24c00803ab09f015fcff3eef5"}, + {file = "gymnasium_robotics-1.3.0-py3-none-any.whl", hash = "sha256:d514a291e58e2d0197fd0bafb973685be7a43e40c7d464b5ef842462f1839fd7"}, + {file = "gymnasium_robotics-1.3.0.tar.gz", hash = "sha256:b3198b1e31ea86ca674434a5643bcf354129cb93f512f1b2b57e18274c22c050"}, ] [package.dependencies] -gymnasium = ">=0.26" +gymnasium = ">=1.0.0" imageio = "*" Jinja2 = ">=3.0.3" -mujoco = ">=2.3.3,<3.0" +mujoco = ">=2.2.0,<3.2.0" numpy = ">=1.21.0" PettingZoo = ">=1.23.0" @@ -1912,36 +1921,41 @@ files = [ [[package]] name = "h5py" -version = "3.11.0" +version = "3.12.1" description = "Read and write HDF5 files from Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "h5py-3.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1625fd24ad6cfc9c1ccd44a66dac2396e7ee74940776792772819fc69f3a3731"}, - {file = "h5py-3.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c072655ad1d5fe9ef462445d3e77a8166cbfa5e599045f8aa3c19b75315f10e5"}, - {file = "h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77b19a40788e3e362b54af4dcf9e6fde59ca016db2c61360aa30b47c7b7cef00"}, - {file = "h5py-3.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:ef4e2f338fc763f50a8113890f455e1a70acd42a4d083370ceb80c463d803972"}, - {file = "h5py-3.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbd732a08187a9e2a6ecf9e8af713f1d68256ee0f7c8b652a32795670fb481ba"}, - {file = "h5py-3.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75bd7b3d93fbeee40860fd70cdc88df4464e06b70a5ad9ce1446f5f32eb84007"}, - {file = "h5py-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52c416f8eb0daae39dabe71415cb531f95dce2d81e1f61a74537a50c63b28ab3"}, - {file = "h5py-3.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:083e0329ae534a264940d6513f47f5ada617da536d8dccbafc3026aefc33c90e"}, - {file = "h5py-3.11.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a76cae64080210389a571c7d13c94a1a6cf8cb75153044fd1f822a962c97aeab"}, - {file = "h5py-3.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3736fe21da2b7d8a13fe8fe415f1272d2a1ccdeff4849c1421d2fb30fd533bc"}, - {file = "h5py-3.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa6ae84a14103e8dc19266ef4c3e5d7c00b68f21d07f2966f0ca7bdb6c2761fb"}, - {file = "h5py-3.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:21dbdc5343f53b2e25404673c4f00a3335aef25521bd5fa8c707ec3833934892"}, - {file = "h5py-3.11.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:754c0c2e373d13d6309f408325343b642eb0f40f1a6ad21779cfa9502209e150"}, - {file = "h5py-3.11.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:731839240c59ba219d4cb3bc5880d438248533366f102402cfa0621b71796b62"}, - {file = "h5py-3.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ec9df3dd2018904c4cc06331951e274f3f3fd091e6d6cc350aaa90fa9b42a76"}, - {file = "h5py-3.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:55106b04e2c83dfb73dc8732e9abad69d83a436b5b82b773481d95d17b9685e1"}, - {file = "h5py-3.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f4e025e852754ca833401777c25888acb96889ee2c27e7e629a19aee288833f0"}, - {file = "h5py-3.11.0-cp39-cp39-macosx_11_0_arm64.whl", 
hash = "sha256:6c4b760082626120031d7902cd983d8c1f424cdba2809f1067511ef283629d4b"}, - {file = "h5py-3.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67462d0669f8f5459529de179f7771bd697389fcb3faab54d63bf788599a48ea"}, - {file = "h5py-3.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:d9c944d364688f827dc889cf83f1fca311caf4fa50b19f009d1f2b525edd33a3"}, - {file = "h5py-3.11.0.tar.gz", hash = "sha256:7b7e8f78072a2edec87c9836f25f34203fd492a4475709a18b417a33cfb21fa9"}, + {file = "h5py-3.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f0f1a382cbf494679c07b4371f90c70391dedb027d517ac94fa2c05299dacda"}, + {file = "h5py-3.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cb65f619dfbdd15e662423e8d257780f9a66677eae5b4b3fc9dca70b5fd2d2a3"}, + {file = "h5py-3.12.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b15d8dbd912c97541312c0e07438864d27dbca857c5ad634de68110c6beb1c2"}, + {file = "h5py-3.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59685fe40d8c1fbbee088c88cd4da415a2f8bee5c270337dc5a1c4aa634e3307"}, + {file = "h5py-3.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:577d618d6b6dea3da07d13cc903ef9634cde5596b13e832476dd861aaf651f3e"}, + {file = "h5py-3.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ccd9006d92232727d23f784795191bfd02294a4f2ba68708825cb1da39511a93"}, + {file = "h5py-3.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ad8a76557880aed5234cfe7279805f4ab5ce16b17954606cca90d578d3e713ef"}, + {file = "h5py-3.12.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1473348139b885393125126258ae2d70753ef7e9cec8e7848434f385ae72069e"}, + {file = "h5py-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:018a4597f35092ae3fb28ee851fdc756d2b88c96336b8480e124ce1ac6fb9166"}, + {file = "h5py-3.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:3fdf95092d60e8130ba6ae0ef7a9bd4ade8edbe3569c13ebbaf39baefffc5ba4"}, + {file = "h5py-3.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:06a903a4e4e9e3ebbc8b548959c3c2552ca2d70dac14fcfa650d9261c66939ed"}, + {file = "h5py-3.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7b3b8f3b48717e46c6a790e3128d39c61ab595ae0a7237f06dfad6a3b51d5351"}, + {file = "h5py-3.12.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:050a4f2c9126054515169c49cb900949814987f0c7ae74c341b0c9f9b5056834"}, + {file = "h5py-3.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c4b41d1019322a5afc5082864dfd6359f8935ecd37c11ac0029be78c5d112c9"}, + {file = "h5py-3.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:e4d51919110a030913201422fb07987db4338eba5ec8c5a15d6fab8e03d443fc"}, + {file = "h5py-3.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:513171e90ed92236fc2ca363ce7a2fc6f2827375efcbb0cc7fbdd7fe11fecafc"}, + {file = "h5py-3.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:59400f88343b79655a242068a9c900001a34b63e3afb040bd7cdf717e440f653"}, + {file = "h5py-3.12.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3e465aee0ec353949f0f46bf6c6f9790a2006af896cee7c178a8c3e5090aa32"}, + {file = "h5py-3.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba51c0c5e029bb5420a343586ff79d56e7455d496d18a30309616fdbeed1068f"}, + {file = "h5py-3.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:52ab036c6c97055b85b2a242cb540ff9590bacfda0c03dd0cf0661b311f522f8"}, + {file = 
"h5py-3.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d2b8dd64f127d8b324f5d2cd1c0fd6f68af69084e9e47d27efeb9e28e685af3e"}, + {file = "h5py-3.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4532c7e97fbef3d029735db8b6f5bf01222d9ece41e309b20d63cfaae2fb5c4d"}, + {file = "h5py-3.12.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fdf6d7936fa824acfa27305fe2d9f39968e539d831c5bae0e0d83ed521ad1ac"}, + {file = "h5py-3.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84342bffd1f82d4f036433e7039e241a243531a1d3acd7341b35ae58cdab05bf"}, + {file = "h5py-3.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:62be1fc0ef195891949b2c627ec06bc8e837ff62d5b911b6e42e38e0f20a897d"}, + {file = "h5py-3.12.1.tar.gz", hash = "sha256:326d70b53d31baa61f00b8aa5f95c2fcb9621a3ee8365d770c551a13dbbcbfdf"}, ] [package.dependencies] -numpy = ">=1.17.3" +numpy = ">=1.19.3" [[package]] name = "hello-robot-stretch-body" @@ -2074,13 +2088,13 @@ files = [ [[package]] name = "hello-robot-stretch-urdf" -version = "0.0.29" +version = "0.1.0" description = "Stretch URDF" optional = true python-versions = "*" files = [ - {file = "hello-robot-stretch-urdf-0.0.29.tar.gz", hash = "sha256:6ae05263c0ca4b817f57ff41feaf149c8284ebd1aa511b65bd230e6ab6d39bdc"}, - {file = "hello_robot_stretch_urdf-0.0.29-py3-none-any.whl", hash = "sha256:d33fb4cdea14b508ee56d177084cbd157f7fbc25c4048cfd00b465e94a72a2e5"}, + {file = "hello_robot_stretch_urdf-0.1.0-py3-none-any.whl", hash = "sha256:324f5ce0834b45b343e84bb8e8f5cbdd02f1315c6954856f0c68badb2b03e026"}, + {file = "hello_robot_stretch_urdf-0.1.0.tar.gz", hash = "sha256:51ed5984dbb6538e9f7cdc573b8a4a283118a13faaa06dc773c9bdda8bfe1034"}, ] [package.dependencies] @@ -2152,13 +2166,13 @@ files = [ [[package]] name = "httpcore" -version = "1.0.5" +version = "1.0.6" description = "A minimal low-level HTTP client." 
optional = true python-versions = ">=3.8" files = [ - {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, - {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, + {file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"}, + {file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"}, ] [package.dependencies] @@ -2169,7 +2183,7 @@ h11 = ">=0.13,<0.15" asyncio = ["anyio (>=4.0,<5.0)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] -trio = ["trio (>=0.22.0,<0.26.0)"] +trio = ["trio (>=0.22.0,<1.0)"] [[package]] name = "httpx" @@ -2198,13 +2212,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "huggingface-hub" -version = "0.25.1" +version = "0.25.2" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.25.1-py3-none-any.whl", hash = "sha256:a5158ded931b3188f54ea9028097312cb0acd50bffaaa2612014c3c526b44972"}, - {file = "huggingface_hub-0.25.1.tar.gz", hash = "sha256:9ff7cb327343211fbd06e2b149b8f362fd1e389454f3f14c6db75a4999ee20ff"}, + {file = "huggingface_hub-0.25.2-py3-none-any.whl", hash = "sha256:1897caf88ce7f97fe0110603d8f66ac264e3ba6accdf30cd66cc0fed5282ad25"}, + {file = "huggingface_hub-0.25.2.tar.gz", hash = "sha256:a1014ea111a5f40ccd23f7f7ba8ac46e20fa3b658ced1f86a00c75c06ec6423c"}, ] [package.dependencies] @@ -2478,13 +2492,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.27.0" +version = "8.28.0" description = "IPython: Productive Interactive Computing" optional = true python-versions = ">=3.10" files = [ - {file = "ipython-8.27.0-py3-none-any.whl", hash = "sha256:f68b3cb8bde357a5d7adc9598d57e22a45dfbea19eb6b98286fa3b288c9cd55c"}, - {file = "ipython-8.27.0.tar.gz", hash = "sha256:0b99a2dc9f15fd68692e898e5568725c6d49c527d36a9fb5960ffbdeaa82ff7e"}, + {file = "ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35"}, + {file = "ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a"}, ] [package.dependencies] @@ -2648,13 +2662,13 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "jsonschema-specifications" -version = "2023.12.1" +version = "2024.10.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, - {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, + {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, + {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, ] [package.dependencies] @@ -3315,71 +3329,72 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] [[package]] name = "markupsafe" -version = "2.1.5" 
+version = "3.0.1" description = "Safely add untrusted strings to HTML/XML markup." optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, - {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3ffb4a8e7d46ed96ae48805746755fadd0909fea2306f93d5d8233ba23dda12a"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67c519635a4f64e495c50e3107d9b4075aec33634272b5db1cde839e07367589"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48488d999ed50ba8d38c581d67e496f955821dc183883550a6fbc7f1aefdc170"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f31ae06f1328595d762c9a2bf29dafd8621c7d3adc130cbb46278079758779ca"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80fcbf3add8790caddfab6764bde258b5d09aefbe9169c183f88a7410f0f6dea"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = 
"sha256:3341c043c37d78cc5ae6e3e305e988532b072329639007fd408a476642a89fd6"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cb53e2a99df28eee3b5f4fea166020d3ef9116fdc5764bc5117486e6d1211b25"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-win32.whl", hash = "sha256:db15ce28e1e127a0013dfb8ac243a8e392db8c61eae113337536edb28bdc1f97"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:4ffaaac913c3f7345579db4f33b0020db693f302ca5137f106060316761beea9"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-win32.whl", hash = "sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8ae369e84466aa70f3154ee23c1451fda10a8ee1b63923ce76667e3077f2b0c4"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40f1e10d51c92859765522cbd79c5c8989f40f0419614bcdc5015e7b6bf97fc5"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a4cb365cb49b750bdb60b846b0c0bc49ed62e59a76635095a179d440540c346"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee3941769bd2522fe39222206f6dd97ae83c442a94c90f2b7a25d847d40f4729"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62fada2c942702ef8952754abfc1a9f7658a4d5460fabe95ac7ec2cbe0d02abc"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c2d64fdba74ad16138300815cfdc6ab2f4647e23ced81f59e940d7d4a1469d9"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fb532dd9900381d2e8f48172ddc5a59db4c445a11b9fab40b3b786da40d3b56b"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0f84af7e813784feb4d5e4ff7db633aba6c8ca64a833f61d8e4eade234ef0c38"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-win32.whl", hash = "sha256:cbf445eb5628981a80f54087f9acdbf84f9b7d862756110d172993b9a5ae81aa"}, + {file = 
"MarkupSafe-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:a10860e00ded1dd0a65b83e717af28845bb7bd16d8ace40fe5531491de76b79f"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e81c52638315ff4ac1b533d427f50bc0afc746deb949210bc85f05d4f15fd772"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:312387403cd40699ab91d50735ea7a507b788091c416dd007eac54434aee51da"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ae99f31f47d849758a687102afdd05bd3d3ff7dbab0a8f1587981b58a76152a"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c97ff7fedf56d86bae92fa0a646ce1a0ec7509a7578e1ed238731ba13aabcd1c"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7420ceda262dbb4b8d839a4ec63d61c261e4e77677ed7c66c99f4e7cb5030dd"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45d42d132cff577c92bfba536aefcfea7e26efb975bd455db4e6602f5c9f45e7"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c8817557d0de9349109acb38b9dd570b03cc5014e8aabf1cbddc6e81005becd"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6a54c43d3ec4cf2a39f4387ad044221c66a376e58c0d0e971d47c475ba79c6b5"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-win32.whl", hash = "sha256:c91b394f7601438ff79a4b93d16be92f216adb57d813a78be4446fe0f6bc2d8c"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:fe32482b37b4b00c7a52a07211b479653b7fe4f22b2e481b9a9b099d8a430f2f"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:17b2aea42a7280db02ac644db1d634ad47dcc96faf38ab304fe26ba2680d359a"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:852dc840f6d7c985603e60b5deaae1d89c56cb038b577f6b5b8c808c97580f1d"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0778de17cff1acaeccc3ff30cd99a3fd5c50fc58ad3d6c0e0c4c58092b859396"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:800100d45176652ded796134277ecb13640c1a537cad3b8b53da45aa96330453"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d06b24c686a34c86c8c1fba923181eae6b10565e4d80bdd7bc1c8e2f11247aa4"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:33d1c36b90e570ba7785dacd1faaf091203d9942bc036118fab8110a401eb1a8"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:beeebf760a9c1f4c07ef6a53465e8cfa776ea6a2021eda0d0417ec41043fe984"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bbde71a705f8e9e4c3e9e33db69341d040c827c7afa6789b14c6e16776074f5a"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-win32.whl", hash = "sha256:82b5dba6eb1bcc29cc305a18a3c5365d2af06ee71b123216416f7e20d2a84e5b"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4935dd7883f1d50e2ffecca0aa33dc1946a94c8f3fdafb8df5c330e48f71b132"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:e9393357f19954248b00bed7c56f29a25c930593a77630c719653d51e7669c2a"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40621d60d0e58aa573b68ac5e2d6b20d44392878e0bfc159012a5787c4e35bc8"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f94190df587738280d544971500b9cafc9b950d32efcb1fba9ac10d84e6aa4e6"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6a387d61fe41cdf7ea95b38e9af11cfb1a63499af2759444b99185c4ab33f5b"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8ad4ad1429cd4f315f32ef263c1342166695fad76c100c5d979c45d5570ed58b"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e24bfe89c6ac4c31792793ad9f861b8f6dc4546ac6dc8f1c9083c7c4f2b335cd"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2a4b34a8d14649315c4bc26bbfa352663eb51d146e35eef231dd739d54a5430a"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-win32.whl", hash = "sha256:242d6860f1fd9191aef5fae22b51c5c19767f93fb9ead4d21924e0bcb17619d8"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:93e8248d650e7e9d49e8251f883eed60ecbc0e8ffd6349e18550925e31bd029b"}, + {file = "markupsafe-3.0.1.tar.gz", hash = "sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344"}, ] [[package]] @@ -3777,20 +3792,21 @@ files = [ [[package]] name = "networkx" -version = "3.3" +version = "3.4" description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.10" files = [ - {file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"}, - {file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"}, + {file = "networkx-3.4-py3-none-any.whl", hash = "sha256:46dad0ec74a825a968e2b36c37ef5b91faa3868f017b2283d9cbff33112222ce"}, + {file = "networkx-3.4.tar.gz", hash = "sha256:1269b90f8f0d3a4095f016f49650f35ac169729f49b69d0572b2bb142748162b"}, ] [package.extras] -default = ["matplotlib (>=3.6)", "numpy (>=1.23)", "pandas (>=1.4)", "scipy (>=1.9,!=1.11.0,!=1.11.1)"] +default = ["matplotlib (>=3.7)", "numpy (>=1.24)", "pandas (>=2.0)", "scipy (>=1.10,!=1.11.0,!=1.11.1)"] developer = ["changelist (==0.5)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] -doc = ["myst-nb (>=1.0)", "numpydoc (>=1.7)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.14)", "sphinx (>=7)", "sphinx-gallery (>=0.14)", "texext (>=0.6.7)"] -extra = ["lxml (>=4.6)", "pydot (>=2.0)", "pygraphviz (>=1.12)", "sympy (>=1.10)"] +doc = ["intersphinx-registry", "myst-nb (>=1.1)", "numpydoc (>=1.8.0)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.15)", "sphinx (>=7.3)", "sphinx-gallery (>=0.16)", "texext (>=0.6.7)"] +example = ["cairocffi (>=1.7)", "contextily (>=1.6)", "igraph (>=0.11)", "momepy (>=0.7.2)", "osmnx (>=1.9)", "scikit-learn (>=1.5)", "seaborn (>=0.13)"] +extra = ["lxml (>=4.6)", "pydot (>=3.0.1)", "pygraphviz (>=1.14)", "sympy (>=1.10)"] test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] [[package]] @@ -3892,31 +3908,35 @@ numpy = ">=1.22,<2.1" [[package]] name = "numcodecs" -version = "0.13.0" +version = "0.13.1" description = "A Python package providing buffer compression and transformation codecs for use in data storage and communication applications." 
optional = false python-versions = ">=3.10" files = [ - {file = "numcodecs-0.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:56e49f68ce6aeba29f144992524c8897d94f846d02bbcc820dd29d7c5c2a073e"}, - {file = "numcodecs-0.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:17bc4b568214582f4c623700592f633f3afd920848630049c584fa1e535253ad"}, - {file = "numcodecs-0.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eed420a9c62d0a569aa94a387f93045f068ad3e7bbd787c6ce70bc5fefbaa7d9"}, - {file = "numcodecs-0.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:e7d3b9693df52eeaf978d2a56971d01cf9b4e284ae769ec764807f2087cce51d"}, - {file = "numcodecs-0.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f208a1b8b5e66c767ed043812ca74d9045e09b7b2e085d064a585c30b9efc8e7"}, - {file = "numcodecs-0.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a68368d3ce625ec76fcacd84785f6110d30a232909d5c6093a7aa25628880477"}, - {file = "numcodecs-0.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5904216811f2e9d312c23ffaad3b3d4c7442a3583d3a8bf81ca8319e9f5deb5"}, - {file = "numcodecs-0.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:208cab0f4d9cf4409e9c4a4c935e165833786614822c81dee9d865af372da9df"}, - {file = "numcodecs-0.13.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f3cf462d2357998d7f6baaa0427657b0eeda3eb79fba2b146d2d04542912a513"}, - {file = "numcodecs-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ac4dd5556fb126271e93bd1a02266e21b01d3617db448d70d00eec8e034506b4"}, - {file = "numcodecs-0.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:820be89729583c91601a6b35c052008cdd2665b25bfedb91b367cc155fb34ba0"}, - {file = "numcodecs-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:d67a859dd8a7f026829e91cb1799c26720cc9d29ee4ae0060cc7a581670abc06"}, - {file = "numcodecs-0.13.0.tar.gz", hash = "sha256:ba4fac7036ea5a078c7afe1d4dffeb9685080d42f19c9c16b12dad866703aa2e"}, + {file = "numcodecs-0.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:96add4f783c5ce57cc7e650b6cac79dd101daf887c479a00a29bc1487ced180b"}, + {file = "numcodecs-0.13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:237b7171609e868a20fd313748494444458ccd696062f67e198f7f8f52000c15"}, + {file = "numcodecs-0.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96e42f73c31b8c24259c5fac6adba0c3ebf95536e37749dc6c62ade2989dca28"}, + {file = "numcodecs-0.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:eda7d7823c9282e65234731fd6bd3986b1f9e035755f7fed248d7d366bb291ab"}, + {file = "numcodecs-0.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2eda97dd2f90add98df6d295f2c6ae846043396e3d51a739ca5db6c03b5eb666"}, + {file = "numcodecs-0.13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2a86f5367af9168e30f99727ff03b27d849c31ad4522060dde0bce2923b3a8bc"}, + {file = "numcodecs-0.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233bc7f26abce24d57e44ea8ebeb5cd17084690b4e7409dd470fdb75528d615f"}, + {file = "numcodecs-0.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:796b3e6740107e4fa624cc636248a1580138b3f1c579160f260f76ff13a4261b"}, + {file = "numcodecs-0.13.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5195bea384a6428f8afcece793860b1ab0ae28143c853f0b2b20d55a8947c917"}, + {file = "numcodecs-0.13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3501a848adaddce98a71a262fee15cd3618312692aa419da77acd18af4a6a3f6"}, + {file = 
"numcodecs-0.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da2230484e6102e5fa3cc1a5dd37ca1f92dfbd183d91662074d6f7574e3e8f53"}, + {file = "numcodecs-0.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:e5db4824ebd5389ea30e54bc8aeccb82d514d28b6b68da6c536b8fa4596f4bca"}, + {file = "numcodecs-0.13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7a60d75179fd6692e301ddfb3b266d51eb598606dcae7b9fc57f986e8d65cb43"}, + {file = "numcodecs-0.13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3f593c7506b0ab248961a3b13cb148cc6e8355662ff124ac591822310bc55ecf"}, + {file = "numcodecs-0.13.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80d3071465f03522e776a31045ddf2cfee7f52df468b977ed3afdd7fe5869701"}, + {file = "numcodecs-0.13.1-cp313-cp313-win_amd64.whl", hash = "sha256:90d3065ae74c9342048ae0046006f99dcb1388b7288da5a19b3bddf9c30c3176"}, + {file = "numcodecs-0.13.1.tar.gz", hash = "sha256:a3cf37881df0898f3a9c0d4477df88133fe85185bffe57ba31bcc2fa207709bc"}, ] [package.dependencies] numpy = ">=1.7" [package.extras] -docs = ["mock", "numpydoc", "pydata-sphinx-theme", "sphinx (<7.0.0)", "sphinx-issues"] +docs = ["mock", "numpydoc", "pydata-sphinx-theme", "sphinx", "sphinx-issues"] msgpack = ["msgpack"] pcodec = ["pcodec (>=0.2.0)"] test = ["coverage", "pytest", "pytest-cov"] @@ -4106,14 +4126,14 @@ files = [ [[package]] name = "nvidia-nvjitlink-cu12" -version = "12.6.68" +version = "12.6.77" description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" files = [ - {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b3fd0779845f68b92063ab1393abab1ed0a23412fc520df79a8190d098b5cd6b"}, - {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl", hash = "sha256:125a6c2a44e96386dda634e13d944e60b07a0402d391a070e8fb4104b34ea1ab"}, - {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-win_amd64.whl", hash = "sha256:a55744c98d70317c5e23db14866a8cc2b733f7324509e941fc96276f9f37801d"}, + {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d"}, + {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9ae346d16203ae4ea513be416495167a0101d33d2d14935aa9c1829a3fb45142"}, + {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:410718cd44962bed862a31dd0318620f6f9a8b28a6291967bcfcb446a6516771"}, ] [[package]] @@ -4196,10 +4216,10 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] [[package]] @@ -4220,10 +4240,10 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", 
markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] [[package]] @@ -4312,9 +4332,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -4595,13 +4615,13 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pre-commit" -version = "3.8.0" +version = "4.0.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." optional = true python-versions = ">=3.9" files = [ - {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, - {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, + {file = "pre_commit-4.0.1-py2.py3-none-any.whl", hash = "sha256:efde913840816312445dc98787724647c65473daefe420785f885e8ed9a06878"}, + {file = "pre_commit-4.0.1.tar.gz", hash = "sha256:80905ac375958c0444c65e9cebebd948b3cdb518f335a091a670a89d652139d2"}, ] [package.dependencies] @@ -4627,18 +4647,125 @@ twisted = ["twisted"] [[package]] name = "prompt-toolkit" -version = "3.0.47" +version = "3.0.48" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.7.0" files = [ - {file = "prompt_toolkit-3.0.47-py3-none-any.whl", hash = "sha256:0d7bfa67001d5e39d02c224b663abc33687405033a8c422d0d675a5a13361d10"}, - {file = "prompt_toolkit-3.0.47.tar.gz", hash = "sha256:1e1b29cb58080b1e69f207c893a1a7bf16d127a5c30c9d17a25a5d77792e5360"}, + {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"}, + {file = "prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90"}, ] [package.dependencies] wcwidth = "*" +[[package]] +name = "propcache" +version = "0.2.0" +description = "Accelerated property cache" +optional = false +python-versions = ">=3.8" +files = [ + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33ac8f098df0585c0b53009f039dfd913b38c1d2edafed0cedcc0c32a05aa110"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e48e8875e6c13909c800fa344cd54cc4b2b0db1d5f911f840458a500fde2c2"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388f3217649d6d59292b722d940d4d2e1e6a7003259eb835724092a1cca0203a"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f571aea50ba5623c308aa146eb650eebf7dbe0fd8c5d946e28343cb3b5aad577"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:3dfafb44f7bb35c0c06eda6b2ab4bfd58f02729e7c4045e179f9a861b07c9850"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3ebe9a75be7ab0b7da2464a77bb27febcb4fab46a34f9288f39d74833db7f61"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2f0d0f976985f85dfb5f3d685697ef769faa6b71993b46b295cdbbd6be8cc37"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a3dc1a4b165283bd865e8f8cb5f0c64c05001e0718ed06250d8cac9bec115b48"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e0f07b42d2a50c7dd2d8675d50f7343d998c64008f1da5fef888396b7f84630"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e63e3e1e0271f374ed489ff5ee73d4b6e7c60710e1f76af5f0e1a6117cd26394"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:56bb5c98f058a41bb58eead194b4db8c05b088c93d94d5161728515bd52b052b"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7665f04d0c7f26ff8bb534e1c65068409bf4687aa2534faf7104d7182debb336"}, + {file = "propcache-0.2.0-cp310-cp310-win32.whl", hash = "sha256:7cf18abf9764746b9c8704774d8b06714bcb0a63641518a3a89c7f85cc02c2ad"}, + {file = "propcache-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:cfac69017ef97db2438efb854edf24f5a29fd09a536ff3a992b75990720cdc99"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:63f13bf09cc3336eb04a837490b8f332e0db41da66995c9fd1ba04552e516354"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608cce1da6f2672a56b24a015b42db4ac612ee709f3d29f27a00c943d9e851de"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:466c219deee4536fbc83c08d09115249db301550625c7fef1c5563a584c9bc87"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6ed8db0a556343d566a5c124ee483ae113acc9a557a807d439bcecc44e7dfbb"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91997d9cb4a325b60d4e3f20967f8eb08dfcb32b22554d5ef78e6fd1dda743a2"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c7dde9e533c0a49d802b4f3f218fa9ad0a1ce21f2c2eb80d5216565202acab4"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:97a58a28bcf63284e8b4d7b460cbee1edaab24634e82059c7b8c09e65284f178"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:945db8ee295d3af9dbdbb698cce9bbc5c59b5c3fe328bbc4387f59a8a35f998d"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39e104da444a34830751715f45ef9fc537475ba21b7f1f5b0f4d71a3b60d7fe2"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c5ecca8f9bab618340c8e848d340baf68bcd8ad90a8ecd7a4524a81c1764b3db"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c436130cc779806bdf5d5fae0d848713105472b8566b75ff70048c47d3961c5b"}, + {file = 
"propcache-0.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:191db28dc6dcd29d1a3e063c3be0b40688ed76434622c53a284e5427565bbd9b"}, + {file = "propcache-0.2.0-cp311-cp311-win32.whl", hash = "sha256:5f2564ec89058ee7c7989a7b719115bdfe2a2fb8e7a4543b8d1c0cc4cf6478c1"}, + {file = "propcache-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e2e54267980349b723cff366d1e29b138b9a60fa376664a157a342689553f71"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ee7606193fb267be4b2e3b32714f2d58cad27217638db98a60f9efb5efeccc2"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:91ee8fc02ca52e24bcb77b234f22afc03288e1dafbb1f88fe24db308910c4ac7"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e900bad2a8456d00a113cad8c13343f3b1f327534e3589acc2219729237a2e8"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f52a68c21363c45297aca15561812d542f8fc683c85201df0bebe209e349f793"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e41d67757ff4fbc8ef2af99b338bfb955010444b92929e9e55a6d4dcc3c4f09"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a64e32f8bd94c105cc27f42d3b658902b5bcc947ece3c8fe7bc1b05982f60e89"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55346705687dbd7ef0d77883ab4f6fabc48232f587925bdaf95219bae072491e"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00181262b17e517df2cd85656fcd6b4e70946fe62cd625b9d74ac9977b64d8d9"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6994984550eaf25dd7fc7bd1b700ff45c894149341725bb4edc67f0ffa94efa4"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:56295eb1e5f3aecd516d91b00cfd8bf3a13991de5a479df9e27dd569ea23959c"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:439e76255daa0f8151d3cb325f6dd4a3e93043e6403e6491813bcaaaa8733887"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f6475a1b2ecb310c98c28d271a30df74f9dd436ee46d09236a6b750a7599ce57"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3444cdba6628accf384e349014084b1cacd866fbb88433cd9d279d90a54e0b23"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4a9d9b4d0a9b38d1c391bb4ad24aa65f306c6f01b512e10a8a34a2dc5675d348"}, + {file = "propcache-0.2.0-cp312-cp312-win32.whl", hash = "sha256:69d3a98eebae99a420d4b28756c8ce6ea5a29291baf2dc9ff9414b42676f61d5"}, + {file = "propcache-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad9c9b99b05f163109466638bd30ada1722abb01bbb85c739c50b6dc11f92dc3"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecddc221a077a8132cf7c747d5352a15ed763b674c0448d811f408bf803d9ad7"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0e53cb83fdd61cbd67202735e6a6687a7b491c8742dfc39c9e01e80354956763"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92fe151145a990c22cbccf9ae15cae8ae9eddabfc949a219c9f667877e40853d"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a21ef516d36909931a2967621eecb256018aeb11fc48656e3257e73e2e247a"}, + {file = 
"propcache-0.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f88a4095e913f98988f5b338c1d4d5d07dbb0b6bad19892fd447484e483ba6b"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a5b3bb545ead161be780ee85a2b54fdf7092815995661947812dde94a40f6fb"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67aeb72e0f482709991aa91345a831d0b707d16b0257e8ef88a2ad246a7280bf"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c997f8c44ec9b9b0bcbf2d422cc00a1d9b9c681f56efa6ca149a941e5560da2"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a66df3d4992bc1d725b9aa803e8c5a66c010c65c741ad901e260ece77f58d2f"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:3ebbcf2a07621f29638799828b8d8668c421bfb94c6cb04269130d8de4fb7136"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1235c01ddaa80da8235741e80815ce381c5267f96cc49b1477fdcf8c047ef325"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3947483a381259c06921612550867b37d22e1df6d6d7e8361264b6d037595f44"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d5bed7f9805cc29c780f3aee05de3262ee7ce1f47083cfe9f77471e9d6777e83"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4a91d44379f45f5e540971d41e4626dacd7f01004826a18cb048e7da7e96544"}, + {file = "propcache-0.2.0-cp313-cp313-win32.whl", hash = "sha256:f902804113e032e2cdf8c71015651c97af6418363bea8d78dc0911d56c335032"}, + {file = "propcache-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:8f188cfcc64fb1266f4684206c9de0e80f54622c3f22a910cbd200478aeae61e"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:53d1bd3f979ed529f0805dd35ddaca330f80a9a6d90bc0121d2ff398f8ed8861"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:83928404adf8fb3d26793665633ea79b7361efa0287dfbd372a7e74311d51ee6"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77a86c261679ea5f3896ec060be9dc8e365788248cc1e049632a1be682442063"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:218db2a3c297a3768c11a34812e63b3ac1c3234c3a086def9c0fee50d35add1f"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7735e82e3498c27bcb2d17cb65d62c14f1100b71723b68362872bca7d0913d90"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20a617c776f520c3875cf4511e0d1db847a076d720714ae35ffe0df3e440be68"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b69535c870670c9f9b14a75d28baa32221d06f6b6fa6f77a0a13c5a7b0a5b9"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4569158070180c3855e9c0791c56be3ceeb192defa2cdf6a3f39e54319e56b89"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:db47514ffdbd91ccdc7e6f8407aac4ee94cc871b15b577c1c324236b013ddd04"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:2a60ad3e2553a74168d275a0ef35e8c0a965448ffbc3b300ab3a5bb9956c2162"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_i686.whl", hash = 
"sha256:662dd62358bdeaca0aee5761de8727cfd6861432e3bb828dc2a693aa0471a563"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:25a1f88b471b3bc911d18b935ecb7115dff3a192b6fef46f0bfaf71ff4f12418"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:f60f0ac7005b9f5a6091009b09a419ace1610e163fa5deaba5ce3484341840e7"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:74acd6e291f885678631b7ebc85d2d4aec458dd849b8c841b57ef04047833bed"}, + {file = "propcache-0.2.0-cp38-cp38-win32.whl", hash = "sha256:d9b6ddac6408194e934002a69bcaadbc88c10b5f38fb9307779d1c629181815d"}, + {file = "propcache-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:676135dcf3262c9c5081cc8f19ad55c8a64e3f7282a21266d05544450bffc3a5"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:25c8d773a62ce0451b020c7b29a35cfbc05de8b291163a7a0f3b7904f27253e6"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:375a12d7556d462dc64d70475a9ee5982465fbb3d2b364f16b86ba9135793638"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ec43d76b9677637a89d6ab86e1fef70d739217fefa208c65352ecf0282be957"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f45eec587dafd4b2d41ac189c2156461ebd0c1082d2fe7013571598abb8505d1"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc092ba439d91df90aea38168e11f75c655880c12782facf5cf9c00f3d42b562"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa1076244f54bb76e65e22cb6910365779d5c3d71d1f18b275f1dfc7b0d71b4d"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:682a7c79a2fbf40f5dbb1eb6bfe2cd865376deeac65acf9beb607505dced9e12"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e40876731f99b6f3c897b66b803c9e1c07a989b366c6b5b475fafd1f7ba3fb8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:363ea8cd3c5cb6679f1c2f5f1f9669587361c062e4899fce56758efa928728f8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:140fbf08ab3588b3468932974a9331aff43c0ab8a2ec2c608b6d7d1756dbb6cb"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e70fac33e8b4ac63dfc4c956fd7d85a0b1139adcfc0d964ce288b7c527537fea"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b33d7a286c0dc1a15f5fc864cc48ae92a846df287ceac2dd499926c3801054a6"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f6d5749fdd33d90e34c2efb174c7e236829147a2713334d708746e94c4bde40d"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22aa8f2272d81d9317ff5756bb108021a056805ce63dd3630e27d042c8092798"}, + {file = "propcache-0.2.0-cp39-cp39-win32.whl", hash = "sha256:73e4b40ea0eda421b115248d7e79b59214411109a5bc47d0d48e4c73e3b8fcf9"}, + {file = "propcache-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:9517d5e9e0731957468c29dbfd0f976736a0e55afaea843726e887f36fe017df"}, + {file = "propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036"}, + {file = "propcache-0.2.0.tar.gz", hash = "sha256:df81779732feb9d01e5d513fad0122efb3d53bbc75f61b2a4f29a020bc985e70"}, +] + [[package]] name = "protobuf" version = "5.28.2" @@ 
-4789,37 +4916,37 @@ test = ["numpy"] [[package]] name = "pyav" -version = "13.0.0" +version = "13.1.0" description = "Pythonic bindings for FFmpeg's libraries." optional = false python-versions = ">=3.10" files = [ - {file = "pyav-13.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:9d423966eb46be1cc39d2e73bcdf6ddbf460e84ed397e2ed70a71f02e5a46459"}, - {file = "pyav-13.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d06cf4871087460b103c425c9067906bbf42526dac1cf0d6c2f2fb8c287534d4"}, - {file = "pyav-13.0.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:284cae0c940a3b0f7ae27f965cdc19107bef9d8058dcb6119a028d5fc4f2a370"}, - {file = "pyav-13.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09678c0c2e287d7d95b7a562d05893b49fbc2e25af2414de710c159475e5ffe9"}, - {file = "pyav-13.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:4a499eb490ada91e5cdfd77daa2928b24124561e46ad731585ff11ecf9bac8b5"}, - {file = "pyav-13.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:24bfcc144ffdb895c88075fc518f52ead7cddfc9a3917ff09a2e20bf3db1107a"}, - {file = "pyav-13.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:99dbf1357db5c33596aeb8c06cb2fd55ad36fd8463bfd5697168ba546bc3d829"}, - {file = "pyav-13.0.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f95cb104d3e59d1304462d41f7e4ded29f680eed84b01a991b297f915f4363d1"}, - {file = "pyav-13.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f18db877859a9243a8a128ae130180fd3fd98ed10e9ab6e047b77ac2d5e3155"}, - {file = "pyav-13.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:80f848385f62faf80b08657f2b864be3b6bbb4dde9247dce38eb63f36bcc5961"}, - {file = "pyav-13.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:99e5b8d9391a94816156114ee657a1ceb7b6466f5ac75748bcb46f64a5053f48"}, - {file = "pyav-13.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f860e37745695f35c48b4339fd439b783ebd7da31387bf09b610e12a18c5b60a"}, - {file = "pyav-13.0.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:618e434d6ac8595e4c8ae35ccf82d56a405b95a51fd153a430c4d89c50c2711d"}, - {file = "pyav-13.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89d80bbbf1fea0ac05cf184543b33622fca535a7aa70b6a19657bc4c0a28609c"}, - {file = "pyav-13.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:872c03ad737b86466d5fe38b3b320ccf2210b23a2311bb2b8fa1b7f88f39f0eb"}, - {file = "pyav-13.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:488dc9ac3cb79a8bb7e873a2eb0ba2570689feba352da80fa9e8fd71f6f72b61"}, - {file = "pyav-13.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0ec841af409bd15fb84e7e849636ee770a7220b190796f5a9990e168f768f485"}, - {file = "pyav-13.0.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:361b6a148226c179035f34d922c8aa5ef9215c085eb3a4625f6f3ca022590250"}, - {file = "pyav-13.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d264bf6dd5f38bd05c28d37bcdb6569f166651155a4c3776b7d14a220e7deb8a"}, - {file = "pyav-13.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:14bcafcc01df4e1925fd49afb97bcde25fd348c1ee84403fede3d614d7dcd635"}, - {file = "pyav-13.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b549c980016460c0f08de18a13969fcdc8d69a36a9f0c86a238cb45503944308"}, - {file = "pyav-13.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:dd8638d438f4813ad45a4a1cc918ad2f035b4e69be396c61a12c4d4b8d1a5d42"}, - {file = 
"pyav-13.0.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f922af5d77d8fab6a8846dd56c95a5ffb7fd9cd4aba19a8c3b0bf84af7a6410"}, - {file = "pyav-13.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e53b5dff8dbfc50c494c82fe029498bdb1089417358713199c520d8e850dacf4"}, - {file = "pyav-13.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:84f0af351a1d51b001fea4fb457dc905e58e6f3bc6389baf8a64ac9a47ca99ee"}, - {file = "pyav-13.0.0.tar.gz", hash = "sha256:4ec8ab1eb8dcf5447e6bf5890be116849570d53372b74ab485457694e8b61762"}, + {file = "pyav-13.1.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:64a81022e60dfba7dee9767a6fd150f42293855ea127979b2f38a3fd86f908fd"}, + {file = "pyav-13.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3971089334cc91e331c5014c8ea5fcbca0ccc82eb14952c128ce50570010a3cf"}, + {file = "pyav-13.1.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:154394ba47b4b55d4abda3e66e2b0a79e7b046c983191cb6113ea14769eea53a"}, + {file = "pyav-13.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b48efcde03b9952ece3c8a8d9d74c685ff84ab91b60ea0ae6960638e30f3f31"}, + {file = "pyav-13.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:8404d5a5eef975862a35f2338ab8e7ae5d7a7f9af1ac748edef2aca4543f44cd"}, + {file = "pyav-13.1.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:a75d67dc80ea87f3987fafa5699410047af818b20691046c76d12e18faf3da68"}, + {file = "pyav-13.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4571175c8511d36128e94955b8cc64b0452e16da42c81ceae745946f88abf477"}, + {file = "pyav-13.1.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7e1bd1157b21ca116c71696be62cd12bcaefc32179fd99efad90e0a76d300d3"}, + {file = "pyav-13.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:126386f2f8a0c57487a3ad947ac573385d41326b5ff111783135cc56a8869261"}, + {file = "pyav-13.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:57d94282ffd445ab055c36e150fee1a4a066e0aee259260c82792dbd349ec08d"}, + {file = "pyav-13.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b2daf24cbc01ee666c4893e69aac8fc65bab598ea0029382857930f652a5e5ff"}, + {file = "pyav-13.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83e3a67e2038b8cfd1d5dd2d1a1756ac1143a4c223b1723e64ac8bdb2045fb6a"}, + {file = "pyav-13.1.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24de515886366b2c952e3827e7fb6466ad06f40b5cb34595a3f922899727be2b"}, + {file = "pyav-13.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66befb4172facfaaf7f3be94b1659051378b0741f087d5b46d2a25b6bce34b4f"}, + {file = "pyav-13.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a65d060fceee59e5a1dd70e64bf6ffca55fff2b596af906b206d8ba0057bbdc5"}, + {file = "pyav-13.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8450899845220a2a4f3ecc3eba0d5f864c169d98a9892be75447e59480162a09"}, + {file = "pyav-13.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6b21df5daadbb019c4612cc89923202ad7a4dd259be905eba56887a14a344861"}, + {file = "pyav-13.1.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:21668b5ea9c4f046f61193a555d3deb2ca633b2ffb27a22a3b0eb03e8da64992"}, + {file = "pyav-13.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ae2413955b7d76826d214d3a5b719714f352de7de318e45275811fa07b9efe3"}, + {file = "pyav-13.1.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:a3ba8764bbf958e6c94b0dc7b07f670b4a759a157547a69cddc58eabba8aea1d"}, + {file = "pyav-13.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c92ef209e12660c6a75f81c9d228adc1e07294b875bf91d9b2a58c44a728b2d3"}, + {file = "pyav-13.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:2e1855824313c17367c5ba658cf99d8b3169e0c3e0bdef5aa87a4c472c46d72b"}, + {file = "pyav-13.1.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c7a2eb79af1d3414509e31631a1b837b011eba4a21e311ae1308eca95a9f4db"}, + {file = "pyav-13.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69566d6b5438259e2e4adc2975591d513b7f1280fbf4ed3e0901be10a4567470"}, + {file = "pyav-13.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2960397dd97d7462effe8e6696557a91f24c2841edf391b0355734db8e4b02cd"}, + {file = "pyav-13.1.0.tar.gz", hash = "sha256:7049f4df6f94b4b727c1339a094f29c4178f3e0c290a01b9fcf0190a9890704c"}, ] [[package]] @@ -4853,79 +4980,82 @@ files = [ [[package]] name = "pygame" -version = "2.6.0" +version = "2.6.1" description = "Python Game Development" optional = true python-versions = ">=3.6" files = [ - {file = "pygame-2.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e5707aa9d029752495b3eddc1edff62e0e390a02f699b0f1ce77fe0b8c70ea4f"}, - {file = "pygame-2.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d3ed0547368733b854c0d9981c982a3cdfabfa01b477d095c57bf47f2199da44"}, - {file = "pygame-2.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6050f3e95f1f16602153d616b52619c6a2041cee7040eb529f65689e9633fc3e"}, - {file = "pygame-2.6.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:89be55b7e9e22e0eea08af9d6cfb97aed5da780f0b3a035803437d481a16d972"}, - {file = "pygame-2.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d65fb222eea1294cfc8206d9e5754d476a1673eb2783c03c4f70e0455320274"}, - {file = "pygame-2.6.0-cp310-cp310-win32.whl", hash = "sha256:71eebb9803cb350298de188fb7cdd3ebf13299f78d59a71c7e81efc649aae348"}, - {file = "pygame-2.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:1551852a2cd5b4139a752888f6cbeeb4a96fc0fe6e6f3f8b9d9784eb8fceab13"}, - {file = "pygame-2.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f6e5e6c010b1bf429388acf4d41d7ab2f7ad8fbf241d0db822102d35c9a2eb84"}, - {file = "pygame-2.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:99902f4a2f6a338057200d99b5120a600c27a9f629ca012a9b0087c045508d08"}, - {file = "pygame-2.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a284664978a1989c1e31a0888b2f70cfbcbafdfa3bb310e750b0d3366416225"}, - {file = "pygame-2.6.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:829623cee298b3dbaa1dd9f52c3051ae82f04cad7708c8c67cb9a1a4b8fd3c0b"}, - {file = "pygame-2.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6acf7949ed764487d51123f4f3606e8f76b0df167fef12ef73ef423c35fdea39"}, - {file = "pygame-2.6.0-cp311-cp311-win32.whl", hash = "sha256:3f809560c99bd1fb4716610eca0cd36412528f03da1a63841a347b71d0c604ee"}, - {file = "pygame-2.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:6897ab87f9193510a774a3483e00debfe166f340ca159f544ef99807e2a44ec4"}, - {file = "pygame-2.6.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b834711ebc8b9d0c2a5f9bfae4403dd277b2c61bcb689e1aa630d01a1ebcf40a"}, - {file = "pygame-2.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:b5ac288655e8a31a303cc286e79cc57979ed2ba19c3a14042d4b6391c1d3bed2"}, - {file = "pygame-2.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d666667b7826b0a7921b8ce0a282ba5281dfa106976c1a3b24e32a0af65ad3b1"}, - {file = "pygame-2.6.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd8848a37a7cee37854c7efb8d451334477c9f8ce7ac339c079e724dc1334a76"}, - {file = "pygame-2.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:315e7b3c1c573984f549ac5da9778ac4709b3b4e3a4061050d94eab63fa4fe31"}, - {file = "pygame-2.6.0-cp312-cp312-win32.whl", hash = "sha256:e44bde0840cc21a91c9d368846ac538d106cf0668be1a6030f48df139609d1e8"}, - {file = "pygame-2.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:1c429824b1f881a7a5ce3b5c2014d3d182aa45a22cea33c8347a3971a5446907"}, - {file = "pygame-2.6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b832200bd8b6fc485e087bf3ef7ec1a21437258536413a5386088f5dcd3a9870"}, - {file = "pygame-2.6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:098029d01a46ea4e30620dfb7c28a577070b456c8fc96350dde05f85c0bf51b5"}, - {file = "pygame-2.6.0-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a858bbdeac5ec473ec9e726c55fb8fbdc2f4aad7c55110e899883738071c7c9b"}, - {file = "pygame-2.6.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f908762941fd99e1f66d1211d26383184f6045c45673443138b214bf48a89aa"}, - {file = "pygame-2.6.0-cp36-cp36m-win32.whl", hash = "sha256:4a63daee99d050f47d6ec7fa7dbd1c6597b8f082cdd58b6918d382d2bc31262d"}, - {file = "pygame-2.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:ace471b3849d68968e5427fc01166ef5afaf552a5c442fc2c28d3b7226786f55"}, - {file = "pygame-2.6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fea019713d0c89dfd5909225aa933010100035d1cd30e6c936e8b6f00529fb80"}, - {file = "pygame-2.6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:249dbf2d51d9f0266009a380ccf0532e1a57614a1528bb2f89a802b01d61f93e"}, - {file = "pygame-2.6.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb51533ee3204e8160600b0de34eaad70eb913a182c94a7777b6051e8fc52f1"}, - {file = "pygame-2.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f637636a44712e94e5601ec69160a080214626471983dfb0b5b68aa0c61563d"}, - {file = "pygame-2.6.0-cp37-cp37m-win32.whl", hash = "sha256:e432156b6f346f4cc6cab03ce9657600093390f4c9b10bf458716b25beebfe33"}, - {file = "pygame-2.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a0194652db7874bdde7dfc69d659ca954544c012e04ae527151325bfb970f423"}, - {file = "pygame-2.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eae3ee62cc172e268121d5bd9dc406a67094d33517de3a91de3323d6ae23eb02"}, - {file = "pygame-2.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f6a58b0a5a8740a3c2cf6fc5366888bd4514561253437f093c12a9ab4fb3ecae"}, - {file = "pygame-2.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c71da36997dc7b9b4ee973fa3a5d4a6cfb2149161b5b1c08b712d2f13a63ccfe"}, - {file = "pygame-2.6.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b86771801a7fc10d9a62218f27f1d5c13341c3a27394aa25578443a9cd199830"}, - {file = "pygame-2.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4928f3acf5a9ce5fbab384c21f1245304535ffd5fb167ae92a6b4d3cdb55a3b6"}, - {file = "pygame-2.6.0-cp38-cp38-win32.whl", hash = 
"sha256:4faab2df9926c4d31215986536b112f0d76f711cf02f395805f1ff5df8fd55fc"}, - {file = "pygame-2.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:afbb8d97aed93dfb116fe105603dacb68f8dab05b978a40a9e4ab1b6c1f683fd"}, - {file = "pygame-2.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d11f3646b53819892f4a731e80b8589a9140343d0d4b86b826802191b241228c"}, - {file = "pygame-2.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5ef92ed93c354eabff4b85e457d4d6980115004ec7ff52a19fd38b929c3b80fb"}, - {file = "pygame-2.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9bc1795f2e36302882546faacd5a0191463c4f4ae2b90e7c334a7733aa4190d2"}, - {file = "pygame-2.6.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e92294fcc85c4955fe5bc6a0404e4cc870808005dc8f359e881544e3cc214108"}, - {file = "pygame-2.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0cb7bdf3ee0233a3ac02ef777c01dfe315e6d4670f1312c83b91c1ef124359a"}, - {file = "pygame-2.6.0-cp39-cp39-win32.whl", hash = "sha256:ac906478ae489bb837bf6d2ae1eb9261d658aa2c34fa5b283027a04149bda81a"}, - {file = "pygame-2.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:92cf12a9722f6f0bdc5520d8925a8f085cff9c054a2ea462fc409cba3781be27"}, - {file = "pygame-2.6.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:a6636f452fdaddf604a060849feb84c056930b6a3c036214f607741f16aac942"}, - {file = "pygame-2.6.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dc242dc15d067d10f25c5b12a1da48ca9436d8e2d72353eaf757e83612fba2f"}, - {file = "pygame-2.6.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f82df23598a281c8c342d3c90be213c8fe762a26c15815511f60d0aac6e03a70"}, - {file = "pygame-2.6.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2ed2539bb6bd211fc570b1169dc4a64a74ec5cd95741e62a0ab46bd18fe08e0d"}, - {file = "pygame-2.6.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:904aaf29710c6b03a7e1a65b198f5467ed6525e8e60bdcc5e90ff8584c1d54ea"}, - {file = "pygame-2.6.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcd28f96f0fffd28e71a98773843074597e10d7f55a098e2e5bcb2bef1bdcbf5"}, - {file = "pygame-2.6.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4fad1ab33443ecd4f958dbbb67fc09fcdc7a37e26c34054e3296fb7e26ad641e"}, - {file = "pygame-2.6.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e909186d4d512add39b662904f0f79b73028fbfc4fbfdaf6f9412aed4e500e9c"}, - {file = "pygame-2.6.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79abcbf6d12fce51a955a0652ccd50b6d0a355baa27799535eaf21efb43433dd"}, - {file = "pygame-2.6.0.tar.gz", hash = "sha256:722d33ae676aa8533c1f955eded966411298831346b8d51a77dad22e46ba3e35"}, + {file = "pygame-2.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9beeb647e555afb5657111fa83acb74b99ad88761108eaea66472e8b8547b55b"}, + {file = "pygame-2.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:10e3d2a55f001f6c0a6eb44aa79ea7607091c9352b946692acedb2ac1482f1c9"}, + {file = "pygame-2.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:816e85000c5d8b02a42b9834f761a5925ef3377d2924e3a7c4c143d2990ce5b8"}, + {file = "pygame-2.6.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a78fd030d98faab4a8e27878536fdff7518d3e062a72761c552f624ebba5a5f"}, + {file = "pygame-2.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:da3ad64d685f84a34ebe5daacb39fff14f1251acb34c098d760d63fee768f50c"}, + {file = "pygame-2.6.1-cp310-cp310-win32.whl", hash = "sha256:9dd5c054d4bd875a8caf978b82672f02bec332f52a833a76899220c460bb4b58"}, + {file = "pygame-2.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:00827aba089355925902d533f9c41e79a799641f03746c50a374dc5c3362e43d"}, + {file = "pygame-2.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:20349195326a5e82a16e351ed93465a7845a7e2a9af55b7bc1b2110ea3e344e1"}, + {file = "pygame-2.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f3935459109da4bb0b3901da9904f0a3e52028a3332a355d298b1673a334cf21"}, + {file = "pygame-2.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c31dbdb5d0217f32764797d21c2752e258e5fb7e895326538d82b5f75a0cd856"}, + {file = "pygame-2.6.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:173badf82fa198e6888017bea40f511cb28e69ecdd5a72b214e81e4dcd66c3b1"}, + {file = "pygame-2.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce8cc108b92de9b149b344ad2e25eedbe773af0dc41dfb24d1f07f679b558c60"}, + {file = "pygame-2.6.1-cp311-cp311-win32.whl", hash = "sha256:811e7b925146d8149d79193652cbb83e0eca0aae66476b1cb310f0f4226b8b5c"}, + {file = "pygame-2.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:91476902426facd4bb0dad4dc3b2573bc82c95c71b135e0daaea072ed528d299"}, + {file = "pygame-2.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4ee7f2771f588c966fa2fa8b829be26698c9b4836f82ede5e4edc1a68594942e"}, + {file = "pygame-2.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c8040ea2ab18c6b255af706ec01355c8a6b08dc48d77fd4ee783f8fc46a843bf"}, + {file = "pygame-2.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c47a6938de93fa610accd4969e638c2aebcb29b2fca518a84c3a39d91ab47116"}, + {file = "pygame-2.6.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33006f784e1c7d7e466fcb61d5489da59cc5f7eb098712f792a225df1d4e229d"}, + {file = "pygame-2.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1206125f14cae22c44565c9d333607f1d9f59487b1f1432945dfc809aeaa3e88"}, + {file = "pygame-2.6.1-cp312-cp312-win32.whl", hash = "sha256:84fc4054e25262140d09d39e094f6880d730199710829902f0d8ceae0213379e"}, + {file = "pygame-2.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:3a9e7396be0d9633831c3f8d5d82dd63ba373ad65599628294b7a4f8a5a01a65"}, + {file = "pygame-2.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae6039f3a55d800db80e8010f387557b528d34d534435e0871326804df2a62f2"}, + {file = "pygame-2.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2a3a1288e2e9b1e5834e425bedd5ba01a3cd4902b5c2bff8ed4a740ccfe98171"}, + {file = "pygame-2.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27eb17e3dc9640e4b4683074f1890e2e879827447770470c2aba9f125f74510b"}, + {file = "pygame-2.6.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c1623180e70a03c4a734deb9bac50fc9c82942ae84a3a220779062128e75f3b"}, + {file = "pygame-2.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef07c0103d79492c21fced9ad68c11c32efa6801ca1920ebfd0f15fb46c78b1c"}, + {file = "pygame-2.6.1-cp313-cp313-win32.whl", hash = "sha256:3acd8c009317190c2bfd81db681ecef47d5eb108c2151d09596d9c7ea9df5c0e"}, + {file = "pygame-2.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:813af4fba5d0b2cb8e58f5d95f7910295c34067dcc290d34f1be59c48bd1ea6a"}, + {file = 
"pygame-2.6.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:56ffca6059b165bbf64f4b4be23b8068f6a0e220780e4f96ec0bb5ac3c63ec39"}, + {file = "pygame-2.6.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bede70ec708057e305815d6546012669226d1d80566785feca9b044216062e7"}, + {file = "pygame-2.6.1-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f84f15d146d6aa93254008a626c56ef96fed276006202881a47b29757f0cd65a"}, + {file = "pygame-2.6.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14f9dda45469b254c0f15edaaeaa85d2cc072ff6a83584a265f5d684c7f7efd8"}, + {file = "pygame-2.6.1-cp36-cp36m-win32.whl", hash = "sha256:28b43190436037e428a5be28fc80cf6615304fd528009f2c688cc828f4ff104b"}, + {file = "pygame-2.6.1-cp36-cp36m-win_amd64.whl", hash = "sha256:a4b8f04fceddd9a3ac30778d11f0254f59efcd1c382d5801271113cea8b4f2f3"}, + {file = "pygame-2.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a620883d589926f157b8f1d1f543183ac52e5c30507dea445e3927ae0bee1c54"}, + {file = "pygame-2.6.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b46e68cd168f44d0224c670bb72186688fc692d7079715f79d04096757d703d0"}, + {file = "pygame-2.6.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c0b11356ac96261162d54a2c2b41a41978f00525631b01ec9c4fe26b01c66595"}, + {file = "pygame-2.6.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:325a84d072d52e3c2921eff02f87c6a74b7e77d71db3bdf53801c6c975f1b6c4"}, + {file = "pygame-2.6.1-cp37-cp37m-win32.whl", hash = "sha256:2a615d78b2364e86f541458ff41c2a46181b9a1e9eabd97b389282fdf04efbb3"}, + {file = "pygame-2.6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:94afd1177680d92f9214c54966ad3517d18210c4fbc5d84a0192d218e93647e0"}, + {file = "pygame-2.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97ac4e13847b6b293ecaffa5ffce9886c98d09c03309406931cc592f0cea6366"}, + {file = "pygame-2.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d1a7f2b66ac2e4c9583b6d4c6d6f346fb10a3392c04163f537061f86a448ed5c"}, + {file = "pygame-2.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac3f033d2be4a9e23660a96afe2986df3a6916227538a6a0061bc218c5088507"}, + {file = "pygame-2.6.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1bf7ab5311bbced70320f1a56701650b4c18231343ae5af42111eea91e0949a"}, + {file = "pygame-2.6.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21160d9093533eb831f1b708e630706e5ac16b30750571ec27bc3b8364814f38"}, + {file = "pygame-2.6.1-cp38-cp38-win32.whl", hash = "sha256:7bffdd3eaf394d9645331d1c3a5df9d782ebcc3c5a78f3b657c7879a828dd111"}, + {file = "pygame-2.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:818b4eaec9c4acb6ac64805d4ca8edd4062bebca77bd815c18739fe2842c97e9"}, + {file = "pygame-2.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15efaa11a80a65dd589a95bebe812fa5bfc7e14946b638a424c5bd9ac6cca1a4"}, + {file = "pygame-2.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:481cfe1bdbb7fe00acc5950c494c26f00240888619bdc396fc8c39a734797432"}, + {file = "pygame-2.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d09fd950725d187aa5207c0cb8eb9ab0d2f8ce9ab8d189c30eeb470e71b617e"}, + {file = "pygame-2.6.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:163e66de169bd5670c86e27d0b74aad0d2d745e3b63cf4e7eb5b2bff1231ca8d"}, + {file = "pygame-2.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:fb6e8d0547f30ddc845f4fd1e33070ef548233ad0dbf21f7ecea768883d1bbdc"}, + {file = "pygame-2.6.1-cp39-cp39-win32.whl", hash = "sha256:d29eb9a93f12aa3d997b6e3c447ac85b2a4b142ab2548441523a8fcf5e216042"}, + {file = "pygame-2.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:6582aa71a681e02e55d43150a9ab41394e6bf4d783d2962a10aea58f424be060"}, + {file = "pygame-2.6.1-pp36-pypy36_pp73-win32.whl", hash = "sha256:4a8ea113b1bf627322a025a1a5a87e3818a7f55ab3a4077ff1ae5c8c60576614"}, + {file = "pygame-2.6.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b7f9f8e6f76de36f4725175d686601214af362a4f30614b4dae2240198e72e6f"}, + {file = "pygame-2.6.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bbb7167c92103a2091366e9af26d4914ba3776666e8677d3c93551353fffa626"}, + {file = "pygame-2.6.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:17498a2b043bc0e795faedef1b081199c688890200aef34991c1941caa2d2c89"}, + {file = "pygame-2.6.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7103c60939bbc1e05cfc7ba3f1d2ad3bbf103b7828b82a7166a9ab6f51950146"}, + {file = "pygame-2.6.1.tar.gz", hash = "sha256:56fb02ead529cee00d415c3e007f75e0780c655909aaa8e8bf616ee09c9feb1f"}, ] [[package]] name = "pyglet" -version = "2.0.17" +version = "2.0.18" description = "pyglet is a cross-platform games and multimedia package." optional = true python-versions = ">=3.8" files = [ - {file = "pyglet-2.0.17-py3-none-any.whl", hash = "sha256:c881615a5bf14455af36a0915fd9dad0069da904ab5e0ec19b4d6cdfcf1e84c2"}, - {file = "pyglet-2.0.17.tar.gz", hash = "sha256:50c533c1a7cafdccccf43041338ad921ae26866e9871b4f12bf608500632900a"}, + {file = "pyglet-2.0.18-py3-none-any.whl", hash = "sha256:e592952ae0297e456c587b6486ed8c3e5f9d0c3519d517bb92dde5fdf4c26b41"}, + {file = "pyglet-2.0.18.tar.gz", hash = "sha256:7cf9238d70082a2da282759679f8a011cc979753a32224a8ead8ed80e48f99dc"}, ] [[package]] @@ -5206,7 +5336,7 @@ trimesh = "*" [package.extras] dev = ["flake8", "pre-commit", "pytest", "pytest-cov", "tox"] -docs = ["sphinx", "sphinx-automodapi", "sphinx_rtd_theme"] +docs = ["sphinx", "sphinx-automodapi", "sphinx-rtd-theme"] [package.source] type = "git" @@ -5804,18 +5934,19 @@ files = [ [[package]] name = "rich" -version = "13.8.1" +version = "13.9.2" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = true -python-versions = ">=3.7.0" +python-versions = ">=3.8.0" files = [ - {file = "rich-13.8.1-py3-none-any.whl", hash = "sha256:1760a3c0848469b97b558fc61c85233e3dafb69c7a071b4d60c38099d3cd4c06"}, - {file = "rich-13.8.1.tar.gz", hash = "sha256:8260cda28e3db6bf04d2d1ef4dbc03ba80a824c88b0e7668a0f23126a424844a"}, + {file = "rich-13.9.2-py3-none-any.whl", hash = "sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1"}, + {file = "rich-13.9.2.tar.gz", hash = "sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c"}, ] [package.dependencies] markdown-it-py = ">=2.2.0" pygments = ">=2.13.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.11\""} [package.extras] jupyter = ["ipywidgets (>=7.5.1,<9)"] @@ -6193,13 +6324,13 @@ win32 = ["pywin32"] [[package]] name = "sentry-sdk" -version = "2.14.0" +version = "2.16.0" description = "Python client for Sentry (https://sentry.io)" optional = false python-versions = ">=3.6" files = [ - {file = "sentry_sdk-2.14.0-py2.py3-none-any.whl", hash = 
"sha256:b8bc3dc51d06590df1291b7519b85c75e2ced4f28d9ea655b6d54033503b5bf4"}, - {file = "sentry_sdk-2.14.0.tar.gz", hash = "sha256:1e0e2eaf6dad918c7d1e0edac868a7bf20017b177f242cefe2a6bcd47955961d"}, + {file = "sentry_sdk-2.16.0-py2.py3-none-any.whl", hash = "sha256:49139c31ebcd398f4f6396b18910610a0c1602f6e67083240c33019d1f6aa30c"}, + {file = "sentry_sdk-2.16.0.tar.gz", hash = "sha256:90f733b32e15dfc1999e6b7aca67a38688a567329de4d6e184154a73f96c6892"}, ] [package.dependencies] @@ -6222,6 +6353,7 @@ falcon = ["falcon (>=1.4)"] fastapi = ["fastapi (>=0.79.0)"] flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"] grpcio = ["grpcio (>=1.21.1)", "protobuf (>=3.8.0)"] +http2 = ["httpcore[http2] (==1.*)"] httpx = ["httpx (>=0.16.0)"] huey = ["huey (>=2)"] huggingface-hub = ["huggingface-hub (>=0.22)"] @@ -6364,13 +6496,13 @@ type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11 [[package]] name = "sh" -version = "2.0.7" +version = "2.1.0" description = "Python subprocess replacement" optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "sh-2.0.7-py3-none-any.whl", hash = "sha256:2f2f79a65abd00696cf2e9ad26508cf8abb6dba5745f40255f1c0ded2876926d"}, - {file = "sh-2.0.7.tar.gz", hash = "sha256:029d45198902bfb967391eccfd13a88d92f7cebd200411e93f99ebacc6afbb35"}, + {file = "sh-2.1.0-py3-none-any.whl", hash = "sha256:bf5e44178dd96a542126c2774e9b7ab1d89bfe0e2ef84d92e6d0ed7358d63d01"}, + {file = "sh-2.1.0.tar.gz", hash = "sha256:7e27301c574bec8ca5bf6f211851357526455ee97cd27a7c4c6cc5e2375399cb"}, ] [[package]] @@ -6587,13 +6719,13 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"] [[package]] name = "termcolor" -version = "2.4.0" +version = "2.5.0" description = "ANSI color formatting for output in terminal" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "termcolor-2.4.0-py3-none-any.whl", hash = "sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63"}, - {file = "termcolor-2.4.0.tar.gz", hash = "sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a"}, + {file = "termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8"}, + {file = "termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f"}, ] [package.extras] @@ -6662,13 +6794,13 @@ test = ["pytest", "ruff"] [[package]] name = "tomli" -version = "2.0.1" +version = "2.0.2" description = "A lil' TOML parser" optional = true -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, + {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, + {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, ] [[package]] @@ -6877,13 +7009,13 @@ tutorials = ["matplotlib", "pandas", "tabulate"] [[package]] name = "types-python-dateutil" -version = "2.9.0.20240906" +version = "2.9.0.20241003" description = "Typing stubs for python-dateutil" optional = true python-versions = ">=3.8" files = [ - {file = "types-python-dateutil-2.9.0.20240906.tar.gz", hash = "sha256:9706c3b68284c25adffc47319ecc7947e5bb86b3773f843c73906fd598bc176e"}, - {file = 
"types_python_dateutil-2.9.0.20240906-py3-none-any.whl", hash = "sha256:27c8cc2d058ccb14946eebcaaa503088f4f6dbc4fb6093d3d456a49aef2753f6"}, + {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, + {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, ] [[package]] @@ -6981,13 +7113,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.26.5" +version = "20.26.6" description = "Virtual Python Environment builder" optional = true python-versions = ">=3.7" files = [ - {file = "virtualenv-20.26.5-py3-none-any.whl", hash = "sha256:4f3ac17b81fba3ce3bd6f4ead2749a72da5929c01774948e243db9ba41df4ff6"}, - {file = "virtualenv-20.26.5.tar.gz", hash = "sha256:ce489cac131aa58f4b25e321d6d186171f78e6cb13fafbf32a840cee67733ff4"}, + {file = "virtualenv-20.26.6-py3-none-any.whl", hash = "sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2"}, + {file = "virtualenv-20.26.6.tar.gz", hash = "sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48"}, ] [package.dependencies] @@ -7001,19 +7133,21 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [[package]] name = "wandb" -version = "0.18.1" +version = "0.18.3" description = "A CLI and library for interacting with the Weights & Biases API." optional = false python-versions = ">=3.7" files = [ - {file = "wandb-0.18.1-py3-none-any.whl", hash = "sha256:be936a193eeb940ce03d966f013b847562497e76256852d5fb170cdcdf50f185"}, - {file = "wandb-0.18.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1f143b814b0fd51b5f1a676ad8b66bd06a5ee4ad22fc46bcbf24048d76c77d35"}, - {file = "wandb-0.18.1-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:86b73a9f94f18b07f0e937ae945560244b560b57c16a9dfb8f03e2516d0cc666"}, - {file = "wandb-0.18.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc404682ebfb2477b48cb436a331e1bea0262e002d6fb3ccafe71d13657dd4ee"}, - {file = "wandb-0.18.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4c97d69242efd604c1a2077c8b56341e236cfaca78c40f59dcef9b95464fdc"}, - {file = "wandb-0.18.1-py3-none-win32.whl", hash = "sha256:33c5a0d74bc28879917b519f24d69b0e81530d72e99aba1c115189a2c9aac9cf"}, - {file = "wandb-0.18.1-py3-none-win_amd64.whl", hash = "sha256:559cbd6e9ab752622f7d6dacdc334ede7f1bc34f42df3f48ed32bde55db42c6e"}, - {file = "wandb-0.18.1.tar.gz", hash = "sha256:d625e94d53ff4ff961c58a9a17f0a1ea35720d98b9db710a458235924469fc6b"}, + {file = "wandb-0.18.3-py3-none-any.whl", hash = "sha256:7da64f7da0ff7572439de10bfd45534e8811e71e78ac2ccc3b818f1c0f3a9aef"}, + {file = "wandb-0.18.3-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:6674d8a5c40c79065b9c7eb765136756d5ebc9457a5f9abc820a660fb23f8b67"}, + {file = "wandb-0.18.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:741f566e409a2684d3047e4cc25e8e914d78196b901190937b24b6abb8b052e5"}, + {file = "wandb-0.18.3-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:8be5e877570b693001c52dcc2089e48e6a4dcbf15f3adf5c9349f95148b59d58"}, + {file = "wandb-0.18.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d788852bd4739fa18de3918f309c3a955b5cef3247fae1c40df3a63af637e1a0"}, + {file = "wandb-0.18.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab81424eb207d78239a8d69c90521a70074fb81e3709055484e43c76fe44dc08"}, + {file = 
"wandb-0.18.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2c91315b8b62423eae18577d66a4b4bb8e4341a7d5c849cb2963e3b3dff0bf6d"}, + {file = "wandb-0.18.3-py3-none-win32.whl", hash = "sha256:92a647dab783938ec87776a9fae8a13e72e6dad939c53e357cdea9d2570f0ad8"}, + {file = "wandb-0.18.3-py3-none-win_amd64.whl", hash = "sha256:29cac2cfa3124241fed22cfedc9a52e1500275ee9bbb0b428ce4bf63c4723bf0"}, + {file = "wandb-0.18.3.tar.gz", hash = "sha256:eb2574cea72bc908c6ce1b37edf7a889619e6e06e1b4714eecfe0662ded43c06"}, ] [package.dependencies] @@ -7036,7 +7170,7 @@ gcp = ["google-cloud-storage"] importers = ["filelock", "mlflow", "polars (<=1.2.1)", "rich", "tenacity"] kubeflow = ["google-cloud-storage", "kubernetes", "minio", "sh"] launch = ["awscli", "azure-containerregistry", "azure-identity", "azure-storage-blob", "boto3", "botocore", "chardet", "google-auth", "google-cloud-aiplatform", "google-cloud-artifact-registry", "google-cloud-compute", "google-cloud-storage", "iso8601", "jsonschema", "kubernetes", "kubernetes-asyncio", "nbconvert", "nbformat", "optuna", "pydantic", "pyyaml (>=6.0.0)", "tomli", "typing-extensions"] -media = ["bokeh", "moviepy", "numpy", "pillow", "plotly (>=5.18.0)", "rdkit-pypi", "soundfile"] +media = ["bokeh", "imageio", "moviepy", "numpy", "pillow", "plotly (>=5.18.0)", "rdkit", "soundfile"] models = ["cloudpickle"] perf = ["orjson"] sweeps = ["sweeps (>=0.2.0)"] @@ -7125,13 +7259,13 @@ files = [ [[package]] name = "xmltodict" -version = "0.13.0" +version = "0.14.1" description = "Makes working with XML feel like you are working with JSON" optional = true -python-versions = ">=3.4" +python-versions = ">=3.6" files = [ - {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, - {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, + {file = "xmltodict-0.14.1-py2.py3-none-any.whl", hash = "sha256:3ef4a7b71c08f19047fcbea572e1d7f4207ab269da1565b5d40e9823d3894e63"}, + {file = "xmltodict-0.14.1.tar.gz", hash = "sha256:338c8431e4fc554517651972d62f06958718f6262b04316917008e8fd677a6b0"}, ] [[package]] @@ -7268,108 +7402,109 @@ files = [ [[package]] name = "yarl" -version = "1.12.1" +version = "1.14.0" description = "Yet another URL library" optional = false python-versions = ">=3.8" files = [ - {file = "yarl-1.12.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:64c5b0f2b937fe40d0967516eee5504b23cb247b8b7ffeba7213a467d9646fdc"}, - {file = "yarl-1.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2e430ac432f969ef21770645743611c1618362309e3ad7cab45acd1ad1a540ff"}, - {file = "yarl-1.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3e26e64f42bce5ddf9002092b2c37b13071c2e6413d5c05f9fa9de58ed2f7749"}, - {file = "yarl-1.12.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0103c52f8dfe5d573c856322149ddcd6d28f51b4d4a3ee5c4b3c1b0a05c3d034"}, - {file = "yarl-1.12.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b63465b53baeaf2122a337d4ab57d6bbdd09fcadceb17a974cfa8a0300ad9c67"}, - {file = "yarl-1.12.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17d4dc4ff47893a06737b8788ed2ba2f5ac4e8bb40281c8603920f7d011d5bdd"}, - {file = "yarl-1.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8b54949267bd5704324397efe9fbb6aa306466dee067550964e994d309db5f1"}, - {file = 
"yarl-1.12.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10b690cd78cbaca2f96a7462f303fdd2b596d3978b49892e4b05a7567c591572"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c85ab016e96a975afbdb9d49ca90f3bca9920ef27c64300843fe91c3d59d8d20"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c1caa5763d1770216596e0a71b5567f27aac28c95992110212c108ec74589a48"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:595bbcdbfc4a9c6989d7489dca8510cba053ff46b16c84ffd95ac8e90711d419"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e64f0421892a207d3780903085c1b04efeb53b16803b23d947de5a7261b71355"}, - {file = "yarl-1.12.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:319c206e83e46ec2421b25b300c8482b6fe8a018baca246be308c736d9dab267"}, - {file = "yarl-1.12.1-cp310-cp310-win32.whl", hash = "sha256:da045bd1147d12bd43fb032296640a7cc17a7f2eaba67495988362e99db24fd2"}, - {file = "yarl-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:aebbd47df77190ada603157f0b3670d578c110c31746ecc5875c394fdcc59a99"}, - {file = "yarl-1.12.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:28389a68981676bf74e2e199fe42f35d1aa27a9c98e3a03e6f58d2d3d054afe1"}, - {file = "yarl-1.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f736f54565f8dd7e3ab664fef2bc461d7593a389a7f28d4904af8d55a91bd55f"}, - {file = "yarl-1.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dee0496d5f1a8f57f0f28a16f81a2033fc057a2cf9cd710742d11828f8c80e2"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8981a94a27ac520a398302afb74ae2c0be1c3d2d215c75c582186a006c9e7b0"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff54340fc1129e8e181827e2234af3ff659b4f17d9bbe77f43bc19e6577fadec"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:54c8cee662b5f8c30ad7eedfc26123f845f007798e4ff1001d9528fe959fd23c"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e97a29b37830ba1262d8dfd48ddb5b28ad4d3ebecc5d93a9c7591d98641ec737"}, - {file = "yarl-1.12.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c89894cc6f6ddd993813e79244b36b215c14f65f9e4f1660b1f2ba9e5594b95"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:712ba8722c0699daf186de089ddc4677651eb9875ed7447b2ad50697522cbdd9"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6e9a9f50892153bad5046c2a6df153224aa6f0573a5a8ab44fc54a1e886f6e21"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1d4017e78fb22bc797c089b746230ad78ecd3cdb215bc0bd61cb72b5867da57e"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f494c01b28645c431239863cb17af8b8d15b93b0d697a0320d5dd34cd9d7c2fa"}, - {file = "yarl-1.12.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:de4544b1fb29cf14870c4e2b8a897c0242449f5dcebd3e0366aa0aa3cf58a23a"}, - {file = "yarl-1.12.1-cp311-cp311-win32.whl", hash = "sha256:7564525a4673fde53dee7d4c307a961c0951918f0b8c7f09b2c9e02067cf6504"}, - {file = "yarl-1.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:f23bb1a7a6e8e8b612a164fdd08e683bcc16c76f928d6dbb7bdbee2374fbfee6"}, - {file = "yarl-1.12.1-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:a3e2aff8b822ab0e0bdbed9f50494b3a35629c4b9488ae391659973a37a9f53f"}, - {file = "yarl-1.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:22dda2799c8d39041d731e02bf7690f0ef34f1691d9ac9dfcb98dd1e94c8b058"}, - {file = "yarl-1.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18c2a7757561f05439c243f517dbbb174cadfae3a72dee4ae7c693f5b336570f"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:835010cc17d0020e7931d39e487d72c8e01c98e669b6896a8b8c9aa8ca69a949"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2254fe137c4a360b0a13173a56444f756252c9283ba4d267ca8e9081cd140ea"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6a071d2c3d39b4104f94fc08ab349e9b19b951ad4b8e3b6d7ea92d6ef7ccaf8"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73a183042ae0918c82ce2df38c3db2409b0eeae88e3afdfc80fb67471a95b33b"}, - {file = "yarl-1.12.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:326b8a079a9afcac0575971e56dabdf7abb2ea89a893e6949b77adfeb058b50e"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:126309c0f52a2219b3d1048aca00766429a1346596b186d51d9fa5d2070b7b13"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ba1c779b45a399cc25f511c681016626f69e51e45b9d350d7581998722825af9"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:af1107299cef049ad00a93df4809517be432283a0847bcae48343ebe5ea340dc"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:20d817c0893191b2ab0ba30b45b77761e8dfec30a029b7c7063055ca71157f84"}, - {file = "yarl-1.12.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d4f818f6371970d6a5d1e42878389bbfb69dcde631e4bbac5ec1cb11158565ca"}, - {file = "yarl-1.12.1-cp312-cp312-win32.whl", hash = "sha256:0ac33d22b2604b020569a82d5f8a03ba637ba42cc1adf31f616af70baf81710b"}, - {file = "yarl-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:fd24996e12e1ba7c397c44be75ca299da14cde34d74bc5508cce233676cc68d0"}, - {file = "yarl-1.12.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dea360778e0668a7ad25d7727d03364de8a45bfd5d808f81253516b9f2217765"}, - {file = "yarl-1.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1f50a37aeeb5179d293465e522fd686080928c4d89e0ff215e1f963405ec4def"}, - {file = "yarl-1.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0274b1b7a9c9c32b7bf250583e673ff99fb9fccb389215841e2652d9982de740"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4f3ab9eb8ab2d585ece959c48d234f7b39ac0ca1954a34d8b8e58a52064bdb3"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d31dd0245d88cf7239e96e8f2a99f815b06e458a5854150f8e6f0e61618d41b"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a96198d5d26f40557d986c1253bfe0e02d18c9d9b93cf389daf1a3c9f7c755fa"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddae504cfb556fe220efae65e35be63cd11e3c314b202723fc2119ce19f0ca2e"}, - {file = "yarl-1.12.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bce00f3b1f7f644faae89677ca68645ed5365f1c7f874fdd5ebf730a69640d38"}, - {file = 
"yarl-1.12.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eee5ff934b0c9f4537ff9596169d56cab1890918004791a7a06b879b3ba2a7ef"}, - {file = "yarl-1.12.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4ea99e64b2ad2635e0f0597b63f5ea6c374791ff2fa81cdd4bad8ed9f047f56f"}, - {file = "yarl-1.12.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5c667b383529520b8dd6bd496fc318678320cb2a6062fdfe6d3618da6b8790f6"}, - {file = "yarl-1.12.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d920401941cb898ef089422e889759dd403309eb370d0e54f1bdf6ca07fef603"}, - {file = "yarl-1.12.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:501a1576716032cc6d48c7c47bcdc42d682273415a8f2908e7e72cb4625801f3"}, - {file = "yarl-1.12.1-cp313-cp313-win32.whl", hash = "sha256:24416bb5e221e29ddf8aac5b97e94e635ca2c5be44a1617ad6fe32556df44294"}, - {file = "yarl-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:71af3766bb46738d12cc288d9b8de7ef6f79c31fd62757e2b8a505fe3680b27f"}, - {file = "yarl-1.12.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c924deab8105f86980983eced740433fb7554a7f66db73991affa4eda99d5402"}, - {file = "yarl-1.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5fb475a4cdde582c9528bb412b98f899680492daaba318231e96f1a0a1bb0d53"}, - {file = "yarl-1.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:36ee0115b9edca904153a66bb74a9ff1ce38caff015de94eadfb9ba8e6ecd317"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2631c9d7386bd2d4ce24ecc6ebf9ae90b3efd713d588d90504eaa77fec4dba01"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2376d8cf506dffd0e5f2391025ae8675b09711016656590cb03b55894161fcfa"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24197ba3114cc85ddd4091e19b2ddc62650f2e4a899e51b074dfd52d56cf8c72"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfdf419bf5d3644f94cd7052954fc233522f5a1b371fc0b00219ebd9c14d5798"}, - {file = "yarl-1.12.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8112f640a4f7e7bf59f7cabf0d47a29b8977528c521d73a64d5cc9e99e48a174"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:607d12f0901f6419a8adceb139847c42c83864b85371f58270e42753f9780fa6"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:664380c7ed524a280b6a2d5d9126389c3e96cd6e88986cdb42ca72baa27421d6"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:0d0a5e87bc48d76dfcfc16295201e9812d5f33d55b4a0b7cad1025b92bf8b91b"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:eff6bac402719c14e17efe845d6b98593c56c843aca6def72080fbede755fd1f"}, - {file = "yarl-1.12.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:22839d1d1eab9e4b427828a88a22beb86f67c14d8ff81175505f1cc8493f3500"}, - {file = "yarl-1.12.1-cp38-cp38-win32.whl", hash = "sha256:717f185086bb9d817d4537dd18d5df5d657598cd00e6fc22e4d54d84de266c1d"}, - {file = "yarl-1.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:71978ba778948760cff528235c951ea0ef7a4f9c84ac5a49975f8540f76c3f73"}, - {file = "yarl-1.12.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:30ffc046ebddccb3c4cac72c1a3e1bc343492336f3ca86d24672e90ccc5e788a"}, - {file = "yarl-1.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f10954b233d4df5cc3137ffa5ced97f8894152df817e5d149bf05a0ef2ab8134"}, - {file = 
"yarl-1.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2e912b282466444023610e4498e3795c10e7cfd641744524876239fcf01d538d"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af871f70cfd5b528bd322c65793b5fd5659858cdfaa35fbe563fb99b667ed1f"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3e4e1f7b08d1ec6b685ccd3e2d762219c550164fbf524498532e39f9413436e"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9a7ee79183f0b17dcede8b6723e7da2ded529cf159a878214be9a5d3098f5b1e"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96c8ff1e1dd680e38af0887927cab407a4e51d84a5f02ae3d6eb87233036c763"}, - {file = "yarl-1.12.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e9905fc2dc1319e4c39837b906a024cf71b1261cc66b0cd89678f779c0c61f5"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:01549468858b87d36f967c97d02e6e54106f444aeb947ed76f8f71f85ed07cec"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:96b34830bd6825ca0220bf005ea99ac83eb9ce51301ddb882dcf613ae6cd95fb"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:2aee7594d2c2221c717a8e394bbed4740029df4c0211ceb0f04815686e99c795"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:15871130439ad10abb25a4631120d60391aa762b85fcab971411e556247210a0"}, - {file = "yarl-1.12.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:838dde2cb570cfbb4cab8a876a0974e8b90973ea40b3ac27a79b8a74c8a2db15"}, - {file = "yarl-1.12.1-cp39-cp39-win32.whl", hash = "sha256:eacbcf30efaca7dc5cb264228ffecdb95fdb1e715b1ec937c0ce6b734161e0c8"}, - {file = "yarl-1.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:76a59d1b63de859398bc7764c860a769499511463c1232155061fe0147f13e01"}, - {file = "yarl-1.12.1-py3-none-any.whl", hash = "sha256:dc3192a81ecd5ff954cecd690327badd5a84d00b877e1573f7c9097ce13e5bfb"}, - {file = "yarl-1.12.1.tar.gz", hash = "sha256:5b860055199aec8d6fe4dcee3c5196ce506ca198a50aab0059ffd26e8e815828"}, + {file = "yarl-1.14.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1bfc25aa6a7c99cf86564210f79a0b7d4484159c67e01232b116e445b3036547"}, + {file = "yarl-1.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0cf21f46a15d445417de8fc89f2568852cf57fe8ca1ab3d19ddb24d45c0383ae"}, + {file = "yarl-1.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1dda53508df0de87b6e6b0a52d6718ff6c62a5aca8f5552748404963df639269"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:587c3cc59bc148a9b1c07a019346eda2549bc9f468acd2f9824d185749acf0a6"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3007a5b75cb50140708420fe688c393e71139324df599434633019314ceb8b59"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06ff23462398333c78b6f4f8d3d70410d657a471c2c5bbe6086133be43fc8f1a"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:689a99a42ee4583fcb0d3a67a0204664aa1539684aed72bdafcbd505197a91c4"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0547ab1e9345dc468cac8368d88ea4c5bd473ebc1d8d755347d7401982b5dd8"}, + {file = 
"yarl-1.14.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:742aef0a99844faaac200564ea6f5e08facb285d37ea18bd1a5acf2771f3255a"}, + {file = "yarl-1.14.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:176110bff341b6730f64a1eb3a7070e12b373cf1c910a9337e7c3240497db76f"}, + {file = "yarl-1.14.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:46a9772a1efa93f9cd170ad33101c1817c77e0e9914d4fe33e2da299d7cf0f9b"}, + {file = "yarl-1.14.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ee2c68e4f2dd1b1c15b849ba1c96fac105fca6ffdb7c1e8be51da6fabbdeafb9"}, + {file = "yarl-1.14.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:047b258e00b99091b6f90355521f026238c63bd76dcf996d93527bb13320eefd"}, + {file = "yarl-1.14.0-cp310-cp310-win32.whl", hash = "sha256:0aa92e3e30a04f9462a25077db689c4ac5ea9ab6cc68a2e563881b987d42f16d"}, + {file = "yarl-1.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:d9baec588f015d0ee564057aa7574313c53a530662ffad930b7886becc85abdf"}, + {file = "yarl-1.14.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:07f9eaf57719d6721ab15805d85f4b01a5b509a0868d7320134371bcb652152d"}, + {file = "yarl-1.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c14b504a74e58e2deb0378b3eca10f3d076635c100f45b113c18c770b4a47a50"}, + {file = "yarl-1.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:16a682a127930f3fc4e42583becca6049e1d7214bcad23520c590edd741d2114"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73bedd2be05f48af19f0f2e9e1353921ce0c83f4a1c9e8556ecdcf1f1eae4892"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f3ab950f8814f3b7b5e3eebc117986f817ec933676f68f0a6c5b2137dd7c9c69"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b693c63e7e64b524f54aa4888403c680342d1ad0d97be1707c531584d6aeeb4f"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85cb3e40eaa98489f1e2e8b29f5ad02ee1ee40d6ce6b88d50cf0f205de1d9d2c"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f24f08b6c9b9818fd80612c97857d28f9779f0d1211653ece9844fc7b414df2"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:29a84a46ec3ebae7a1c024c055612b11e9363a8a23238b3e905552d77a2bc51b"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5cd5dad8366e0168e0fd23d10705a603790484a6dbb9eb272b33673b8f2cce72"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a152751af7ef7b5d5fa6d215756e508dd05eb07d0cf2ba51f3e740076aa74373"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:3d569f877ed9a708e4c71a2d13d2940cb0791da309f70bd970ac1a5c088a0a92"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6a615cad11ec3428020fb3c5a88d85ce1b5c69fd66e9fcb91a7daa5e855325dd"}, + {file = "yarl-1.14.0-cp311-cp311-win32.whl", hash = "sha256:bab03192091681d54e8225c53f270b0517637915d9297028409a2a5114ff4634"}, + {file = "yarl-1.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:985623575e5c4ea763056ffe0e2d63836f771a8c294b3de06d09480538316b13"}, + {file = "yarl-1.14.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fc2c80bc87fba076e6cbb926216c27fba274dae7100a7b9a0983b53132dd99f2"}, + {file = "yarl-1.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:55c144d363ad4626ca744556c049c94e2b95096041ac87098bb363dcc8635e8d"}, + {file = "yarl-1.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b03384eed107dbeb5f625a99dc3a7de8be04fc8480c9ad42fccbc73434170b20"}, + {file = "yarl-1.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f72a0d746d38cb299b79ce3d4d60ba0892c84bbc905d0d49c13df5bace1b65f8"}, + {file = "yarl-1.14.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8648180b34faaea4aa5b5ca7e871d9eb1277033fa439693855cf0ea9195f85f1"}, + {file = "yarl-1.14.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9557c9322aaa33174d285b0c1961fb32499d65ad1866155b7845edc876c3c835"}, + {file = "yarl-1.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f50eb3837012a937a2b649ec872b66ba9541ad9d6f103ddcafb8231cfcafd22"}, + {file = "yarl-1.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8892fa575ac9b1b25fae7b221bc4792a273877b9b56a99ee2d8d03eeb3dbb1d2"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e6a2c5c5bb2556dfbfffffc2bcfb9c235fd2b566d5006dfb2a37afc7e3278a07"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ab3abc0b78a5dfaa4795a6afbe7b282b6aa88d81cf8c1bb5e394993d7cae3457"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:47eede5d11d669ab3759b63afb70d28d5328c14744b8edba3323e27dc52d298d"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fe4d2536c827f508348d7b40c08767e8c7071614250927233bf0c92170451c0a"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0fd7b941dd1b00b5f0acb97455fea2c4b7aac2dd31ea43fb9d155e9bc7b78664"}, + {file = "yarl-1.14.0-cp312-cp312-win32.whl", hash = "sha256:99ff3744f5fe48288be6bc402533b38e89749623a43208e1d57091fc96b783b9"}, + {file = "yarl-1.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:1ca3894e9e9f72da93544f64988d9c052254a338a9f855165f37f51edb6591de"}, + {file = "yarl-1.14.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5d02d700705d67e09e1f57681f758f0b9d4412eeb70b2eb8d96ca6200b486db3"}, + {file = "yarl-1.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:30600ba5db60f7c0820ef38a2568bb7379e1418ecc947a0f76fd8b2ff4257a97"}, + {file = "yarl-1.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e85d86527baebb41a214cc3b45c17177177d900a2ad5783dbe6f291642d4906f"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37001e5d4621cef710c8dc1429ca04e189e572f128ab12312eab4e04cf007132"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4f4547944d4f5cfcdc03f3f097d6f05bbbc915eaaf80a2ee120d0e756de377d"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75ff4c819757f9bdb35de049a509814d6ce851fe26f06eb95a392a5640052482"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68ac1a09392ed6e3fd14be880d39b951d7b981fd135416db7d18a6208c536561"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96952f642ac69075e44c7d0284528938fdff39422a1d90d3e45ce40b72e5e2d9"}, + {file = "yarl-1.14.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a56fbe3d7f3bce1d060ea18d2413a2ca9ca814eea7cedc4d247b5f338d54844e"}, + {file = 
"yarl-1.14.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7e2637d75e92763d1322cb5041573279ec43a80c0f7fbbd2d64f5aee98447b17"}, + {file = "yarl-1.14.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9abe80ae2c9d37c17599557b712e6515f4100a80efb2cda15f5f070306477cd2"}, + {file = "yarl-1.14.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:217a782020b875538eebf3948fac3a7f9bbbd0fd9bf8538f7c2ad7489e80f4e8"}, + {file = "yarl-1.14.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9cfef3f14f75bf6aba73a76caf61f9d00865912a04a4393c468a7ce0981b519"}, + {file = "yarl-1.14.0-cp313-cp313-win32.whl", hash = "sha256:d8361c7d04e6a264481f0b802e395f647cd3f8bbe27acfa7c12049efea675bd1"}, + {file = "yarl-1.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:bc24f968b82455f336b79bf37dbb243b7d76cd40897489888d663d4e028f5069"}, + {file = "yarl-1.14.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:91d875f75fabf76b3018c5f196bf3d308ed2b49ddcb46c1576d6b075754a1393"}, + {file = "yarl-1.14.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4009def9be3a7e5175db20aa2d7307ecd00bbf50f7f0f989300710eee1d0b0b9"}, + {file = "yarl-1.14.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:582cedde49603f139be572252a318b30dc41039bc0b8165f070f279e5d12187f"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbd9ff43a04f8ffe8a959a944c2dca10d22f5f99fc6a459f49c3ebfb409309d9"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b9f805e37ed16cc212fdc538a608422d7517e7faf539bedea4fe69425bc55d76"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:95e16e9eaa2d7f5d87421b8fe694dd71606aa61d74b824c8d17fc85cc51983d1"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:816d24f584edefcc5ca63428f0b38fee00b39fe64e3c5e558f895a18983efe96"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd2660c01367eb3ef081b8fa0a5da7fe767f9427aa82023a961a5f28f0d4af6c"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:94b2bb9bcfd5be9d27004ea4398fb640373dd0c1a9e219084f42c08f77a720ab"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:c2089a9afef887664115f7fa6d3c0edd6454adaca5488dba836ca91f60401075"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2192f718db4a8509f63dd6d950f143279211fa7e6a2c612edc17d85bf043d36e"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:8385ab36bf812e9d37cf7613999a87715f27ef67a53f0687d28c44b819df7cb0"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b4c1ecba93e7826dc71ddba75fb7740cdb52e7bd0be9f03136b83f54e6a1f511"}, + {file = "yarl-1.14.0-cp38-cp38-win32.whl", hash = "sha256:e749af6c912a7bb441d105c50c1a3da720474e8acb91c89350080dd600228f0e"}, + {file = "yarl-1.14.0-cp38-cp38-win_amd64.whl", hash = "sha256:147e36331f6f63e08a14640acf12369e041e0751bb70d9362df68c2d9dcf0c87"}, + {file = "yarl-1.14.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a9f917966d27f7ce30039fe8d900f913c5304134096554fd9bea0774bcda6d1"}, + {file = "yarl-1.14.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a2f8fb7f944bcdfecd4e8d855f84c703804a594da5123dd206f75036e536d4d"}, + {file = "yarl-1.14.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f4e475f29a9122f908d0f1f706e1f2fc3656536ffd21014ff8a6f2e1b14d1d8"}, + {file = 
"yarl-1.14.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8089d4634d8fa2b1806ce44fefa4979b1ab2c12c0bc7ef3dfa45c8a374811348"}, + {file = "yarl-1.14.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b16f6c75cffc2dc0616ea295abb0e1967601bd1fb1e0af6a1de1c6c887f3439"}, + {file = "yarl-1.14.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:498b3c55087b9d762636bca9b45f60d37e51d24341786dc01b81253f9552a607"}, + {file = "yarl-1.14.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3f8bfc1db82589ef965ed234b87de30d140db8b6dc50ada9e33951ccd8ec07a"}, + {file = "yarl-1.14.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:625f207b1799e95e7c823f42f473c1e9dbfb6192bd56bba8695656d92be4535f"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:781e2495e408a81e4eaeedeb41ba32b63b1980dddf8b60dbbeff6036bcd35049"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:659603d26d40dd4463200df9bfbc339fbfaed3fe32e5c432fe1dc2b5d4aa94b4"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4e0d45ebf975634468682c8bec021618b3ad52c37619e5c938f8f831fa1ac5c0"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:a2e4725a08cb2b4794db09e350c86dee18202bb8286527210e13a1514dc9a59a"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:19268b4fec1d7760134f2de46ef2608c2920134fb1fa61e451f679e41356dc55"}, + {file = "yarl-1.14.0-cp39-cp39-win32.whl", hash = "sha256:337912bcdcf193ade64b9aae5a4017a0a1950caf8ca140362e361543c6773f21"}, + {file = "yarl-1.14.0-cp39-cp39-win_amd64.whl", hash = "sha256:b6d0147574ce2e7b812c989e50fa72bbc5338045411a836bd066ce5fc8ac0bce"}, + {file = "yarl-1.14.0-py3-none-any.whl", hash = "sha256:c8ed4034f0765f8861620c1f2f2364d2e58520ea288497084dae880424fc0d9f"}, + {file = "yarl-1.14.0.tar.gz", hash = "sha256:88c7d9d58aab0724b979ab5617330acb1c7030b79379c8138c1c8c94e121d1b3"}, ] [package.dependencies] idna = ">=2.0" multidict = ">=4.0" +propcache = ">=0.2.0" [[package]] name = "zarr" @@ -7427,4 +7562,4 @@ xarm = ["gym-xarm"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "78f31561a7e4b6f0a97e27a65ec00c2c1826f420d2587396762bb5485d12f676" +content-hash = "f64e01ce021ae77baa2c9bb82cbd2dd6035ab01a1500207da7acdb7f9d0772e1" diff --git a/pyproject.toml b/pyproject.toml index 47e982d1..89ed7ff0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ opencv-python = ">=4.9.0" diffusers = ">=0.27.2" torchvision = ">=0.17.1" h5py = ">=3.10.0" -huggingface-hub = {extras = ["hf-transfer", "cli"], version = ">=0.25.0"} +huggingface-hub = {extras = ["hf-transfer", "cli"], version = ">=0.25.2"} gymnasium = ">=0.29.1" cmake = ">=3.29.0.1" gym-dora = { git = "https://github.com/dora-rs/dora-lerobot.git", subdirectory = "gym_dora", optional = true } From 3ea53124e0487e25266325b8ee2384d287f2be3a Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 11 Oct 2024 17:38:47 +0200 Subject: [PATCH 10/59] Add padding keys and download_data option --- lerobot/common/datasets/lerobot_dataset.py | 44 ++++++++++++++++++---- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index b283a185..61d27287 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -52,6 +52,7 @@ class 
LeRobotDataset(torch.utils.data.Dataset): image_transforms: Callable | None = None, delta_timestamps: dict[list[float]] | None = None, tolerance_s: float = 1e-4, + download_data: bool = True, video_backend: str | None = None, ): """LeRobotDataset encapsulates 3 main things: @@ -128,6 +129,7 @@ class LeRobotDataset(torch.utils.data.Dataset): timestamps is separated to the next by 1/fps +/- tolerance_s. This also applies to frames decoded from video files. It is also used to check that `delta_timestamps` (when provided) are multiples of 1/fps. Defaults to 1e-4. + download_data (bool, optional): Flag to download actual data. Defaults to True. video_backend (str | None, optional): Video backend to use for decoding videos. There is currently a single option which is the pyav decoder used by Torchvision. Defaults to pyav. """ @@ -139,6 +141,7 @@ class LeRobotDataset(torch.utils.data.Dataset): self.delta_timestamps = delta_timestamps self.episodes = episodes self.tolerance_s = tolerance_s + self.download_data = download_data self.video_backend = video_backend if video_backend is not None else "pyav" self.delta_indices = None @@ -149,6 +152,13 @@ class LeRobotDataset(torch.utils.data.Dataset): self.stats = load_stats(repo_id, self._version, self.root) self.tasks = load_tasks(repo_id, self._version, self.root) + if not self.download_data: + # TODO(aliberts): Add actual support for this + # maybe use local_files_only=True or HF_HUB_OFFLINE=True + # see thread https://huggingface.slack.com/archives/C06ME3E7JUD/p1728637455476019 + self.hf_dataset, self.episode_data_index = None, None + return + # Load actual data self.download_episodes() self.hf_dataset = load_hf_dataset(self.root, self.data_path, self.total_episodes, self.episodes) @@ -243,6 +253,11 @@ class LeRobotDataset(torch.utils.data.Dataset): """Keys to access image and video streams from cameras (regardless of their storage method).""" return self.image_keys + self.video_keys + @property + def names(self) -> dict[list[str]]: + """Names of the various dimensions of vector modalities.""" + return self.info["names"] + @property def num_samples(self) -> int: """Number of samples/frames.""" @@ -275,21 +290,29 @@ class LeRobotDataset(torch.utils.data.Dataset): """Number of samples/frames for given episode.""" return self.info["episodes"][episode_index]["length"] - def _get_query_indices(self, idx: int, ep_idx: int) -> dict[str, list[int]]: - # Pad values outside of current episode range + def _get_query_indices(self, idx: int, ep_idx: int) -> tuple[dict[str, list[int | bool]]]: ep_start = self.episode_data_index["from"][ep_idx] ep_end = self.episode_data_index["to"][ep_idx] - return { + query_indices = { key: [max(ep_start.item(), min(ep_end.item() - 1, idx + delta)) for delta in delta_idx] for key, delta_idx in self.delta_indices.items() } + padding = { # Pad values outside of current episode range + f"{key}_is_pad": torch.BoolTensor( + [(idx + delta < ep_start.item()) | (idx + delta >= ep_end.item()) for delta in delta_idx] + ) + for key, delta_idx in self.delta_indices.items() + } + return query_indices, padding def _get_query_timestamps( - self, query_indices: dict[str, list[int]], current_ts: float + self, + current_ts: float, + query_indices: dict[str, list[int]] | None = None, ) -> dict[str, list[float]]: query_timestamps = {} for key in self.video_keys: - if key in query_indices: + if query_indices is not None and key in query_indices: timestamps = self.hf_dataset.select(query_indices[key])["timestamp"] query_timestamps[key] = 
torch.stack(timestamps).tolist() else: @@ -320,6 +343,11 @@ class LeRobotDataset(torch.utils.data.Dataset): return item + def _add_padding_keys(self, item: dict, padding: dict[str, list[bool]]) -> dict: + for key, val in padding.items(): + item[key] = torch.BoolTensor(val) + return item + def __len__(self): return self.num_samples @@ -327,16 +355,18 @@ class LeRobotDataset(torch.utils.data.Dataset): item = self.hf_dataset[idx] ep_idx = item["episode_index"].item() + query_indices = None if self.delta_indices is not None: current_ep_idx = self.episodes.index(ep_idx) if self.episodes is not None else ep_idx - query_indices = self._get_query_indices(idx, current_ep_idx) + query_indices, padding = self._get_query_indices(idx, current_ep_idx) query_result = self._query_hf_dataset(query_indices) + item = {**item, **padding} for key, val in query_result.items(): item[key] = val if len(self.video_keys) > 0: current_ts = item["timestamp"].item() - query_timestamps = self._get_query_timestamps(query_indices, current_ts) + query_timestamps = self._get_query_timestamps(current_ts, query_indices) video_frames = self._query_videos(query_timestamps, ep_idx) item = {**video_frames, **item} From 8bd406e6070e200b36b6a9a864011bb4063fcedc Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 11 Oct 2024 18:52:11 +0200 Subject: [PATCH 11/59] Add suggestions from code review --- lerobot/common/datasets/lerobot_dataset.py | 26 ++++++++-------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 61d27287..6b149554 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -48,11 +48,10 @@ class LeRobotDataset(torch.utils.data.Dataset): repo_id: str, root: Path | None = None, episodes: list[int] | None = None, - split: str = "train", image_transforms: Callable | None = None, delta_timestamps: dict[list[float]] | None = None, tolerance_s: float = 1e-4, - download_data: bool = True, + download_videos: bool = True, video_backend: str | None = None, ): """LeRobotDataset encapsulates 3 main things: @@ -64,7 +63,7 @@ class LeRobotDataset(torch.utils.data.Dataset): - hf_dataset (from datasets.Dataset), which will read any values from parquet files. - (optional) videos from which frames are loaded to be synchronous with data from parquet files. - 3 use modes are available for this class, depending on 3 different use cases: + 3 modes are available for this class, depending on 3 different use cases: 1. Your dataset already exists on the Hugging Face Hub at the address https://huggingface.co/datasets/{repo_id} and is not on your local disk in the 'root' folder: @@ -119,7 +118,6 @@ class LeRobotDataset(torch.utils.data.Dataset): '~/.cache/huggingface/lerobot'. episodes (list[int] | None, optional): If specified, this will only load episodes specified by their episode_index in this list. Defaults to None. - split (str, optional): _description_. Defaults to "train". image_transforms (Callable | None, optional): You can pass standard v2 image transforms from torchvision.transforms.v2 here which will be applied to visual modalities (whether they come from videos or images). Defaults to None. @@ -129,19 +127,18 @@ class LeRobotDataset(torch.utils.data.Dataset): timestamps is separated to the next by 1/fps +/- tolerance_s. This also applies to frames decoded from video files. It is also used to check that `delta_timestamps` (when provided) are multiples of 1/fps. 
Defaults to 1e-4. - download_data (bool, optional): Flag to download actual data. Defaults to True. + download_videos (bool, optional): Flag to download the videos. Defaults to True. video_backend (str | None, optional): Video backend to use for decoding videos. There is currently a single option which is the pyav decoder used by Torchvision. Defaults to pyav. """ super().__init__() self.repo_id = repo_id self.root = root if root is not None else LEROBOT_HOME / repo_id - self.split = split self.image_transforms = image_transforms self.delta_timestamps = delta_timestamps self.episodes = episodes self.tolerance_s = tolerance_s - self.download_data = download_data + self.download_videos = download_videos self.video_backend = video_backend if video_backend is not None else "pyav" self.delta_indices = None @@ -152,13 +149,6 @@ class LeRobotDataset(torch.utils.data.Dataset): self.stats = load_stats(repo_id, self._version, self.root) self.tasks = load_tasks(repo_id, self._version, self.root) - if not self.download_data: - # TODO(aliberts): Add actual support for this - # maybe use local_files_only=True or HF_HUB_OFFLINE=True - # see thread https://huggingface.slack.com/archives/C06ME3E7JUD/p1728637455476019 - self.hf_dataset, self.episode_data_index = None, None - return - # Load actual data self.download_episodes() self.hf_dataset = load_hf_dataset(self.root, self.data_path, self.total_episodes, self.episodes) @@ -192,12 +182,13 @@ class LeRobotDataset(torch.utils.data.Dataset): # TODO(rcadene, aliberts): implement faster transfer # https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads files = None + ignore_patterns = None if self.download_videos else "videos/" if self.episodes is not None: files = [ self.data_path.format(episode_index=ep_idx, total_episodes=self.total_episodes) for ep_idx in self.episodes ] - if len(self.video_keys) > 0: + if len(self.video_keys) > 0 and self.download_videos: video_files = [ self.videos_path.format(video_key=vid_key, episode_index=ep_idx) for vid_key in self.video_keys @@ -211,6 +202,7 @@ class LeRobotDataset(torch.utils.data.Dataset): revision=self._version, local_dir=self.root, allow_patterns=files, + ignore_patterns=ignore_patterns, ) @property @@ -371,7 +363,8 @@ class LeRobotDataset(torch.utils.data.Dataset): item = {**video_frames, **item} if self.image_transforms is not None: - for cam in self.camera_keys: + image_keys = self.camera_keys if self.download_videos else self.image_keys + for cam in image_keys: item[cam] = self.image_transforms(item[cam]) return item @@ -380,7 +373,6 @@ class LeRobotDataset(torch.utils.data.Dataset): return ( f"{self.__class__.__name__}(\n" f" Repository ID: '{self.repo_id}',\n" - f" Split: '{self.split}',\n" f" Number of Samples: {self.num_samples},\n" f" Number of Episodes: {self.num_episodes},\n" f" Type: {'video (.mp4)' if self.video else 'image (.png)'},\n" From cf633344be82e59e0c777a3825bd49aeb7e0390d Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sun, 13 Oct 2024 21:21:40 +0200 Subject: [PATCH 12/59] Add multitask support, refactor conversion script --- ...16_to_20.py => convert_dataset_v1_to_v2.py | 241 +++++++++++------- 1 file changed, 149 insertions(+), 92 deletions(-) rename convert_dataset_16_to_20.py => convert_dataset_v1_to_v2.py (65%) diff --git a/convert_dataset_16_to_20.py b/convert_dataset_v1_to_v2.py similarity index 65% rename from convert_dataset_16_to_20.py rename to convert_dataset_v1_to_v2.py index 1a4f1520..9343c898 100644 --- a/convert_dataset_16_to_20.py +++ 
b/convert_dataset_v1_to_v2.py @@ -3,34 +3,70 @@ This script will help you convert any LeRobot dataset already pushed to the hub 2.0. You will be required to provide the 'tasks', which is a short but accurate description in plain English for each of the task performed in the dataset. This will allow to easily train models with task-conditionning. -If your dataset contains a single task, you can provide it directly via the CLI with the '--task' option (see -examples below). +We support 3 different scenarios for these tasks: + 1. Single task dataset: all episodes of your dataset have the same single task. + 2. Single task episodes: the episodes of your dataset each contain a single task but they can differ from + one episode to the next. + 3. Multi task episodes: episodes of your dataset may each contain several different tasks. -If your dataset is a multi-task dataset, TODO -In any case, keep in mind that there should only be one task per episode. Multi-task episodes are not -supported for now. +# 1. Single task dataset +If your dataset contains a single task, you can simply provide it directly via the CLI with the +'--single-task' option (see examples below). -Usage examples +Examples: -Single-task dataset: ```bash -python convert_dataset_16_to_20.py \ +python convert_dataset_v1_to_v2.py \ --repo-id lerobot/aloha_sim_insertion_human_image \ - --task "Insert the peg into the socket." \ + --single-task "Insert the peg into the socket." \ --robot-config lerobot/configs/robot/aloha.yaml \ --local-dir data ``` ```bash -python convert_dataset_16_to_20.py \ +python convert_dataset_v1_to_v2.py \ --repo-id aliberts/koch_tutorial \ - --task "Pick the Lego block and drop it in the box on the right." \ + --single-task "Pick the Lego block and drop it in the box on the right." \ --robot-config lerobot/configs/robot/koch.yaml \ --local-dir data ``` -Multi-task dataset: + +# 2. Single task episodes +If your dataset is a multi-task dataset, you have two options to provide the tasks to this script: + +- If your dataset already contains a language instruction column in its parquet file, you can simply provide + this column's name with the '--tasks-col' arg. + + Example: + + ```bash + python convert_dataset_v1_to_v2.py \ + --repo-id lerobot/stanford_kuka_multimodal_dataset \ + --tasks-col "language_instruction" \ + --local-dir data + ``` + +- If your dataset doesn't contain a language instruction, you should provide the path to a .json file with the + '--tasks-path' arg. This file should have the following structure where keys correspond to each + episode_index in the dataset, and values are the language instruction for that episode. + + Example: + + ```json + { + "0": "Do something", + "1": "Do something else", + "2": "Do something", + "3": "Go there", + ... + } + ``` + +# 3. Multi task episodes +If you have multiple tasks per episodes, your dataset should contain a language instruction column in its +parquet file, and you must provide this column's name with the '--tasks-col' arg. 
TODO """ @@ -39,13 +75,13 @@ import contextlib import json import math import subprocess -from io import BytesIO from pathlib import Path -import pyarrow as pa +import datasets import pyarrow.compute as pc import pyarrow.parquet as pq import torch +from datasets import Dataset from huggingface_hub import HfApi from huggingface_hub.errors import EntryNotFoundError from PIL import Image @@ -123,15 +159,14 @@ def convert_stats_to_json(input_dir: Path, output_dir: Path) -> None: torch.testing.assert_close(stats_json[key], stats[key]) -def get_keys(table: pa.Table) -> dict[str, list]: - table_metadata = json.loads(table.schema.metadata[b"huggingface"].decode("utf-8")) +def get_keys(dataset: Dataset) -> dict[str, list]: sequence_keys, image_keys, video_keys = [], [], [] - for key, val in table_metadata["info"]["features"].items(): - if val["_type"] == "Sequence": + for key, ft in dataset.features.items(): + if isinstance(ft, datasets.Sequence): sequence_keys.append(key) - elif val["_type"] == "Image": + elif isinstance(ft, datasets.Image): image_keys.append(key) - elif val["_type"] == "VideoFrame": + elif ft._type == "VideoFrame": video_keys.append(key) return { @@ -141,55 +176,49 @@ def get_keys(table: pa.Table) -> dict[str, list]: } -def remove_hf_metadata_features(table: pa.Table, features: list[str]) -> pa.Table: - # HACK - schema = table.schema - # decode bytes dict - table_metadata = json.loads(schema.metadata[b"huggingface"].decode("utf-8")) - for key in features: - table_metadata["info"]["features"].pop(key) +def add_task_index_by_episodes(dataset: Dataset, tasks_by_episodes: dict) -> tuple[Dataset, list[str]]: + df = dataset.to_pandas() + tasks = list(set(tasks_by_episodes.values())) + tasks_to_task_index = {task: task_idx for task_idx, task in enumerate(tasks)} + episodes_to_task_index = {ep_idx: tasks_to_task_index[task] for ep_idx, task in tasks_by_episodes.items()} + df["task_index"] = df["episode_index"].map(episodes_to_task_index).astype(int) - # re-encode bytes dict - table_metadata = {b"huggingface": json.dumps(table_metadata).encode("utf-8")} - new_schema = schema.with_metadata(table_metadata) - return table.replace_schema_metadata(new_schema.metadata) + features = dataset.features + features["task_index"] = datasets.Value(dtype="int64") + dataset = Dataset.from_pandas(df, features=features, split="train") + return dataset, tasks -def add_hf_metadata_features(table: pa.Table, features: dict[str, dict]) -> pa.Table: - # HACK - schema = table.schema - # decode bytes dict - table_metadata = json.loads(schema.metadata[b"huggingface"].decode("utf-8")) - for key, val in features.items(): - table_metadata["info"]["features"][key] = val +def add_task_index_from_tasks_col( + dataset: Dataset, tasks_col: str +) -> tuple[Dataset, dict[str, list[str]], list[str]]: + df = dataset.to_pandas() - # re-encode bytes dict - table_metadata = {b"huggingface": json.dumps(table_metadata).encode("utf-8")} - new_schema = schema.with_metadata(table_metadata) - return table.replace_schema_metadata(new_schema.metadata) + # HACK: This is to clean some of the instructions in our version of Open X datasets + prefix_to_clean = "tf.Tensor(b'" + suffix_to_clean = "', shape=(), dtype=string)" + df[tasks_col] = df[tasks_col].str.removeprefix(prefix_to_clean).str.removesuffix(suffix_to_clean) + # Create task_index col + tasks_by_episode = df.groupby("episode_index")[tasks_col].unique().apply(lambda x: x.tolist()).to_dict() + tasks = df[tasks_col].unique().tolist() + tasks_to_task_index = {task: idx for idx, task 
in enumerate(tasks)} + df["task_index"] = df[tasks_col].map(tasks_to_task_index).astype(int) -def remove_videoframe_from_table(table: pa.Table, image_columns: list) -> pa.Table: - table = table.drop(image_columns) - table = remove_hf_metadata_features(table, image_columns) - return table + # Build the dataset back from df + features = dataset.features + features["task_index"] = datasets.Value(dtype="int64") + dataset = Dataset.from_pandas(df, features=features, split="train") + dataset = dataset.remove_columns(tasks_col) - -def add_tasks(table: pa.Table, tasks_by_episodes: dict) -> pa.Table: - tasks_index = pa.array([tasks_by_episodes.get(key.as_py(), None) for key in table["episode_index"]]) - table = table.append_column("task_index", tasks_index) - hf_feature = {"task_index": {"dtype": "int64", "_type": "Value"}} - table = add_hf_metadata_features(table, hf_feature) - return table + return dataset, tasks, tasks_by_episode def split_parquet_by_episodes( - table: pa.Table, keys: dict[str, list], total_episodes: int, episode_indices: list, output_dir: Path + dataset: Dataset, keys: dict[str, list], total_episodes: int, episode_indices: list, output_dir: Path ) -> list: (output_dir / "data").mkdir(exist_ok=True, parents=True) - if len(keys["video"]) > 0: - table = remove_videoframe_from_table(table, keys["video"]) - + table = dataset.remove_columns(keys["video"])._data.table episode_lengths = [] for episode_index in sorted(episode_indices): # Write each episode_index to a new parquet file @@ -330,11 +359,10 @@ def get_video_shapes(videos_info: dict, video_keys: list) -> dict: return video_shapes -def get_image_shapes(table: pa.Table, image_keys: list) -> dict: +def get_image_shapes(dataset: Dataset, image_keys: list) -> dict: image_shapes = {} for img_key in image_keys: - image_bytes = table[img_key][0].as_py() # Assuming first row - image = Image.open(BytesIO(image_bytes["bytes"])) + image = dataset[0][img_key] # Assuming first row channels = get_image_pixel_channels(image) image_shapes[img_key] = { "width": image.width, @@ -352,8 +380,9 @@ def get_generic_motor_names(sequence_shapes: dict) -> dict: def convert_dataset( repo_id: str, local_dir: Path, - tasks: dict, - tasks_by_episodes: dict | None = None, + single_task: str | None = None, + tasks_path: Path | None = None, + tasks_col: Path | None = None, robot_config: dict | None = None, ): v1_6_dir = local_dir / V1_6 / repo_id @@ -367,29 +396,40 @@ def convert_dataset( ) metadata_v1_6 = load_json(v1_6_dir / "meta_data" / "info.json") - - table = pq.read_table(v1_6_dir / "data") - keys = get_keys(table) + dataset = datasets.load_dataset("parquet", data_dir=v1_6_dir / "data", split="train") + keys = get_keys(dataset) # Episodes - episode_indices = sorted(table["episode_index"].unique().to_pylist()) + episode_indices = sorted(dataset.unique("episode_index")) total_episodes = len(episode_indices) assert episode_indices == list(range(total_episodes)) # Tasks - if tasks_by_episodes is None: # Single task dataset - tasks_by_episodes = {ep_idx: 0 for ep_idx in episode_indices} + if single_task: + tasks_by_episodes = {ep_idx: single_task for ep_idx in episode_indices} + dataset, tasks = add_task_index_by_episodes(dataset, tasks_by_episodes) + tasks_by_episodes = {ep_idx: [task] for ep_idx, task in tasks_by_episodes.items()} + elif tasks_path: + tasks_by_episodes = load_json(tasks_path) + tasks_by_episodes = {int(ep_idx): task for ep_idx, task in tasks_by_episodes.items()} + # tasks = list(set(tasks_by_episodes.values())) + dataset, tasks = 
add_task_index_by_episodes(dataset, tasks_by_episodes) + tasks_by_episodes = {ep_idx: [task] for ep_idx, task in tasks_by_episodes.items()} + elif tasks_col: + dataset, tasks, tasks_by_episodes = add_task_index_from_tasks_col(dataset, tasks_col) + else: + raise ValueError - assert set(tasks) == set(tasks_by_episodes.values()) - table = add_tasks(table, tasks_by_episodes) - write_json(tasks, v2_0_dir / "meta" / "tasks.json") + assert set(tasks) == {task for ep_tasks in tasks_by_episodes.values() for task in ep_tasks} + task_json = [{"task_index": task_idx, "task": task} for task_idx, task in enumerate(tasks)] + write_json(task_json, v2_0_dir / "meta" / "tasks.json") # Split data into 1 parquet file by episode - episode_lengths = split_parquet_by_episodes(table, keys, total_episodes, episode_indices, v2_0_dir) + episode_lengths = split_parquet_by_episodes(dataset, keys, total_episodes, episode_indices, v2_0_dir) # Shapes - sequence_shapes = {key: len(table[key][0]) for key in keys["sequence"]} - image_shapes = get_image_shapes(table, keys["image"]) if len(keys["image"]) > 0 else {} + sequence_shapes = {key: len(dataset[key][0]) for key in keys["sequence"]} + image_shapes = get_image_shapes(dataset, keys["image"]) if len(keys["image"]) > 0 else {} if len(keys["video"]) > 0: assert metadata_v1_6.get("video", False) videos_info = get_videos_info(repo_id, v1_6_dir, video_keys=keys["video"]) @@ -416,11 +456,12 @@ def convert_dataset( for key in sequence_shapes: assert len(names[key]) == sequence_shapes[key] - # Episodes info + # Episodes episodes = [ - {"index": ep_idx, "task": tasks_by_episodes[ep_idx], "length": episode_lengths[ep_idx]} + {"episode_index": ep_idx, "tasks": [tasks_by_episodes[ep_idx]], "length": episode_lengths[ep_idx]} for ep_idx in episode_indices ] + write_json(episodes, v2_0_dir / "meta" / "episodes.json") # Assemble metadata v2.0 metadata_v2_0 = { @@ -437,11 +478,17 @@ def convert_dataset( "shapes": {**sequence_shapes, **video_shapes, **image_shapes}, "names": names, "videos": videos_info, - "episodes": episodes, } write_json(metadata_v2_0, v2_0_dir / "meta" / "info.json") convert_stats_to_json(v1_6_dir / "meta_data", v2_0_dir / "meta") + #### TODO: delete + repo_id = f"aliberts/{repo_id.split('/')[1]}" + # if hub_api.repo_exists(repo_id=repo_id, repo_type="dataset"): + # hub_api.delete_repo(repo_id=repo_id, repo_type="dataset") + hub_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True) + #### + with contextlib.suppress(EntryNotFoundError): hub_api.delete_folder(repo_id=repo_id, path_in_repo="data", repo_type="dataset", revision="main") @@ -455,6 +502,13 @@ def convert_dataset( repo_type="dataset", revision="main", ) + hub_api.upload_folder( + repo_id=repo_id, + path_in_repo="videos", + folder_path=v1_6_dir / "videos", + repo_type="dataset", + revision="main", + ) hub_api.upload_folder( repo_id=repo_id, path_in_repo="meta", @@ -463,7 +517,6 @@ def convert_dataset( revision="main", ) - metadata_v2_0.pop("episodes") card_text = f"[meta/info.json](meta/info.json)\n```json\n{json.dumps(metadata_v2_0, indent=4)}\n```" push_dataset_card_to_hub(repo_id=repo_id, revision="main", tags=repo_tags, text=card_text) create_branch(repo_id=repo_id, branch=V2_0, repo_type="dataset") @@ -478,12 +531,13 @@ def convert_dataset( # - [X] Add robot_type # - [X] Add splits # - [X] Push properly to branch v2.0 and delete v1.6 stuff from that branch + # - [X] Handle multitask datasets # - [/] Add sanity checks (encoding, shapes) - # - [ ] Handle multitask datasets def main(): parser 
= argparse.ArgumentParser() + task_args = parser.add_mutually_exclusive_group(required=True) parser.add_argument( "--repo-id", @@ -491,11 +545,20 @@ def main(): required=True, help="Repository identifier on Hugging Face: a community or a user name `/` the name of the dataset (e.g. `lerobot/pusht`, `cadene/aloha_sim_insertion_human`).", ) - parser.add_argument( - "--task", + task_args.add_argument( + "--single-task", type=str, - required=True, - help="A short but accurate description of the task performed in the dataset.", + help="A short but accurate description of the single task performed in the dataset.", + ) + task_args.add_argument( + "--tasks-col", + type=str, + help="The name of the column containing language instructions", + ) + task_args.add_argument( + "--tasks-path", + type=Path, + help="The path to a .json file containing one language instruction for each episode_index", ) parser.add_argument( "--robot-config", @@ -517,19 +580,13 @@ def main(): ) args = parser.parse_args() - if args.local_dir is None: + if not args.local_dir: args.local_dir = Path(f"/tmp/{args.repo_id}") - tasks = {0: args.task} - del args.task - - if args.robot_config is not None: - robot_config = parse_robot_config(args.robot_config, args.robot_overrides) - else: - robot_config = None + robot_config = parse_robot_config(args.robot_config, args.robot_overrides) if args.robot_config else None del args.robot_config, args.robot_overrides - convert_dataset(**vars(args), tasks=tasks, robot_config=robot_config) + convert_dataset(**vars(args), robot_config=robot_config) if __name__ == "__main__": From cbc51e13417a8f6d1c5de4ed26e3f26ebceaf90f Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Mon, 14 Oct 2024 10:14:27 +0200 Subject: [PATCH 13/59] Extend v1 compatibility --- convert_dataset_v1_to_v2.py | 68 +++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/convert_dataset_v1_to_v2.py b/convert_dataset_v1_to_v2.py index 9343c898..79749667 100644 --- a/convert_dataset_v1_to_v2.py +++ b/convert_dataset_v1_to_v2.py @@ -12,7 +12,7 @@ We support 3 different scenarios for these tasks: # 1. Single task dataset If your dataset contains a single task, you can simply provide it directly via the CLI with the -'--single-task' option (see examples below). +'--single-task' option. Examples: @@ -67,7 +67,15 @@ If your dataset is a multi-task dataset, you have two options to provide the tas # 3. Multi task episodes If you have multiple tasks per episodes, your dataset should contain a language instruction column in its parquet file, and you must provide this column's name with the '--tasks-col' arg. 
-TODO + +Example: + +```bash +python convert_dataset_v1_to_v2.py \ + --repo-id lerobot/stanford_kuka_multimodal_dataset \ + --tasks-col "language_instruction" \ + --local-dir data +``` """ import argparse @@ -87,12 +95,12 @@ from huggingface_hub.errors import EntryNotFoundError from PIL import Image from safetensors.torch import load_file -from lerobot.common.datasets.utils import create_branch, flatten_dict, unflatten_dict +from lerobot.common.datasets.utils import create_branch, flatten_dict, get_hub_safe_version, unflatten_dict from lerobot.common.utils.utils import init_hydra_config from lerobot.scripts.push_dataset_to_hub import push_dataset_card_to_hub -V1_6 = "v1.6" -V2_0 = "v2.0" +V16 = "v1.6" +V20 = "v2.0" PARQUET_PATH = "data/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" VIDEO_PATH = "videos/{video_key}_episode_{episode_index:06d}.mp4" @@ -385,18 +393,19 @@ def convert_dataset( tasks_col: Path | None = None, robot_config: dict | None = None, ): - v1_6_dir = local_dir / V1_6 / repo_id - v2_0_dir = local_dir / V2_0 / repo_id - v1_6_dir.mkdir(parents=True, exist_ok=True) - v2_0_dir.mkdir(parents=True, exist_ok=True) + v1 = get_hub_safe_version(repo_id, V16) + v1x_dir = local_dir / v1 / repo_id + v20_dir = local_dir / V20 / repo_id + v1x_dir.mkdir(parents=True, exist_ok=True) + v20_dir.mkdir(parents=True, exist_ok=True) hub_api = HfApi() hub_api.snapshot_download( - repo_id=repo_id, repo_type="dataset", revision=V1_6, local_dir=v1_6_dir, ignore_patterns="videos/" + repo_id=repo_id, repo_type="dataset", revision=v1, local_dir=v1x_dir, ignore_patterns="videos/" ) - metadata_v1_6 = load_json(v1_6_dir / "meta_data" / "info.json") - dataset = datasets.load_dataset("parquet", data_dir=v1_6_dir / "data", split="train") + metadata_v1 = load_json(v1x_dir / "meta_data" / "info.json") + dataset = datasets.load_dataset("parquet", data_dir=v1x_dir / "data", split="train") keys = get_keys(dataset) # Episodes @@ -422,21 +431,22 @@ def convert_dataset( assert set(tasks) == {task for ep_tasks in tasks_by_episodes.values() for task in ep_tasks} task_json = [{"task_index": task_idx, "task": task} for task_idx, task in enumerate(tasks)] - write_json(task_json, v2_0_dir / "meta" / "tasks.json") + write_json(task_json, v20_dir / "meta" / "tasks.json") # Split data into 1 parquet file by episode - episode_lengths = split_parquet_by_episodes(dataset, keys, total_episodes, episode_indices, v2_0_dir) + episode_lengths = split_parquet_by_episodes(dataset, keys, total_episodes, episode_indices, v20_dir) # Shapes sequence_shapes = {key: len(dataset[key][0]) for key in keys["sequence"]} image_shapes = get_image_shapes(dataset, keys["image"]) if len(keys["image"]) > 0 else {} if len(keys["video"]) > 0: - assert metadata_v1_6.get("video", False) - videos_info = get_videos_info(repo_id, v1_6_dir, video_keys=keys["video"]) + assert metadata_v1.get("video", False) + videos_info = get_videos_info(repo_id, v1x_dir, video_keys=keys["video"]) video_shapes = get_video_shapes(videos_info, keys["video"]) for img_key in keys["video"]: - assert videos_info[img_key]["video.pix_fmt"] == metadata_v1_6["encoding"]["pix_fmt"] - assert math.isclose(videos_info[img_key]["video.fps"], metadata_v1_6["fps"], rel_tol=1e-3) + assert math.isclose(videos_info[img_key]["video.fps"], metadata_v1["fps"], rel_tol=1e-3) + if "encoding" in metadata_v1: + assert videos_info[img_key]["video.pix_fmt"] == metadata_v1["encoding"]["pix_fmt"] else: assert len(keys["video"]) == 0 videos_info = None @@ -461,16 +471,16 @@ def convert_dataset( 
{"episode_index": ep_idx, "tasks": [tasks_by_episodes[ep_idx]], "length": episode_lengths[ep_idx]} for ep_idx in episode_indices ] - write_json(episodes, v2_0_dir / "meta" / "episodes.json") + write_json(episodes, v20_dir / "meta" / "episodes.json") # Assemble metadata v2.0 metadata_v2_0 = { - "codebase_version": V2_0, + "codebase_version": V20, "data_path": PARQUET_PATH, "robot_type": robot_type, "total_episodes": total_episodes, "total_tasks": len(tasks), - "fps": metadata_v1_6["fps"], + "fps": metadata_v1["fps"], "splits": {"train": f"0:{total_episodes}"}, "keys": keys["sequence"], "video_keys": keys["video"], @@ -479,14 +489,14 @@ def convert_dataset( "names": names, "videos": videos_info, } - write_json(metadata_v2_0, v2_0_dir / "meta" / "info.json") - convert_stats_to_json(v1_6_dir / "meta_data", v2_0_dir / "meta") + write_json(metadata_v2_0, v20_dir / "meta" / "info.json") + convert_stats_to_json(v1x_dir / "meta_data", v20_dir / "meta") #### TODO: delete - repo_id = f"aliberts/{repo_id.split('/')[1]}" + # repo_id = f"aliberts/{repo_id.split('/')[1]}" # if hub_api.repo_exists(repo_id=repo_id, repo_type="dataset"): # hub_api.delete_repo(repo_id=repo_id, repo_type="dataset") - hub_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True) + # hub_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True) #### with contextlib.suppress(EntryNotFoundError): @@ -498,28 +508,28 @@ def convert_dataset( hub_api.upload_folder( repo_id=repo_id, path_in_repo="data", - folder_path=v2_0_dir / "data", + folder_path=v20_dir / "data", repo_type="dataset", revision="main", ) hub_api.upload_folder( repo_id=repo_id, path_in_repo="videos", - folder_path=v1_6_dir / "videos", + folder_path=v1x_dir / "videos", repo_type="dataset", revision="main", ) hub_api.upload_folder( repo_id=repo_id, path_in_repo="meta", - folder_path=v2_0_dir / "meta", + folder_path=v20_dir / "meta", repo_type="dataset", revision="main", ) card_text = f"[meta/info.json](meta/info.json)\n```json\n{json.dumps(metadata_v2_0, indent=4)}\n```" push_dataset_card_to_hub(repo_id=repo_id, revision="main", tags=repo_tags, text=card_text) - create_branch(repo_id=repo_id, branch=V2_0, repo_type="dataset") + create_branch(repo_id=repo_id, branch=V20, repo_type="dataset") # TODO: # - [X] Add shapes From f96773de1032684db4c3b8e7c0f5b1ff162c6449 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Mon, 14 Oct 2024 13:51:40 +0200 Subject: [PATCH 14/59] Fix safe_version --- convert_dataset_v1_to_v2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/convert_dataset_v1_to_v2.py b/convert_dataset_v1_to_v2.py index 79749667..ede8905f 100644 --- a/convert_dataset_v1_to_v2.py +++ b/convert_dataset_v1_to_v2.py @@ -393,8 +393,8 @@ def convert_dataset( tasks_col: Path | None = None, robot_config: dict | None = None, ): - v1 = get_hub_safe_version(repo_id, V16) - v1x_dir = local_dir / v1 / repo_id + v1 = get_hub_safe_version(repo_id, V16, enforce_v2=False) + v1x_dir = local_dir / V16 / repo_id v20_dir = local_dir / V20 / repo_id v1x_dir.mkdir(parents=True, exist_ok=True) v20_dir.mkdir(parents=True, exist_ok=True) @@ -493,10 +493,10 @@ def convert_dataset( convert_stats_to_json(v1x_dir / "meta_data", v20_dir / "meta") #### TODO: delete - # repo_id = f"aliberts/{repo_id.split('/')[1]}" + repo_id = f"aliberts/{repo_id.split('/')[1]}" # if hub_api.repo_exists(repo_id=repo_id, repo_type="dataset"): # hub_api.delete_repo(repo_id=repo_id, repo_type="dataset") - # hub_api.create_repo(repo_id=repo_id, repo_type="dataset", 
exist_ok=True) + hub_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True) #### with contextlib.suppress(EntryNotFoundError): From 835ab5a81b360caecd35db34693b08cc8693b09d Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 15 Oct 2024 11:05:16 +0200 Subject: [PATCH 15/59] Cleanup, fix load_tasks --- lerobot/common/datasets/utils.py | 11 +++++--- .../datasets/v2/convert_dataset_v1_to_v2.py | 26 +++++++++---------- 2 files changed, 20 insertions(+), 17 deletions(-) rename convert_dataset_v1_to_v2.py => lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py (96%) diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index b20b63fe..ae8fa001 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -80,6 +80,7 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): if isinstance(first_item, PILImage.Image): to_tensor = transforms.ToTensor() items_dict[key] = [to_tensor(img) for img in items_dict[key]] + # TODO(aliberts): remove this part as we'll be using task_index elif isinstance(first_item, str): # TODO (michel-aractingi): add str2embedding via language tokenizer # For now we leave this part up to the user to choose how to address @@ -96,13 +97,13 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): @cache -def get_hub_safe_version(repo_id: str, version: str) -> str: +def get_hub_safe_version(repo_id: str, version: str, enforce_v2: bool = True) -> str: num_version = float(version.strip("v")) - if num_version < 2: + if num_version < 2 and enforce_v2: raise ValueError( f"""The dataset you requested ({repo_id}) is in {version} format. We introduced a new format with v2.0 that is not backward compatible. Please use our conversion script - first (convert_dataset_16_to_20.py) to convert your dataset to this new format.""" + first (convert_dataset_v1_to_v2.py) to convert your dataset to this new format.""" ) api = HfApi() dataset_info = api.list_repo_refs(repo_id, repo_type="dataset") @@ -192,7 +193,9 @@ def load_tasks(repo_id: str, version: str, local_dir: Path) -> dict: repo_id, filename="meta/tasks.json", local_dir=local_dir, repo_type="dataset", revision=version ) with open(fpath) as f: - return json.load(f) + tasks = json.load(f) + + return {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])} def get_episode_data_index(episodes: list, episode_dicts: list[dict]) -> dict[str, torch.Tensor]: diff --git a/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py similarity index 96% rename from convert_dataset_v1_to_v2.py rename to lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index ede8905f..cecab0df 100644 --- a/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -3,13 +3,18 @@ This script will help you convert any LeRobot dataset already pushed to the hub 2.0. You will be required to provide the 'tasks', which is a short but accurate description in plain English for each of the task performed in the dataset. This will allow to easily train models with task-conditionning. -We support 3 different scenarios for these tasks: +We support 3 different scenarios for these tasks (see instructions below): 1. Single task dataset: all episodes of your dataset have the same single task. 2. Single task episodes: the episodes of your dataset each contain a single task but they can differ from one episode to the next. 3. 
Multi task episodes: episodes of your dataset may each contain several different tasks. +Can you can also provide a robot config .yaml file (not mandatory) to this script via the option +'--robot-config' so that it writes information about the robot (robot type, motors names) this dataset was +recorded with. For now, only Aloha/Koch type robots are supported with this option. + + # 1. Single task dataset If your dataset contains a single task, you can simply provide it directly via the CLI with the '--single-task' option. @@ -17,7 +22,7 @@ If your dataset contains a single task, you can simply provide it directly via t Examples: ```bash -python convert_dataset_v1_to_v2.py \ +python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \ --repo-id lerobot/aloha_sim_insertion_human_image \ --single-task "Insert the peg into the socket." \ --robot-config lerobot/configs/robot/aloha.yaml \ @@ -25,7 +30,7 @@ python convert_dataset_v1_to_v2.py \ ``` ```bash -python convert_dataset_v1_to_v2.py \ +python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \ --repo-id aliberts/koch_tutorial \ --single-task "Pick the Lego block and drop it in the box on the right." \ --robot-config lerobot/configs/robot/koch.yaml \ @@ -42,7 +47,7 @@ If your dataset is a multi-task dataset, you have two options to provide the tas Example: ```bash - python convert_dataset_v1_to_v2.py \ + python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \ --repo-id lerobot/stanford_kuka_multimodal_dataset \ --tasks-col "language_instruction" \ --local-dir data @@ -71,7 +76,7 @@ parquet file, and you must provide this column's name with the '--tasks-col' arg Example: ```bash -python convert_dataset_v1_to_v2.py \ +python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \ --repo-id lerobot/stanford_kuka_multimodal_dataset \ --tasks-col "language_instruction" \ --local-dir data @@ -321,6 +326,7 @@ def get_videos_info(repo_id: str, local_dir: Path, video_keys: list[str]) -> dic hub_api = HfApi() videos_info_dict = {"videos_path": VIDEO_PATH} for vid_key in video_keys: + # Assumes first episode video_path = VIDEO_PATH.format(video_key=vid_key, episode_index=0) video_path = hub_api.hf_hub_download( repo_id=repo_id, repo_type="dataset", local_dir=local_dir, filename=video_path @@ -437,7 +443,7 @@ def convert_dataset( episode_lengths = split_parquet_by_episodes(dataset, keys, total_episodes, episode_indices, v20_dir) # Shapes - sequence_shapes = {key: len(dataset[key][0]) for key in keys["sequence"]} + sequence_shapes = {key: dataset.features[key].length for key in keys["sequence"]} image_shapes = get_image_shapes(dataset, keys["image"]) if len(keys["image"]) > 0 else {} if len(keys["video"]) > 0: assert metadata_v1.get("video", False) @@ -479,6 +485,7 @@ def convert_dataset( "data_path": PARQUET_PATH, "robot_type": robot_type, "total_episodes": total_episodes, + "total_frames": len(dataset), "total_tasks": len(tasks), "fps": metadata_v1["fps"], "splits": {"train": f"0:{total_episodes}"}, @@ -512,13 +519,6 @@ def convert_dataset( repo_type="dataset", revision="main", ) - hub_api.upload_folder( - repo_id=repo_id, - path_in_repo="videos", - folder_path=v1x_dir / "videos", - repo_type="dataset", - revision="main", - ) hub_api.upload_folder( repo_id=repo_id, path_in_repo="meta", From da78bbfd162d5d603e093521033a85424ec37350 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 15 Oct 2024 11:06:28 +0200 Subject: [PATCH 16/59] Update load_tasks doc --- lerobot/common/datasets/utils.py | 10 +--------- 1 file changed, 1 
insertion(+), 9 deletions(-) diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index ae8fa001..fbf4dd5f 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -180,15 +180,7 @@ def load_info(repo_id: str, version: str, local_dir: Path) -> dict: def load_tasks(repo_id: str, version: str, local_dir: Path) -> dict: - """tasks contains all the tasks of the dataset, indexed by their task_index. - - Example: - ```json - { - "0": "Pick the Lego block and drop it in the box on the right." - } - ``` - """ + """tasks contains all the tasks of the dataset, indexed by their task_index.""" fpath = hf_hub_download( repo_id, filename="meta/tasks.json", local_dir=local_dir, repo_type="dataset", revision=version ) From 9433ac52ec3c23d448741a8ff6801779faadf8e6 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 15 Oct 2024 13:08:31 +0200 Subject: [PATCH 17/59] WIP add batch convert --- lerobot/__init__.py | 4 +- .../v2/batch_convert_dataset_v1_to_v2.py | 144 ++++++++++++++++++ 2 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py diff --git a/lerobot/__init__.py b/lerobot/__init__.py index 851383dd..7b3f3cd4 100644 --- a/lerobot/__init__.py +++ b/lerobot/__init__.py @@ -181,8 +181,8 @@ available_real_world_datasets = [ "lerobot/usc_cloth_sim", ] -available_datasets = list( - itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets) +available_datasets = sorted( + set(itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets)) ) # lists all available policies from `lerobot/common/policies` diff --git a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py new file mode 100644 index 00000000..a5c5440f --- /dev/null +++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py @@ -0,0 +1,144 @@ +from pprint import pprint + +from lerobot import available_datasets + +# from lerobot.common.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset + +pprint(available_datasets) + +for repo_id in available_datasets: + name = repo_id.split("/")[1] + if "aloha" in name: + if "insertion" in name: + single_task = "Insert the peg into the socket." + elif "transfer" in name: + single_task = "Pick up the cube with the right arm and transfer it to the left arm." + elif "battery" in name: + single_task = "Place the battery into the slot of the remote controller." + elif "candy" in name: + single_task = "Pick up the candy and unwrap it." + elif "coffee_new" in name: + single_task = "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray, then push the 'Hot Water' and 'Travel Mug' buttons." + elif "coffee" in name: + single_task = "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray." + elif "cups_open" in name: + single_task = "Pick up the plastic cup and open its lid." + elif "fork_pick_up" in name: + single_task = "Pick up the fork and place it on the plate." + elif "pingpong_test" in name: + single_task = "Transfer one of the two balls in the right glass into the left glass, then transfer it back to the right glass." + elif "pro_pencil" in name: + single_task = "Pick up the pencil with the right arm, hand it over to the left arm then place it back onto the table." 
+ elif "screw_driver" in name: + single_task = "Pick up the screwdriver with the right arm, hand it over to the left arm then place it into the cup." + elif "tape" in name: + single_task = ( + "Cut a small piece of tape from the tape dispenser then place it on the cardboard box's edge." + ) + elif "towel" in name: + single_task = "Pick up a piece of paper towel and place it on the spilled liquid." + elif "vinh_cup_left" in name: + single_task = "Pick up the platic cup with the right arm, then pop its lid open with the left arm" + elif "thread_velcro" in name: + single_task = "Pick up the velcro cable tie with the left arm, then insert the end of the velcro tie into the other end's loop with the right arm." + elif "shrimp" in name: + single_task = "Sauté the raw shrimp on both sides, then serve it in the bowl." + elif "wash_pan" in name: + single_task = "" + + +# datasets = [ +# 'lerobot/aloha_mobile_cabinet', +# 'lerobot/aloha_mobile_chair', +# 'lerobot/aloha_mobile_elevator', +# 'lerobot/aloha_mobile_shrimp', +# 'lerobot/aloha_mobile_wash_pan', +# 'lerobot/aloha_mobile_wipe_wine', +# 'lerobot/aloha_sim_insertion_human', +# 'lerobot/aloha_sim_insertion_human_image', +# 'lerobot/aloha_sim_insertion_scripted', +# 'lerobot/aloha_sim_insertion_scripted_image', +# 'lerobot/aloha_sim_transfer_cube_human', +# 'lerobot/aloha_sim_transfer_cube_human_image', +# 'lerobot/aloha_sim_transfer_cube_scripted', +# 'lerobot/aloha_sim_transfer_cube_scripted_image', +# 'lerobot/aloha_static_battery', +# 'lerobot/aloha_static_candy', +# 'lerobot/aloha_static_coffee', +# 'lerobot/aloha_static_coffee_new', +# 'lerobot/aloha_static_cups_open', +# 'lerobot/aloha_static_fork_pick_up', +# 'lerobot/aloha_static_pingpong_test', +# 'lerobot/aloha_static_pro_pencil', +# 'lerobot/aloha_static_screw_driver', +# 'lerobot/aloha_static_tape', +# 'lerobot/aloha_static_thread_velcro', +# 'lerobot/aloha_static_towel', +# 'lerobot/aloha_static_vinh_cup', +# 'lerobot/aloha_static_vinh_cup_left', +# 'lerobot/aloha_static_ziploc_slide', +# 'lerobot/asu_table_top', +# 'lerobot/austin_buds_dataset', +# 'lerobot/austin_sailor_dataset', +# 'lerobot/austin_sirius_dataset', +# 'lerobot/berkeley_autolab_ur5', +# 'lerobot/berkeley_cable_routing', +# 'lerobot/berkeley_fanuc_manipulation', +# 'lerobot/berkeley_gnm_cory_hall', +# 'lerobot/berkeley_gnm_recon', +# 'lerobot/berkeley_gnm_sac_son', +# 'lerobot/berkeley_mvp', +# 'lerobot/berkeley_rpt', +# 'lerobot/cmu_franka_exploration_dataset', +# 'lerobot/cmu_play_fusion', +# 'lerobot/cmu_stretch', +# 'lerobot/columbia_cairlab_pusht_real', +# 'lerobot/conq_hose_manipulation', +# 'lerobot/dlr_edan_shared_control', +# 'lerobot/dlr_sara_grid_clamp', +# 'lerobot/dlr_sara_pour', +# 'lerobot/droid_100', +# 'lerobot/fmb', +# 'lerobot/iamlab_cmu_pickup_insert', +# 'lerobot/imperialcollege_sawyer_wrist_cam', +# 'lerobot/jaco_play', +# 'lerobot/kaist_nonprehensile', +# 'lerobot/nyu_door_opening_surprising_effectiveness', +# 'lerobot/nyu_franka_play_dataset', +# 'lerobot/nyu_rot_dataset', +# 'lerobot/pusht', +# 'lerobot/pusht_image', +# 'lerobot/roboturk', +# 'lerobot/stanford_hydra_dataset', +# 'lerobot/stanford_kuka_multimodal_dataset', +# 'lerobot/stanford_robocook', +# 'lerobot/taco_play', +# 'lerobot/tokyo_u_lsmo', +# 'lerobot/toto', +# 'lerobot/ucsd_kitchen_dataset', +# 'lerobot/ucsd_pick_and_place_dataset', +# 'lerobot/uiuc_d3field', +# 'lerobot/umi_cup_in_the_wild', +# 'lerobot/unitreeh1_fold_clothes', +# 'lerobot/unitreeh1_rearrange_objects', +# 'lerobot/unitreeh1_two_robot_greeting', +# 
'lerobot/unitreeh1_warehouse', +# 'lerobot/usc_cloth_sim', +# 'lerobot/utaustin_mutex', +# 'lerobot/utokyo_pr2_opening_fridge', +# 'lerobot/utokyo_pr2_tabletop_manipulation', +# 'lerobot/utokyo_saytap', +# 'lerobot/utokyo_xarm_bimanual', +# 'lerobot/utokyo_xarm_pick_and_place', +# 'lerobot/viola', +# 'lerobot/xarm_lift_medium', +# 'lerobot/xarm_lift_medium_image', +# 'lerobot/xarm_lift_medium_replay', +# 'lerobot/xarm_lift_medium_replay_image', +# 'lerobot/xarm_push_medium', +# 'lerobot/xarm_push_medium_image', +# 'lerobot/xarm_push_medium_replay', +# 'lerobot/xarm_push_medium_replay_image', +# ] + +# convert_dataset(repo_id=repo_id) From 110264000f74b7b44e0040873100a178f373af7f Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 15 Oct 2024 19:03:11 +0200 Subject: [PATCH 18/59] Add fixes for batch convert --- .../v2/batch_convert_dataset_v1_to_v2.py | 291 +++++++++--------- .../datasets/v2/convert_dataset_v1_to_v2.py | 17 +- 2 files changed, 169 insertions(+), 139 deletions(-) diff --git a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py index a5c5440f..dcb949c2 100644 --- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py @@ -1,144 +1,161 @@ -from pprint import pprint +# 'lerobot/aloha_mobile_cabinet', +# 'lerobot/aloha_mobile_chair', +# 'lerobot/aloha_mobile_elevator', +# 'lerobot/aloha_mobile_shrimp', +# 'lerobot/aloha_mobile_wash_pan', +# 'lerobot/aloha_mobile_wipe_wine', +# 'lerobot/aloha_sim_insertion_human', +# 'lerobot/aloha_sim_insertion_human_image', +# 'lerobot/aloha_sim_insertion_scripted', +# 'lerobot/aloha_sim_insertion_scripted_image', +# 'lerobot/aloha_sim_transfer_cube_human', +# 'lerobot/aloha_sim_transfer_cube_human_image', +# 'lerobot/aloha_sim_transfer_cube_scripted', +# 'lerobot/aloha_sim_transfer_cube_scripted_image', +# 'lerobot/aloha_static_battery', +# 'lerobot/aloha_static_candy', +# 'lerobot/aloha_static_coffee', +# 'lerobot/aloha_static_coffee_new', +# 'lerobot/aloha_static_cups_open', +# 'lerobot/aloha_static_fork_pick_up', +# 'lerobot/aloha_static_pingpong_test', +# 'lerobot/aloha_static_pro_pencil', +# 'lerobot/aloha_static_screw_driver', +# 'lerobot/aloha_static_tape', +# 'lerobot/aloha_static_thread_velcro', +# 'lerobot/aloha_static_towel', +# 'lerobot/aloha_static_vinh_cup', +# 'lerobot/aloha_static_vinh_cup_left', +# 'lerobot/aloha_static_ziploc_slide', +# 'lerobot/asu_table_top', +# 'lerobot/austin_buds_dataset', +# 'lerobot/austin_sailor_dataset', +# 'lerobot/austin_sirius_dataset', +# 'lerobot/berkeley_autolab_ur5', +# 'lerobot/berkeley_cable_routing', +# 'lerobot/berkeley_fanuc_manipulation', +# 'lerobot/berkeley_gnm_cory_hall', +# 'lerobot/berkeley_gnm_recon', +# 'lerobot/berkeley_gnm_sac_son', +# 'lerobot/berkeley_mvp', +# 'lerobot/berkeley_rpt', +# 'lerobot/cmu_franka_exploration_dataset', +# 'lerobot/cmu_play_fusion', +# 'lerobot/cmu_stretch', +# 'lerobot/columbia_cairlab_pusht_real', +# 'lerobot/conq_hose_manipulation', +# 'lerobot/dlr_edan_shared_control', +# 'lerobot/dlr_sara_grid_clamp', +# 'lerobot/dlr_sara_pour', +# 'lerobot/droid_100', +# 'lerobot/fmb', +# 'lerobot/iamlab_cmu_pickup_insert', +# 'lerobot/imperialcollege_sawyer_wrist_cam', +# 'lerobot/jaco_play', +# 'lerobot/kaist_nonprehensile', +# 'lerobot/nyu_door_opening_surprising_effectiveness', +# 'lerobot/nyu_franka_play_dataset', +# 'lerobot/nyu_rot_dataset', +# 'lerobot/pusht', +# 'lerobot/pusht_image', +# 'lerobot/roboturk', 
+# 'lerobot/stanford_hydra_dataset', +# 'lerobot/stanford_kuka_multimodal_dataset', +# 'lerobot/stanford_robocook', +# 'lerobot/taco_play', +# 'lerobot/tokyo_u_lsmo', +# 'lerobot/toto', +# 'lerobot/ucsd_kitchen_dataset', +# 'lerobot/ucsd_pick_and_place_dataset', +# 'lerobot/uiuc_d3field', +# 'lerobot/umi_cup_in_the_wild', +# 'lerobot/unitreeh1_fold_clothes', +# 'lerobot/unitreeh1_rearrange_objects', +# 'lerobot/unitreeh1_two_robot_greeting', +# 'lerobot/unitreeh1_warehouse', +# 'lerobot/usc_cloth_sim', +# 'lerobot/utaustin_mutex', +# 'lerobot/utokyo_pr2_opening_fridge', +# 'lerobot/utokyo_pr2_tabletop_manipulation', +# 'lerobot/utokyo_saytap', +# 'lerobot/utokyo_xarm_bimanual', +# 'lerobot/utokyo_xarm_pick_and_place', +# 'lerobot/viola', +# 'lerobot/xarm_lift_medium', +# 'lerobot/xarm_lift_medium_image', +# 'lerobot/xarm_lift_medium_replay', +# 'lerobot/xarm_lift_medium_replay_image', +# 'lerobot/xarm_push_medium', +# 'lerobot/xarm_push_medium_image', +# 'lerobot/xarm_push_medium_replay', +# 'lerobot/xarm_push_medium_replay_image', + +from pathlib import Path from lerobot import available_datasets +from lerobot.common.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset, parse_robot_config -# from lerobot.common.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset +# import tensorflow_datasets as tfds +# builder = tfds.builder("columbia_cairlab_pusht_real") +# builder.info.features -pprint(available_datasets) - -for repo_id in available_datasets: - name = repo_id.split("/")[1] - if "aloha" in name: - if "insertion" in name: - single_task = "Insert the peg into the socket." - elif "transfer" in name: - single_task = "Pick up the cube with the right arm and transfer it to the left arm." - elif "battery" in name: - single_task = "Place the battery into the slot of the remote controller." - elif "candy" in name: - single_task = "Pick up the candy and unwrap it." - elif "coffee_new" in name: - single_task = "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray, then push the 'Hot Water' and 'Travel Mug' buttons." - elif "coffee" in name: - single_task = "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray." - elif "cups_open" in name: - single_task = "Pick up the plastic cup and open its lid." - elif "fork_pick_up" in name: - single_task = "Pick up the fork and place it on the plate." - elif "pingpong_test" in name: - single_task = "Transfer one of the two balls in the right glass into the left glass, then transfer it back to the right glass." - elif "pro_pencil" in name: - single_task = "Pick up the pencil with the right arm, hand it over to the left arm then place it back onto the table." - elif "screw_driver" in name: - single_task = "Pick up the screwdriver with the right arm, hand it over to the left arm then place it into the cup." - elif "tape" in name: - single_task = ( - "Cut a small piece of tape from the tape dispenser then place it on the cardboard box's edge." - ) - elif "towel" in name: - single_task = "Pick up a piece of paper towel and place it on the spilled liquid." - elif "vinh_cup_left" in name: - single_task = "Pick up the platic cup with the right arm, then pop its lid open with the left arm" - elif "thread_velcro" in name: - single_task = "Pick up the velcro cable tie with the left arm, then insert the end of the velcro tie into the other end's loop with the right arm." 
- elif "shrimp" in name: - single_task = "Sauté the raw shrimp on both sides, then serve it in the bowl." - elif "wash_pan" in name: - single_task = "" +LOCAL_DIR = Path("data/") +ALOHA_SINGLE_TASKS_REAL = { + "aloha_mobile_cabinet": "Open the top cabinet, store the pot inside it then close the cabinet.", + "aloha_mobile_chair": "Push the chairs in front of the desk to place them against it.", + "aloha_mobile_elevator": "Take the elevator to the 1st floor.", + # Alternative version, not sure what's best. + # 'aloha_mobile_elevator': "Navigate to the elevator and call it. When it arrives, get inside et push the 1st floor button.", + "aloha_mobile_shrimp": "Sauté the raw shrimp on both sides, then serve it in the bowl.", + "aloha_mobile_wash_pan": "Pick up the pan, rinse it in the sink and then place it in the drying rack.", + "aloha_mobile_wipe_wine": "Pick up the wet cloth on the faucet and use it to clean the spilled wine on the table and underneath the glass.", + "aloha_static_battery": "Place the battery into the slot of the remote controller.", + "aloha_static_candy": "Pick up the candy and unwrap it.", + "aloha_static_coffee": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray, then push the 'Hot Water' and 'Travel Mug' buttons.", + "aloha_static_coffee_new": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray.", + "aloha_static_cups_open": "Pick up the plastic cup and open its lid.", + "aloha_static_fork_pick_up": "Pick up the fork and place it on the plate.", + "aloha_static_pingpong_test": "Transfer one of the two balls in the right glass into the left glass, then transfer it back to the right glass.", + "aloha_static_pro_pencil": "Pick up the pencil with the right arm, hand it over to the left arm then place it back onto the table.", + "aloha_static_screw_driver": "Pick up the screwdriver with the right arm, hand it over to the left arm then place it into the cup.", + "aloha_static_tape": "Cut a small piece of tape from the tape dispenser then place it on the cardboard box's edge.", + "aloha_static_thread_velcro": "Pick up the velcro cable tie with the left arm, then insert the end of the velcro tie into the other end's loop with the right arm.", + "aloha_static_towel": "Pick up a piece of paper towel and place it on the spilled liquid.", + "aloha_static_vinh_cup": "Pick up the plastic cup with the right arm, then pop its lid open with the left arm.", + "aloha_static_vinh_cup_left": "Pick up the plastic cup with the left arm, then pop its lid open with the right arm.", + "aloha_static_ziploc_slide": "Slide open the ziploc bag.", +} +ALOHA_CONFIG = Path("lerobot/configs/robot/aloha.yaml") -# datasets = [ -# 'lerobot/aloha_mobile_cabinet', -# 'lerobot/aloha_mobile_chair', -# 'lerobot/aloha_mobile_elevator', -# 'lerobot/aloha_mobile_shrimp', -# 'lerobot/aloha_mobile_wash_pan', -# 'lerobot/aloha_mobile_wipe_wine', -# 'lerobot/aloha_sim_insertion_human', -# 'lerobot/aloha_sim_insertion_human_image', -# 'lerobot/aloha_sim_insertion_scripted', -# 'lerobot/aloha_sim_insertion_scripted_image', -# 'lerobot/aloha_sim_transfer_cube_human', -# 'lerobot/aloha_sim_transfer_cube_human_image', -# 'lerobot/aloha_sim_transfer_cube_scripted', -# 'lerobot/aloha_sim_transfer_cube_scripted_image', -# 'lerobot/aloha_static_battery', -# 'lerobot/aloha_static_candy', -# 'lerobot/aloha_static_coffee', -# 'lerobot/aloha_static_coffee_new', -# 'lerobot/aloha_static_cups_open', -#
'lerobot/aloha_static_fork_pick_up', -# 'lerobot/aloha_static_pingpong_test', -# 'lerobot/aloha_static_pro_pencil', -# 'lerobot/aloha_static_screw_driver', -# 'lerobot/aloha_static_tape', -# 'lerobot/aloha_static_thread_velcro', -# 'lerobot/aloha_static_towel', -# 'lerobot/aloha_static_vinh_cup', -# 'lerobot/aloha_static_vinh_cup_left', -# 'lerobot/aloha_static_ziploc_slide', -# 'lerobot/asu_table_top', -# 'lerobot/austin_buds_dataset', -# 'lerobot/austin_sailor_dataset', -# 'lerobot/austin_sirius_dataset', -# 'lerobot/berkeley_autolab_ur5', -# 'lerobot/berkeley_cable_routing', -# 'lerobot/berkeley_fanuc_manipulation', -# 'lerobot/berkeley_gnm_cory_hall', -# 'lerobot/berkeley_gnm_recon', -# 'lerobot/berkeley_gnm_sac_son', -# 'lerobot/berkeley_mvp', -# 'lerobot/berkeley_rpt', -# 'lerobot/cmu_franka_exploration_dataset', -# 'lerobot/cmu_play_fusion', -# 'lerobot/cmu_stretch', -# 'lerobot/columbia_cairlab_pusht_real', -# 'lerobot/conq_hose_manipulation', -# 'lerobot/dlr_edan_shared_control', -# 'lerobot/dlr_sara_grid_clamp', -# 'lerobot/dlr_sara_pour', -# 'lerobot/droid_100', -# 'lerobot/fmb', -# 'lerobot/iamlab_cmu_pickup_insert', -# 'lerobot/imperialcollege_sawyer_wrist_cam', -# 'lerobot/jaco_play', -# 'lerobot/kaist_nonprehensile', -# 'lerobot/nyu_door_opening_surprising_effectiveness', -# 'lerobot/nyu_franka_play_dataset', -# 'lerobot/nyu_rot_dataset', -# 'lerobot/pusht', -# 'lerobot/pusht_image', -# 'lerobot/roboturk', -# 'lerobot/stanford_hydra_dataset', -# 'lerobot/stanford_kuka_multimodal_dataset', -# 'lerobot/stanford_robocook', -# 'lerobot/taco_play', -# 'lerobot/tokyo_u_lsmo', -# 'lerobot/toto', -# 'lerobot/ucsd_kitchen_dataset', -# 'lerobot/ucsd_pick_and_place_dataset', -# 'lerobot/uiuc_d3field', -# 'lerobot/umi_cup_in_the_wild', -# 'lerobot/unitreeh1_fold_clothes', -# 'lerobot/unitreeh1_rearrange_objects', -# 'lerobot/unitreeh1_two_robot_greeting', -# 'lerobot/unitreeh1_warehouse', -# 'lerobot/usc_cloth_sim', -# 'lerobot/utaustin_mutex', -# 'lerobot/utokyo_pr2_opening_fridge', -# 'lerobot/utokyo_pr2_tabletop_manipulation', -# 'lerobot/utokyo_saytap', -# 'lerobot/utokyo_xarm_bimanual', -# 'lerobot/utokyo_xarm_pick_and_place', -# 'lerobot/viola', -# 'lerobot/xarm_lift_medium', -# 'lerobot/xarm_lift_medium_image', -# 'lerobot/xarm_lift_medium_replay', -# 'lerobot/xarm_lift_medium_replay_image', -# 'lerobot/xarm_push_medium', -# 'lerobot/xarm_push_medium_image', -# 'lerobot/xarm_push_medium_replay', -# 'lerobot/xarm_push_medium_replay_image', -# ] +def batch_convert(): + for num, repo_id in enumerate(available_datasets): + print(f"Converting {repo_id} ({num}/{len(available_datasets)})") + name = repo_id.split("/")[1] + single_task, tasks_col, robot_config = None, None, None + if "aloha" in name: + robot_config = parse_robot_config(ALOHA_CONFIG) + if "sim_insertion" in name: + single_task = "Insert the peg into the socket." + elif "sim_transfer" in name: + single_task = "Pick up the cube with the right arm and transfer it to the left arm." + else: + single_task = ALOHA_SINGLE_TASKS_REAL[name] + elif name != "columbia_cairlab_pusht_real" and "pusht" in name: + single_task = "Push the T-shaped block onto the T-shaped target." + elif "xarm_lift" in name or "xarm_push" in name: + single_task = "Pick up the cube and lift it." 
+ else: + tasks_col = "language_instruction" -# convert_dataset(repo_id=repo_id) + convert_dataset( + repo_id=repo_id, + local_dir=LOCAL_DIR, + single_task=single_task, + tasks_col=tasks_col, + robot_config=robot_config, + ) + + +if __name__ == "__main__": + batch_convert() diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index cecab0df..ffd9f05e 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -88,6 +88,7 @@ import contextlib import json import math import subprocess +import warnings from pathlib import Path import datasets @@ -414,6 +415,14 @@ def convert_dataset( dataset = datasets.load_dataset("parquet", data_dir=v1x_dir / "data", split="train") keys = get_keys(dataset) + if single_task and "language_instruction" in dataset.column_names: + warnings.warn( + "'single_task' provided but 'language_instruction' tasks_col found. Using 'language_instruction'.", + stacklevel=1, + ) + single_task = None + tasks_col = "language_instruction" + # Episodes episode_indices = sorted(dataset.unique("episode_index")) total_episodes = len(episode_indices) @@ -462,6 +471,10 @@ def convert_dataset( if robot_config is not None: robot_type = robot_config["robot_type"] names = robot_config["names"] + if "observation.effort" in keys["sequence"]: + names["observation.effort"] = names["observation.state"] + if "observation.velocity" in keys["sequence"]: + names["observation.velocity"] = names["observation.state"] repo_tags = [robot_type] else: robot_type = "unknown" @@ -500,10 +513,10 @@ def convert_dataset( convert_stats_to_json(v1x_dir / "meta_data", v20_dir / "meta") #### TODO: delete - repo_id = f"aliberts/{repo_id.split('/')[1]}" + # repo_id = f"aliberts/{repo_id.split('/')[1]}" # if hub_api.repo_exists(repo_id=repo_id, repo_type="dataset"): # hub_api.delete_repo(repo_id=repo_id, repo_type="dataset") - hub_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True) + # hub_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True) #### with contextlib.suppress(EntryNotFoundError): From c146ba936fa1be988c7417013125beee8ea76875 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 16 Oct 2024 23:34:54 +0200 Subject: [PATCH 19/59] Add episode chunks logic, move_videos & lfs tracking fix --- .../datasets/v2/convert_dataset_v1_to_v2.py | 220 ++++++++++++++---- 1 file changed, 176 insertions(+), 44 deletions(-) diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index ffd9f05e..6ddfd2a5 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -87,6 +87,7 @@ import argparse import contextlib import json import math +import shutil import subprocess import warnings from pathlib import Path @@ -108,8 +109,15 @@ from lerobot.scripts.push_dataset_to_hub import push_dataset_card_to_hub V16 = "v1.6" V20 = "v2.0" -PARQUET_PATH = "data/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" -VIDEO_PATH = "videos/{video_key}_episode_{episode_index:06d}.mp4" +EPISODE_CHUNK_SIZE = 1000 + +CLEAN_GITATTRIBUTES = Path("data/.gitattributes") + +VIDEO_FILE = "{video_key}_episode_{episode_index:06d}.mp4" +PARQUET_CHUNK_PATH = ( + "data/chunk-{episode_chunk:03d}/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" +) +VIDEO_CHUNK_PATH = 
"videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4" def parse_robot_config(config_path: Path, config_overrides: list[str] | None = None) -> tuple[str, dict]: @@ -229,23 +237,125 @@ def add_task_index_from_tasks_col( def split_parquet_by_episodes( - dataset: Dataset, keys: dict[str, list], total_episodes: int, episode_indices: list, output_dir: Path + dataset: Dataset, + keys: dict[str, list], + total_episodes: int, + total_chunks: int, + output_dir: Path, ) -> list: - (output_dir / "data").mkdir(exist_ok=True, parents=True) table = dataset.remove_columns(keys["video"])._data.table episode_lengths = [] - for episode_index in sorted(episode_indices): - # Write each episode_index to a new parquet file - filtered_table = table.filter(pc.equal(table["episode_index"], episode_index)) - episode_lengths.insert(episode_index, len(filtered_table)) - output_file = output_dir / PARQUET_PATH.format( - episode_index=episode_index, total_episodes=total_episodes - ) - pq.write_table(filtered_table, output_file) + for ep_chunk in range(total_chunks): + ep_chunk_start = EPISODE_CHUNK_SIZE * ep_chunk + ep_chunk_end = min(EPISODE_CHUNK_SIZE * (ep_chunk + 1), total_episodes) + + chunk_dir = "/".join(PARQUET_CHUNK_PATH.split("/")[:-1]).format(episode_chunk=ep_chunk) + (output_dir / chunk_dir).mkdir(parents=True, exist_ok=True) + for ep_idx in range(ep_chunk_start, ep_chunk_end): + ep_table = table.filter(pc.equal(table["episode_index"], ep_idx)) + episode_lengths.insert(ep_idx, len(ep_table)) + output_file = output_dir / PARQUET_CHUNK_PATH.format( + episode_chunk=ep_chunk, episode_index=ep_idx, total_episodes=total_episodes + ) + pq.write_table(ep_table, output_file) return episode_lengths +def move_videos( + repo_id: str, + video_keys: list[str], + total_episodes: int, + total_chunks: int, + work_dir: Path, + branch: str = "main", +): + """ + HACK: Since HfApi() doesn't provide a way to move files directly in a repo, this function will run git + commands to fetch git lfs video files references to move them into subdirectories without having to + actually download them. 
+ """ + _lfs_clone(repo_id, work_dir, branch) + + video_files = [str(f.relative_to(work_dir)) for f in work_dir.glob("videos*/*.mp4")] + total_videos = len(video_files) + assert total_videos == total_episodes * len(video_keys) + + fix_lfs_video_files_tracking(work_dir, video_files, CLEAN_GITATTRIBUTES) + + video_dirs = sorted(work_dir.glob("videos*/")) + for ep_chunk in range(total_chunks): + ep_chunk_start = EPISODE_CHUNK_SIZE * ep_chunk + ep_chunk_end = min(EPISODE_CHUNK_SIZE * (ep_chunk + 1), total_episodes) + for vid_key in video_keys: + chunk_dir = "/".join(VIDEO_CHUNK_PATH.split("/")[:-1]).format( + episode_chunk=ep_chunk, video_key=vid_key + ) + (work_dir / chunk_dir).mkdir(parents=True, exist_ok=True) + + for ep_idx in range(ep_chunk_start, ep_chunk_end): + target_path = VIDEO_CHUNK_PATH.format( + episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_idx + ) + video_file = VIDEO_FILE.format(video_key=vid_key, episode_index=ep_idx) + if len(video_dirs) == 1: + video_path = video_dirs[0] / video_file + else: + for dir in video_dirs: + if (dir / video_file).is_file(): + video_path = dir / video_file + break + + video_path.rename(work_dir / target_path) + + commit_message = "Move video files into chunk subdirectories" + subprocess.run(["git", "add", "."], cwd=work_dir, check=True) + subprocess.run(["git", "commit", "-m", commit_message], cwd=work_dir, check=True) + subprocess.run(["git", "push"], cwd=work_dir, check=True) + + +def fix_lfs_video_files_tracking(work_dir: Path, video_files: list[str], clean_gitattributes_path: Path): + """ + HACK: This function fixes the tracking by git lfs which was not properly set on some repos. In that case, + there's no other option than to download the actual files and reupload them with lfs tracking. + """ + # _lfs_clone(repo_id, work_dir, branch) + lfs_tracked_files = subprocess.run( + ["git", "lfs", "ls-files", "-n"], cwd=work_dir, capture_output=True, text=True, check=True + ) + lfs_tracked_files = set(lfs_tracked_files.stdout.splitlines()) + lfs_untracked_videos = [f for f in video_files if f not in lfs_tracked_files] + + if lfs_untracked_videos: + shutil.copyfile(clean_gitattributes_path, work_dir / ".gitattributes") + subprocess.run(["git", "add", ".gitattributes"], cwd=work_dir, check=True) + for i in range(0, len(lfs_untracked_videos), 100): + files = lfs_untracked_videos[i : i + 100] + try: + subprocess.run( + ["git", "rm", "--cached", *files], cwd=work_dir, capture_output=True, check=True + ) + except subprocess.CalledProcessError as e: + print("git rm --cached ERROR:") + print(e.stderr) + subprocess.run(["git", "add", *files], cwd=work_dir, check=True) + + commit_message = "Track video files with git lfs" + subprocess.run(["git", "commit", "-m", commit_message], cwd=work_dir, check=True) + subprocess.run(["git", "push"], cwd=work_dir, check=True) + + +def _lfs_clone(repo_id: str, work_dir: Path, branch: str) -> None: + subprocess.run(["git", "lfs", "install"], cwd=work_dir, check=True) + repo_url = f"https://huggingface.co/datasets/{repo_id}" + env = {"GIT_LFS_SKIP_SMUDGE": "1"} # Prevent downloading LFS files + subprocess.run( + ["git", "clone", "--branch", branch, "--single-branch", "--depth", "1", repo_url, str(work_dir)], + check=True, + env=env, + ) + + def _get_audio_info(video_path: Path | str) -> dict: ffprobe_audio_cmd = [ "ffprobe", @@ -323,16 +433,19 @@ def _get_video_info(video_path: Path | str) -> dict: return video_info -def get_videos_info(repo_id: str, local_dir: Path, video_keys: list[str]) -> dict: +def 
get_videos_info(repo_id: str, local_dir: Path, video_keys: list[str], branch: str) -> dict: hub_api = HfApi() - videos_info_dict = {"videos_path": VIDEO_PATH} - for vid_key in video_keys: - # Assumes first episode - video_path = VIDEO_PATH.format(video_key=vid_key, episode_index=0) - video_path = hub_api.hf_hub_download( - repo_id=repo_id, repo_type="dataset", local_dir=local_dir, filename=video_path - ) - videos_info_dict[vid_key] = _get_video_info(video_path) + videos_info_dict = {"videos_path": VIDEO_CHUNK_PATH} + + # Assumes first episode + video_files = [ + VIDEO_CHUNK_PATH.format(episode_chunk=0, video_key=vid_key, episode_index=0) for vid_key in video_keys + ] + hub_api.snapshot_download( + repo_id=repo_id, repo_type="dataset", local_dir=local_dir, revision=branch, allow_patterns=video_files + ) + for vid_key, vid_path in zip(video_keys, video_files, strict=True): + videos_info_dict[vid_key] = _get_video_info(local_dir / vid_path) return videos_info_dict @@ -399,6 +512,7 @@ def convert_dataset( tasks_path: Path | None = None, tasks_col: Path | None = None, robot_config: dict | None = None, + test_branch: str | None = None, ): v1 = get_hub_safe_version(repo_id, V16, enforce_v2=False) v1x_dir = local_dir / V16 / repo_id @@ -408,8 +522,12 @@ def convert_dataset( hub_api = HfApi() hub_api.snapshot_download( - repo_id=repo_id, repo_type="dataset", revision=v1, local_dir=v1x_dir, ignore_patterns="videos/" + repo_id=repo_id, repo_type="dataset", revision=v1, local_dir=v1x_dir, ignore_patterns="videos*/" ) + branch = "main" + if test_branch: + branch = test_branch + create_branch(repo_id=repo_id, branch=test_branch, repo_type="dataset") metadata_v1 = load_json(v1x_dir / "meta_data" / "info.json") dataset = datasets.load_dataset("parquet", data_dir=v1x_dir / "data", split="train") @@ -423,10 +541,14 @@ def convert_dataset( single_task = None tasks_col = "language_instruction" - # Episodes + # Episodes & chunks episode_indices = sorted(dataset.unique("episode_index")) total_episodes = len(episode_indices) assert episode_indices == list(range(total_episodes)) + total_videos = total_episodes * len(keys["video"]) + total_chunks = total_episodes // EPISODE_CHUNK_SIZE + if total_episodes % EPISODE_CHUNK_SIZE != 0: + total_chunks += 1 # Tasks if single_task: @@ -448,25 +570,30 @@ def convert_dataset( task_json = [{"task_index": task_idx, "task": task} for task_idx, task in enumerate(tasks)] write_json(task_json, v20_dir / "meta" / "tasks.json") - # Split data into 1 parquet file by episode - episode_lengths = split_parquet_by_episodes(dataset, keys, total_episodes, episode_indices, v20_dir) - # Shapes sequence_shapes = {key: dataset.features[key].length for key in keys["sequence"]} image_shapes = get_image_shapes(dataset, keys["image"]) if len(keys["image"]) > 0 else {} + + # Videos if len(keys["video"]) > 0: assert metadata_v1.get("video", False) - videos_info = get_videos_info(repo_id, v1x_dir, video_keys=keys["video"]) + tmp_video_dir = local_dir / "videos" / V20 / repo_id + tmp_video_dir.mkdir(parents=True, exist_ok=True) + move_videos(repo_id, keys["video"], total_episodes, total_chunks, tmp_video_dir, branch) + videos_info = get_videos_info(repo_id, v1x_dir, video_keys=keys["video"], branch=branch) video_shapes = get_video_shapes(videos_info, keys["video"]) for img_key in keys["video"]: assert math.isclose(videos_info[img_key]["video.fps"], metadata_v1["fps"], rel_tol=1e-3) if "encoding" in metadata_v1: assert videos_info[img_key]["video.pix_fmt"] == metadata_v1["encoding"]["pix_fmt"] else: - 
assert len(keys["video"]) == 0 + assert metadata_v1.get("video", 0) == 0 videos_info = None video_shapes = {} + # Split data into 1 parquet file by episode + episode_lengths = split_parquet_by_episodes(dataset, keys, total_episodes, total_chunks, v20_dir) + # Names if robot_config is not None: robot_type = robot_config["robot_type"] @@ -495,11 +622,14 @@ def convert_dataset( # Assemble metadata v2.0 metadata_v2_0 = { "codebase_version": V20, - "data_path": PARQUET_PATH, + "data_path": PARQUET_CHUNK_PATH, "robot_type": robot_type, "total_episodes": total_episodes, "total_frames": len(dataset), "total_tasks": len(tasks), + "total_videos": total_videos, + "total_chunks": total_chunks, + "chunks_size": EPISODE_CHUNK_SIZE, "fps": metadata_v1["fps"], "splits": {"train": f"0:{total_episodes}"}, "keys": keys["sequence"], @@ -512,37 +642,31 @@ def convert_dataset( write_json(metadata_v2_0, v20_dir / "meta" / "info.json") convert_stats_to_json(v1x_dir / "meta_data", v20_dir / "meta") - #### TODO: delete - # repo_id = f"aliberts/{repo_id.split('/')[1]}" - # if hub_api.repo_exists(repo_id=repo_id, repo_type="dataset"): - # hub_api.delete_repo(repo_id=repo_id, repo_type="dataset") - # hub_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True) - #### + with contextlib.suppress(EntryNotFoundError): + hub_api.delete_folder(repo_id=repo_id, path_in_repo="data", repo_type="dataset", revision=branch) with contextlib.suppress(EntryNotFoundError): - hub_api.delete_folder(repo_id=repo_id, path_in_repo="data", repo_type="dataset", revision="main") - - with contextlib.suppress(EntryNotFoundError): - hub_api.delete_folder(repo_id=repo_id, path_in_repo="meta_data", repo_type="dataset", revision="main") + hub_api.delete_folder(repo_id=repo_id, path_in_repo="meta_data", repo_type="dataset", revision=branch) hub_api.upload_folder( repo_id=repo_id, path_in_repo="data", folder_path=v20_dir / "data", repo_type="dataset", - revision="main", + revision=branch, ) hub_api.upload_folder( repo_id=repo_id, path_in_repo="meta", folder_path=v20_dir / "meta", repo_type="dataset", - revision="main", + revision=branch, ) card_text = f"[meta/info.json](meta/info.json)\n```json\n{json.dumps(metadata_v2_0, indent=4)}\n```" - push_dataset_card_to_hub(repo_id=repo_id, revision="main", tags=repo_tags, text=card_text) - create_branch(repo_id=repo_id, branch=V20, repo_type="dataset") + push_dataset_card_to_hub(repo_id=repo_id, revision=branch, tags=repo_tags, text=card_text) + if not test_branch: + create_branch(repo_id=repo_id, branch=V20, repo_type="dataset") # TODO: # - [X] Add shapes @@ -555,7 +679,9 @@ def convert_dataset( # - [X] Add splits # - [X] Push properly to branch v2.0 and delete v1.6 stuff from that branch # - [X] Handle multitask datasets - # - [/] Add sanity checks (encoding, shapes) + # - [X] Handle hf hub repo limits (add chunks logic) + # - [X] Add test-branch + # - [X] Add sanity checks (encoding, shapes) def main(): @@ -601,6 +727,12 @@ def main(): default=None, help="Local directory to store the dataset during conversion. Defaults to /tmp/{repo_id}", ) + parser.add_argument( + "--test-branch", + type=str, + default=None, + help="Repo branch to test your conversion first (e.g. 
'v2.0.test')", + ) args = parser.parse_args() if not args.local_dir: From 50a75ad3fe47437f09da71f0946c781e459d5547 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 17 Oct 2024 10:17:27 +0200 Subject: [PATCH 20/59] Write episodes as jsonlines --- .../datasets/v2/convert_dataset_v1_to_v2.py | 12 +++++++- poetry.lock | 28 ++++++++++++++----- pyproject.toml | 1 + 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index 6ddfd2a5..81131f3b 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -93,6 +93,7 @@ import warnings from pathlib import Path import datasets +import jsonlines import pyarrow.compute as pc import pyarrow.parquet as pq import torch @@ -160,6 +161,11 @@ def write_json(data: dict, fpath: Path) -> None: json.dump(data, f, indent=4) +def write_jsonlines(data: dict, fpath: Path) -> None: + with jsonlines.open(fpath, "w") as writer: + writer.write_all(data) + + def convert_stats_to_json(input_dir: Path, output_dir: Path) -> None: safetensor_path = input_dir / "stats.safetensors" stats = load_file(safetensor_path) @@ -617,7 +623,7 @@ def convert_dataset( {"episode_index": ep_idx, "tasks": [tasks_by_episodes[ep_idx]], "length": episode_lengths[ep_idx]} for ep_idx in episode_indices ] - write_json(episodes, v20_dir / "meta" / "episodes.json") + write_jsonlines(episodes, v20_dir / "meta" / "episodes.jsonl") # Assemble metadata v2.0 metadata_v2_0 = { @@ -648,6 +654,9 @@ def convert_dataset( with contextlib.suppress(EntryNotFoundError): hub_api.delete_folder(repo_id=repo_id, path_in_repo="meta_data", repo_type="dataset", revision=branch) + with contextlib.suppress(EntryNotFoundError): + hub_api.delete_folder(repo_id=repo_id, path_in_repo="meta", repo_type="dataset", revision=branch) + hub_api.upload_folder( repo_id=repo_id, path_in_repo="data", @@ -681,6 +690,7 @@ def convert_dataset( # - [X] Handle multitask datasets # - [X] Handle hf hub repo limits (add chunks logic) # - [X] Add test-branch + # - [X] Use jsonlines for episodes # - [X] Add sanity checks (encoding, shapes) diff --git a/poetry.lock b/poetry.lock index b4d491ae..011e76ef 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2620,6 +2620,20 @@ files = [ {file = "json5-0.9.25.tar.gz", hash = "sha256:548e41b9be043f9426776f05df8635a00fe06104ea51ed24b67f908856e151ae"}, ] +[[package]] +name = "jsonlines" +version = "4.0.0" +description = "Library with helpers for the jsonlines file format" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55"}, + {file = "jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74"}, +] + +[package.dependencies] +attrs = ">=19.2.0" + [[package]] name = "jsonpointer" version = "3.0.0" @@ -4216,10 +4230,10 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and 
python_version < \"3.12\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] [[package]] @@ -4240,10 +4254,10 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] [[package]] @@ -4332,9 +4346,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -7562,4 +7576,4 @@ xarm = ["gym-xarm"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "f64e01ce021ae77baa2c9bb82cbd2dd6035ab01a1500207da7acdb7f9d0772e1" +content-hash = "b79d32bec01c53a3ca48548b85e6f991c9d8fc091f3f528e0b54c6e9fac63ff9" diff --git a/pyproject.toml b/pyproject.toml index 89ed7ff0..85390c19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,7 @@ pyrealsense2 = {version = ">=2.55.1.6486", markers = "sys_platform != 'darwin'", pyrender = {git = "https://github.com/mmatl/pyrender.git", markers = "sys_platform == 'linux'", optional = true} hello-robot-stretch-body = {version = ">=0.7.27", markers = "sys_platform == 'linux'", optional = true} pyserial = {version = ">=3.5", optional = true} +jsonlines = "^4.0.0" [tool.poetry.extras] From ad3f112d161509d1fbb87efea25d2cd72d31bab0 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 17 Oct 2024 12:58:48 +0200 Subject: [PATCH 21/59] Add fixes for lfs tracking --- .../datasets/v2/convert_dataset_v1_to_v2.py | 90 ++++++++++++------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index 81131f3b..f0237a05 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -85,6 +85,7 @@ python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \ import argparse import contextlib +import filecmp import json import math import shutil @@ -112,7 +113,7 @@ V20 = "v2.0" EPISODE_CHUNK_SIZE = 1000 -CLEAN_GITATTRIBUTES = Path("data/.gitattributes") +GITATTRIBUTES_REF = "aliberts/gitattributes_reference" VIDEO_FILE = "{video_key}_episode_{episode_index:06d}.mp4" PARQUET_CHUNK_PATH = ( @@ -158,7 +159,7 @@ def load_json(fpath: Path) -> dict: def write_json(data: dict, fpath: Path) -> None: fpath.parent.mkdir(exist_ok=True, parents=True) with open(fpath, "w") as f: - json.dump(data, f, indent=4) + json.dump(data, f, indent=4, ensure_ascii=False) def write_jsonlines(data: dict, fpath: Path) -> None: @@ -274,8 +275,9 @@ def move_videos( total_episodes: int, total_chunks: int, work_dir: Path, + clean_gittatributes: Path, branch: str = "main", -): +) -> None: """ HACK: Since HfApi() doesn't provide a way to move files directly 
in a repo, this function will run git commands to fetch git lfs video files references to move them into subdirectories without having to @@ -283,11 +285,25 @@ def move_videos( """ _lfs_clone(repo_id, work_dir, branch) + videos_moved = False video_files = [str(f.relative_to(work_dir)) for f in work_dir.glob("videos*/*.mp4")] - total_videos = len(video_files) - assert total_videos == total_episodes * len(video_keys) + if len(video_files) == 0: + video_files = [str(f.relative_to(work_dir)) for f in work_dir.glob("videos*/*/*/*.mp4")] + videos_moved = True # Videos have already been moved - fix_lfs_video_files_tracking(work_dir, video_files, CLEAN_GITATTRIBUTES) + assert len(video_files) == total_episodes * len(video_keys) + + lfs_untracked_videos = _get_lfs_untracked_videos(work_dir, video_files) + + current_gittatributes = work_dir / ".gitattributes" + if not filecmp.cmp(current_gittatributes, clean_gittatributes, shallow=False): + fix_gitattributes(work_dir, current_gittatributes, clean_gittatributes) + + if lfs_untracked_videos: + fix_lfs_video_files_tracking(work_dir, video_files) + + if videos_moved: + return video_dirs = sorted(work_dir.glob("videos*/")) for ep_chunk in range(total_chunks): @@ -320,35 +336,30 @@ def move_videos( subprocess.run(["git", "push"], cwd=work_dir, check=True) -def fix_lfs_video_files_tracking(work_dir: Path, video_files: list[str], clean_gitattributes_path: Path): +def fix_lfs_video_files_tracking(work_dir: Path, lfs_untracked_videos: list[str]) -> None: """ HACK: This function fixes the tracking by git lfs which was not properly set on some repos. In that case, there's no other option than to download the actual files and reupload them with lfs tracking. """ - # _lfs_clone(repo_id, work_dir, branch) - lfs_tracked_files = subprocess.run( - ["git", "lfs", "ls-files", "-n"], cwd=work_dir, capture_output=True, text=True, check=True - ) - lfs_tracked_files = set(lfs_tracked_files.stdout.splitlines()) - lfs_untracked_videos = [f for f in video_files if f not in lfs_tracked_files] + for i in range(0, len(lfs_untracked_videos), 100): + files = lfs_untracked_videos[i : i + 100] + try: + subprocess.run(["git", "rm", "--cached", *files], cwd=work_dir, capture_output=True, check=True) + except subprocess.CalledProcessError as e: + print("git rm --cached ERROR:") + print(e.stderr) + subprocess.run(["git", "add", *files], cwd=work_dir, check=True) - if lfs_untracked_videos: - shutil.copyfile(clean_gitattributes_path, work_dir / ".gitattributes") - subprocess.run(["git", "add", ".gitattributes"], cwd=work_dir, check=True) - for i in range(0, len(lfs_untracked_videos), 100): - files = lfs_untracked_videos[i : i + 100] - try: - subprocess.run( - ["git", "rm", "--cached", *files], cwd=work_dir, capture_output=True, check=True - ) - except subprocess.CalledProcessError as e: - print("git rm --cached ERROR:") - print(e.stderr) - subprocess.run(["git", "add", *files], cwd=work_dir, check=True) + commit_message = "Track video files with git lfs" + subprocess.run(["git", "commit", "-m", commit_message], cwd=work_dir, check=True) + subprocess.run(["git", "push"], cwd=work_dir, check=True) - commit_message = "Track video files with git lfs" - subprocess.run(["git", "commit", "-m", commit_message], cwd=work_dir, check=True) - subprocess.run(["git", "push"], cwd=work_dir, check=True) + +def fix_gitattributes(work_dir: Path, current_gittatributes: Path, clean_gittatributes: Path) -> None: + shutil.copyfile(clean_gittatributes, current_gittatributes) + subprocess.run(["git", "add", 
".gitattributes"], cwd=work_dir, check=True) + subprocess.run(["git", "commit", "-m", "Fix .gitattributes"], cwd=work_dir, check=True) + subprocess.run(["git", "push"], cwd=work_dir, check=True) def _lfs_clone(repo_id: str, work_dir: Path, branch: str) -> None: @@ -362,6 +373,14 @@ def _lfs_clone(repo_id: str, work_dir: Path, branch: str) -> None: ) +def _get_lfs_untracked_videos(work_dir: Path, video_files: list[str]) -> list[str]: + lfs_tracked_files = subprocess.run( + ["git", "lfs", "ls-files", "-n"], cwd=work_dir, capture_output=True, text=True, check=True + ) + lfs_tracked_files = set(lfs_tracked_files.stdout.splitlines()) + return [f for f in video_files if f not in lfs_tracked_files] + + def _get_audio_info(video_path: Path | str) -> dict: ffprobe_audio_cmd = [ "ffprobe", @@ -585,7 +604,14 @@ def convert_dataset( assert metadata_v1.get("video", False) tmp_video_dir = local_dir / "videos" / V20 / repo_id tmp_video_dir.mkdir(parents=True, exist_ok=True) - move_videos(repo_id, keys["video"], total_episodes, total_chunks, tmp_video_dir, branch) + clean_gitattr = Path( + hub_api.hf_hub_download( + repo_id=GITATTRIBUTES_REF, repo_type="dataset", local_dir=local_dir, filename=".gitattributes" + ) + ).absolute() + move_videos( + repo_id, keys["video"], total_episodes, total_chunks, tmp_video_dir, clean_gitattr, branch + ) videos_info = get_videos_info(repo_id, v1x_dir, video_keys=keys["video"], branch=branch) video_shapes = get_video_shapes(videos_info, keys["video"]) for img_key in keys["video"]: @@ -735,7 +761,7 @@ def main(): "--local-dir", type=Path, default=None, - help="Local directory to store the dataset during conversion. Defaults to /tmp/{repo_id}", + help="Local directory to store the dataset during conversion. Defaults to /tmp/lerobot_dataset_v2", ) parser.add_argument( "--test-branch", @@ -746,7 +772,7 @@ def main(): args = parser.parse_args() if not args.local_dir: - args.local_dir = Path(f"/tmp/{args.repo_id}") + args.local_dir = Path("/tmp/lerobot_dataset_v2") robot_config = parse_robot_config(args.robot_config, args.robot_overrides) if args.robot_config else None del args.robot_config, args.robot_overrides From 3ee3739edc2986b7bc38ed60f6e79c3e90e17645 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 17 Oct 2024 13:08:58 +0200 Subject: [PATCH 22/59] Add batch conversion log --- .../v2/batch_convert_dataset_v1_to_v2.py | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py index dcb949c2..d9a1b93b 100644 --- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py @@ -95,10 +95,6 @@ from pathlib import Path from lerobot import available_datasets from lerobot.common.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset, parse_robot_config -# import tensorflow_datasets as tfds -# builder = tfds.builder("columbia_cairlab_pusht_real") -# builder.info.features - LOCAL_DIR = Path("data/") ALOHA_SINGLE_TASKS_REAL = { "aloha_mobile_cabinet": "Open the top cabinet, store the pot inside it then close the cabinet.", @@ -129,10 +125,21 @@ ALOHA_CONFIG = Path("lerobot/configs/robot/aloha.yaml") def batch_convert(): + status = {} + logfile = LOCAL_DIR / "conversion_log.txt" for num, repo_id in enumerate(available_datasets): - print(f"Converting {repo_id} ({num}/{len(available_datasets)})") + print(f"\nConverting {repo_id} 
({num}/{len(available_datasets)})") + print("---------------------------------------------------------") name = repo_id.split("/")[1] single_task, tasks_col, robot_config = None, None, None + + # TODO(aliberts) issues with these datasets: + # if name in [ + # "aloha_mobile_shrimp", # 18 videos files per camera but 17 episodes in the parquet + # # "aloha_mobile_wash_pan", # + # ]: + # continue + if "aloha" in name: robot_config = parse_robot_config(ALOHA_CONFIG) if "sim_insertion" in name: @@ -148,13 +155,22 @@ def batch_convert(): else: tasks_col = "language_instruction" - convert_dataset( - repo_id=repo_id, - local_dir=LOCAL_DIR, - single_task=single_task, - tasks_col=tasks_col, - robot_config=robot_config, - ) + try: + convert_dataset( + repo_id=repo_id, + local_dir=LOCAL_DIR, + single_task=single_task, + tasks_col=tasks_col, + robot_config=robot_config, + ) + status = f"{repo_id}: success." + with open(logfile, "a") as file: + file.write(status + "\n") + except Exception as e: + status = f"{repo_id}: {e}" + with open(logfile, "a") as file: + file.write(status + "\n") + continue if __name__ == "__main__": From 7242c57400d58385f9c123025670e5ad0b02292d Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 17 Oct 2024 16:08:37 +0200 Subject: [PATCH 23/59] Cleanup --- .../common/datasets/v2/batch_convert_dataset_v1_to_v2.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py index d9a1b93b..73939bd1 100644 --- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py @@ -100,8 +100,6 @@ ALOHA_SINGLE_TASKS_REAL = { "aloha_mobile_cabinet": "Open the top cabinet, store the pot inside it then close the cabinet.", "aloha_mobile_chair": "Push the chairs in front of the desk to place them against it.", "aloha_mobile_elevator": "Take the elevator to the 1st floor.", - # Alternative version, not sure what's best. - # 'aloha_mobile_elevator': "Navigate to the elevator and call it. 
When it arrives, get inside et push the 1st floor button.", "aloha_mobile_shrimp": "Sauté the raw shrimp on both sides, then serve it in the bowl.", "aloha_mobile_wash_pan": "Pick up the pan, rinse it in the sink and then place it in the drying rack.", "aloha_mobile_wipe_wine": "Pick up the wet cloth on the faucet and use it to clean the spilled wine on the table and underneath the glass.", @@ -133,13 +131,6 @@ def batch_convert(): name = repo_id.split("/")[1] single_task, tasks_col, robot_config = None, None, None - # TODO(aliberts) issues with these datasets: - # if name in [ - # "aloha_mobile_shrimp", # 18 videos files per camera but 17 episodes in the parquet - # # "aloha_mobile_wash_pan", # - # ]: - # continue - if "aloha" in name: robot_config = parse_robot_config(ALOHA_CONFIG) if "sim_insertion" in name: From d0d8193625864e9588b1d4a95564e8f37d9294db Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 17 Oct 2024 23:33:51 +0200 Subject: [PATCH 24/59] Add unitreeh and umi --- .../v2/batch_convert_dataset_v1_to_v2.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py index 73939bd1..00a47473 100644 --- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py @@ -90,6 +90,7 @@ # 'lerobot/xarm_push_medium_replay', # 'lerobot/xarm_push_medium_replay_image', +import traceback from pathlib import Path from lerobot import available_datasets @@ -139,10 +140,23 @@ def batch_convert(): single_task = "Pick up the cube with the right arm and transfer it to the left arm." else: single_task = ALOHA_SINGLE_TASKS_REAL[name] + elif "unitreeh1" in name: + if "fold_clothes" in name: + single_task = "Fold the sweatshirt." + elif "rearrange_objects" in name or "rearrange_objects" in name: + single_task = "Put the object into the bin." + elif "two_robot_greeting" in name: + single_task = "Greet the other robot with a high five." + elif "warehouse" in name: + single_task = ( + "Grab the spray paint on the shelf and place it in the bin on top of the robot dog." + ) elif name != "columbia_cairlab_pusht_real" and "pusht" in name: single_task = "Push the T-shaped block onto the T-shaped target." elif "xarm_lift" in name or "xarm_push" in name: single_task = "Pick up the cube and lift it." + elif name == "umi_cup_in_the_wild": + single_task = "Put the cup on the plate." else: tasks_col = "language_instruction" @@ -157,8 +171,8 @@ def batch_convert(): status = f"{repo_id}: success." 
with open(logfile, "a") as file: file.write(status + "\n") - except Exception as e: - status = f"{repo_id}: {e}" + except Exception: + status = f"{repo_id}: failed\n {traceback.format_exc()}" with open(logfile, "a") as file: file.write(status + "\n") continue From be64d54bd9e5efd0dae6c899d5a7fc05bf463529 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 18 Oct 2024 00:29:50 +0200 Subject: [PATCH 25/59] Update doc --- lerobot/common/datasets/lerobot_dataset.py | 38 ++++++++++++++-------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 6b149554..e2b65a19 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -83,25 +83,37 @@ class LeRobotDataset(torch.utils.data.Dataset): In terms of files, a typical LeRobotDataset looks like this from its root path: . - ├── README.md ├── data - │ ├── train-00000-of-00050.parquet - │ ├── train-00001-of-00050.parquet - │ ├── train-00002-of-00050.parquet - │ ... + │ ├── chunk-000 + │ │ ├── train-00000-of-03603.parquet + │ │ ├── train-00001-of-03603.parquet + │ │ ├── train-00002-of-03603.parquet + │ │ └── ... + │ ├── chunk-001 + │ │ ├── train-01000-of-03603.parquet + │ │ ├── train-01001-of-03603.parquet + │ │ ├── train-01002-of-03603.parquet + │ │ └── ... + │ └── ... ├── meta + │ ├── episodes.jsonl │ ├── info.json │ ├── stats.json │ └── tasks.json └── videos (optional) - ├── observation.images.laptop_episode_000000.mp4 - ├── observation.images.laptop_episode_000001.mp4 - ├── observation.images.laptop_episode_000002.mp4 - ... - ├── observation.images.phone_episode_000000.mp4 - ├── observation.images.phone_episode_000001.mp4 - ├── observation.images.phone_episode_000002.mp4 - ... + ├── chunk-000 + │ ├── observation.images.laptop + │ │ ├── episode_000000.mp4 + │ │ ├── episode_000001.mp4 + │ │ ├── episode_000002.mp4 + │ │ └── ... + │ ├── observation.images.phone + │ │ ├── episode_000000.mp4 + │ │ ├── episode_000001.mp4 + │ │ ├── episode_000002.mp4 + │ │ └── ... + ├── chunk-001 + └── ... Note that this file-based structure is designed to be as versatile as possible. The files are split by episodes which allows a more granular control over which episodes one wants to use and download. 
The From beacb7e95796da092dfe870eaaff712ecd628169 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 18 Oct 2024 00:30:16 +0200 Subject: [PATCH 26/59] Cleanup --- .../v2/batch_convert_dataset_v1_to_v2.py | 92 ------------------- 1 file changed, 92 deletions(-) diff --git a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py index 00a47473..a964c226 100644 --- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py @@ -1,95 +1,3 @@ -# 'lerobot/aloha_mobile_cabinet', -# 'lerobot/aloha_mobile_chair', -# 'lerobot/aloha_mobile_elevator', -# 'lerobot/aloha_mobile_shrimp', -# 'lerobot/aloha_mobile_wash_pan', -# 'lerobot/aloha_mobile_wipe_wine', -# 'lerobot/aloha_sim_insertion_human', -# 'lerobot/aloha_sim_insertion_human_image', -# 'lerobot/aloha_sim_insertion_scripted', -# 'lerobot/aloha_sim_insertion_scripted_image', -# 'lerobot/aloha_sim_transfer_cube_human', -# 'lerobot/aloha_sim_transfer_cube_human_image', -# 'lerobot/aloha_sim_transfer_cube_scripted', -# 'lerobot/aloha_sim_transfer_cube_scripted_image', -# 'lerobot/aloha_static_battery', -# 'lerobot/aloha_static_candy', -# 'lerobot/aloha_static_coffee', -# 'lerobot/aloha_static_coffee_new', -# 'lerobot/aloha_static_cups_open', -# 'lerobot/aloha_static_fork_pick_up', -# 'lerobot/aloha_static_pingpong_test', -# 'lerobot/aloha_static_pro_pencil', -# 'lerobot/aloha_static_screw_driver', -# 'lerobot/aloha_static_tape', -# 'lerobot/aloha_static_thread_velcro', -# 'lerobot/aloha_static_towel', -# 'lerobot/aloha_static_vinh_cup', -# 'lerobot/aloha_static_vinh_cup_left', -# 'lerobot/aloha_static_ziploc_slide', -# 'lerobot/asu_table_top', -# 'lerobot/austin_buds_dataset', -# 'lerobot/austin_sailor_dataset', -# 'lerobot/austin_sirius_dataset', -# 'lerobot/berkeley_autolab_ur5', -# 'lerobot/berkeley_cable_routing', -# 'lerobot/berkeley_fanuc_manipulation', -# 'lerobot/berkeley_gnm_cory_hall', -# 'lerobot/berkeley_gnm_recon', -# 'lerobot/berkeley_gnm_sac_son', -# 'lerobot/berkeley_mvp', -# 'lerobot/berkeley_rpt', -# 'lerobot/cmu_franka_exploration_dataset', -# 'lerobot/cmu_play_fusion', -# 'lerobot/cmu_stretch', -# 'lerobot/columbia_cairlab_pusht_real', -# 'lerobot/conq_hose_manipulation', -# 'lerobot/dlr_edan_shared_control', -# 'lerobot/dlr_sara_grid_clamp', -# 'lerobot/dlr_sara_pour', -# 'lerobot/droid_100', -# 'lerobot/fmb', -# 'lerobot/iamlab_cmu_pickup_insert', -# 'lerobot/imperialcollege_sawyer_wrist_cam', -# 'lerobot/jaco_play', -# 'lerobot/kaist_nonprehensile', -# 'lerobot/nyu_door_opening_surprising_effectiveness', -# 'lerobot/nyu_franka_play_dataset', -# 'lerobot/nyu_rot_dataset', -# 'lerobot/pusht', -# 'lerobot/pusht_image', -# 'lerobot/roboturk', -# 'lerobot/stanford_hydra_dataset', -# 'lerobot/stanford_kuka_multimodal_dataset', -# 'lerobot/stanford_robocook', -# 'lerobot/taco_play', -# 'lerobot/tokyo_u_lsmo', -# 'lerobot/toto', -# 'lerobot/ucsd_kitchen_dataset', -# 'lerobot/ucsd_pick_and_place_dataset', -# 'lerobot/uiuc_d3field', -# 'lerobot/umi_cup_in_the_wild', -# 'lerobot/unitreeh1_fold_clothes', -# 'lerobot/unitreeh1_rearrange_objects', -# 'lerobot/unitreeh1_two_robot_greeting', -# 'lerobot/unitreeh1_warehouse', -# 'lerobot/usc_cloth_sim', -# 'lerobot/utaustin_mutex', -# 'lerobot/utokyo_pr2_opening_fridge', -# 'lerobot/utokyo_pr2_tabletop_manipulation', -# 'lerobot/utokyo_saytap', -# 'lerobot/utokyo_xarm_bimanual', -# 'lerobot/utokyo_xarm_pick_and_place', -# 'lerobot/viola', -# 
'lerobot/xarm_lift_medium', -# 'lerobot/xarm_lift_medium_image', -# 'lerobot/xarm_lift_medium_replay', -# 'lerobot/xarm_lift_medium_replay_image', -# 'lerobot/xarm_push_medium', -# 'lerobot/xarm_push_medium_image', -# 'lerobot/xarm_push_medium_replay', -# 'lerobot/xarm_push_medium_replay_image', - import traceback from pathlib import Path From 3a9f9644299760dcae8c93a9980c8cea5e9bcf5f Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 18 Oct 2024 00:31:21 +0200 Subject: [PATCH 27/59] Add copyrights --- .../v2/batch_convert_dataset_v1_to_v2.py | 16 ++++++++++++++++ .../datasets/v2/convert_dataset_v1_to_v2.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py index a964c226..37c9583d 100644 --- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import traceback from pathlib import Path diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index f0237a05..ee719cd2 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ This script will help you convert any LeRobot dataset already pushed to the hub from codebase version 1.6 to 2.0. 
You will be required to provide the 'tasks', which is a short but accurate description in plain English From 91e8ce772b97039cb8188e9ab17a5b856ae45cfa Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 18 Oct 2024 09:48:38 +0200 Subject: [PATCH 28/59] Remove caret requirement --- poetry.lock | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index a8bd3268..57684e32 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5389,7 +5389,7 @@ docs = ["sphinx", "sphinx-automodapi", "sphinx-rtd-theme"] name = "pyserial" version = "3.5" description = "Python Serial Port Extension" -optional = false +optional = true python-versions = "*" files = [ {file = "pyserial-3.5-py2.py3-none-any.whl", hash = "sha256:c4451db6ba391ca6ca299fb3ec7bae67a5c55dde170964c7a14ceefec02f2cf0"}, @@ -7576,4 +7576,4 @@ xarm = ["gym-xarm"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "b79d32bec01c53a3ca48548b85e6f991c9d8fc091f3f528e0b54c6e9fac63ff9" +content-hash = "a89f20d969c8cabb45732282832c7d997385399d436e45830a48584f11fa3135" diff --git a/pyproject.toml b/pyproject.toml index 85390c19..3ed68852 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ pyrealsense2 = {version = ">=2.55.1.6486", markers = "sys_platform != 'darwin'", pyrender = {git = "https://github.com/mmatl/pyrender.git", markers = "sys_platform == 'linux'", optional = true} hello-robot-stretch-body = {version = ">=0.7.27", markers = "sys_platform == 'linux'", optional = true} pyserial = {version = ">=3.5", optional = true} -jsonlines = "^4.0.0" +jsonlines = ">=4.0.0" [tool.poetry.extras] From e7355ba5954b5756b8465b9e63267d2e135eaae8 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 18 Oct 2024 12:03:29 +0200 Subject: [PATCH 29/59] Fix episodes.jsonl --- lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index ee719cd2..36113fb1 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -662,7 +662,7 @@ def convert_dataset( # Episodes episodes = [ - {"episode_index": ep_idx, "tasks": [tasks_by_episodes[ep_idx]], "length": episode_lengths[ep_idx]} + {"episode_index": ep_idx, "tasks": tasks_by_episodes[ep_idx], "length": episode_lengths[ep_idx]} for ep_idx in episode_indices ] write_jsonlines(episodes, v20_dir / "meta" / "episodes.jsonl") From 1a51505ec6bcb66ff208421500a6cc4500e60fe7 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 18 Oct 2024 14:48:34 +0200 Subject: [PATCH 30/59] Add download_metadata, move default paths --- lerobot/common/datasets/lerobot_dataset.py | 91 ++++++++++--------- .../datasets/v2/convert_dataset_v1_to_v2.py | 41 ++++----- 2 files changed, 68 insertions(+), 64 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index e2b65a19..4e100d1f 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -31,9 +31,7 @@ from lerobot.common.datasets.utils import ( get_episode_data_index, get_hub_safe_version, load_hf_dataset, - load_info, - load_stats, - load_tasks, + load_metadata, ) from lerobot.common.datasets.video_utils import VideoFrame, decode_video_frames_torchvision @@ -41,6 +39,12 @@ from lerobot.common.datasets.video_utils import VideoFrame, 
decode_video_frames_ CODEBASE_VERSION = "v2.0" LEROBOT_HOME = Path(os.getenv("LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser() +DEFAULT_CHUNK_SIZE = 1000 +DEFAULT_VIDEO_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4" +DEFAULT_PARQUET_PATH = ( + "data/chunk-{episode_chunk:03d}/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" +) + class LeRobotDataset(torch.utils.data.Dataset): def __init__( @@ -70,7 +74,7 @@ class LeRobotDataset(torch.utils.data.Dataset): Instantiating this class with this 'repo_id' will download the dataset from that address and load it, pending your dataset is compliant with codebase_version v2.0. If your dataset has been created before this new format, you will be prompted to convert it using our conversion script from v1.6 - to v2.0, which you can find at [TODO(aliberts): move conversion script & add location here]. + to v2.0, which you can find at lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py. 2. Your dataset already exists on your local disk in the 'root' folder: This is typically the case when you recorded your dataset locally and you may or may not have @@ -139,7 +143,9 @@ class LeRobotDataset(torch.utils.data.Dataset): timestamps is separated to the next by 1/fps +/- tolerance_s. This also applies to frames decoded from video files. It is also used to check that `delta_timestamps` (when provided) are multiples of 1/fps. Defaults to 1e-4. - download_videos (bool, optional): Flag to download the videos. Defaults to True. + download_videos (bool, optional): Flag to download the videos. Note that when set to True but the + video files are already present on local disk, they won't be downloaded again. Defaults to + True. video_backend (str | None, optional): Video backend to use for decoding videos. There is currently a single option which is the pyav decoder used by Torchvision. Defaults to pyav. """ @@ -157,9 +163,8 @@ class LeRobotDataset(torch.utils.data.Dataset): # Load metadata self.root.mkdir(exist_ok=True, parents=True) self._version = get_hub_safe_version(repo_id, CODEBASE_VERSION) - self.info = load_info(repo_id, self._version, self.root) - self.stats = load_stats(repo_id, self._version, self.root) - self.tasks = load_tasks(repo_id, self._version, self.root) + self.download_metadata() + self.info, self.episode_dicts, self.stats, self.tasks = load_metadata(self.root) # Load actual data self.download_episodes() @@ -185,6 +190,15 @@ class LeRobotDataset(torch.utils.data.Dataset): # - [ ] Update episode_index (arg update=True) # - [ ] Update info.json (arg update=True) + def download_metadata(self) -> None: + snapshot_download( + self.repo_id, + repo_type="dataset", + revision=self._version, + local_dir=self.root, + allow_patterns="meta/", + ) + def download_episodes(self) -> None: """Downloads the dataset from the given 'repo_id' at the provided version. If 'episodes' is given, this will only download those episodes (selected by their episode_index). 
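As a rough sketch of how the chunked layout introduced here resolves to concrete file paths (the episode index, total episode count and camera key below are illustrative, not taken from a real dataset):

```python
# Illustrative only: mapping an episode index to its parquet and video files
# under the default chunked layout (chunk size 1000).
DEFAULT_CHUNK_SIZE = 1000
DEFAULT_PARQUET_PATH = (
    "data/chunk-{episode_chunk:03d}/train-{episode_index:05d}-of-{total_episodes:05d}.parquet"
)
DEFAULT_VIDEO_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4"

episode_index, total_episodes = 1042, 2500
episode_chunk = episode_index // DEFAULT_CHUNK_SIZE  # -> 1

print(DEFAULT_PARQUET_PATH.format(
    episode_chunk=episode_chunk, episode_index=episode_index, total_episodes=total_episodes
))  # data/chunk-001/train-01042-of-02500.parquet
print(DEFAULT_VIDEO_PATH.format(
    episode_chunk=episode_chunk, video_key="observation.images.laptop", episode_index=episode_index
))  # videos/chunk-001/observation.images.laptop/episode_001042.mp4
```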
If 'episodes' is None, the whole @@ -227,11 +241,6 @@ class LeRobotDataset(torch.utils.data.Dataset): """Formattable string for the video files.""" return self.info["videos"]["videos_path"] if len(self.video_keys) > 0 else None - @property - def episode_dicts(self) -> list[dict]: - """List of dictionary containing information for each episode, indexed by episode_index.""" - return self.info["episodes"] - @property def fps(self) -> int: """Frames per second used during data collection.""" @@ -254,7 +263,7 @@ class LeRobotDataset(torch.utils.data.Dataset): @property def camera_keys(self) -> list[str]: - """Keys to access image and video streams from cameras (regardless of their storage method).""" + """Keys to access visual modalities (regardless of their storage method).""" return self.image_keys + self.video_keys @property @@ -277,6 +286,16 @@ class LeRobotDataset(torch.utils.data.Dataset): """Total number of episodes available.""" return self.info["total_episodes"] + @property + def total_chunks(self) -> int: + """Total number of chunks (groups of episodes).""" + return self.info["total_chunks"] + + @property + def chunks_size(self) -> int: + """Max number of episodes per chunk.""" + return self.info["chunks_size"] + @property def shapes(self) -> dict: """Shapes for the different features.""" @@ -397,42 +416,28 @@ class LeRobotDataset(torch.utils.data.Dataset): ) @classmethod - def from_preloaded( + def create( cls, - repo_id: str = "from_preloaded", + repo_id: str, root: Path | None = None, - split: str = "train", - transform: callable = None, + image_transforms: Callable | None = None, delta_timestamps: dict[list[float]] | None = None, - # additional preloaded attributes - hf_dataset=None, - episode_data_index=None, - stats=None, - info=None, - videos_dir=None, - video_backend=None, + tolerance_s: float = 1e-4, + video_backend: str | None = None, ) -> "LeRobotDataset": - """Create a LeRobot Dataset from existing data and attributes instead of loading from the filesystem. - - It is especially useful when converting raw data into LeRobotDataset before saving the dataset - on the filesystem or uploading to the hub. - - Note: Meta-data attributes like `repo_id`, `version`, `root`, etc are optional and potentially - meaningless depending on the downstream usage of the return dataset. 
- """ + """Create a LeRobot Dataset from scratch in order to record data.""" # create an empty object of type LeRobotDataset obj = cls.__new__(cls) obj.repo_id = repo_id - obj.root = root - obj.split = split - obj.image_transforms = transform - obj.delta_timestamps = delta_timestamps - obj.hf_dataset = hf_dataset - obj.episode_data_index = episode_data_index - obj.stats = stats - obj.info = info if info is not None else {} - obj.videos_dir = videos_dir - obj.video_backend = video_backend if video_backend is not None else "pyav" + obj.root = root if root is not None else LEROBOT_HOME / repo_id + # obj.episodes = None + # obj.image_transforms = None + # obj.delta_timestamps = None + # obj.episode_data_index = episode_data_index + # obj.stats = stats + # obj.info = info if info is not None else {} + # obj.videos_dir = videos_dir + # obj.video_backend = video_backend if video_backend is not None else "pyav" return obj diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index 36113fb1..a498f9c1 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -120,6 +120,11 @@ from huggingface_hub.errors import EntryNotFoundError from PIL import Image from safetensors.torch import load_file +from lerobot.common.datasets.lerobot_dataset import ( + DEFAULT_CHUNK_SIZE, + DEFAULT_PARQUET_PATH, + DEFAULT_VIDEO_PATH, +) from lerobot.common.datasets.utils import create_branch, flatten_dict, get_hub_safe_version, unflatten_dict from lerobot.common.utils.utils import init_hydra_config from lerobot.scripts.push_dataset_to_hub import push_dataset_card_to_hub @@ -127,15 +132,8 @@ from lerobot.scripts.push_dataset_to_hub import push_dataset_card_to_hub V16 = "v1.6" V20 = "v2.0" -EPISODE_CHUNK_SIZE = 1000 - GITATTRIBUTES_REF = "aliberts/gitattributes_reference" - VIDEO_FILE = "{video_key}_episode_{episode_index:06d}.mp4" -PARQUET_CHUNK_PATH = ( - "data/chunk-{episode_chunk:03d}/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" -) -VIDEO_CHUNK_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4" def parse_robot_config(config_path: Path, config_overrides: list[str] | None = None) -> tuple[str, dict]: @@ -269,15 +267,15 @@ def split_parquet_by_episodes( table = dataset.remove_columns(keys["video"])._data.table episode_lengths = [] for ep_chunk in range(total_chunks): - ep_chunk_start = EPISODE_CHUNK_SIZE * ep_chunk - ep_chunk_end = min(EPISODE_CHUNK_SIZE * (ep_chunk + 1), total_episodes) + ep_chunk_start = DEFAULT_CHUNK_SIZE * ep_chunk + ep_chunk_end = min(DEFAULT_CHUNK_SIZE * (ep_chunk + 1), total_episodes) - chunk_dir = "/".join(PARQUET_CHUNK_PATH.split("/")[:-1]).format(episode_chunk=ep_chunk) + chunk_dir = "/".join(DEFAULT_PARQUET_PATH.split("/")[:-1]).format(episode_chunk=ep_chunk) (output_dir / chunk_dir).mkdir(parents=True, exist_ok=True) for ep_idx in range(ep_chunk_start, ep_chunk_end): ep_table = table.filter(pc.equal(table["episode_index"], ep_idx)) episode_lengths.insert(ep_idx, len(ep_table)) - output_file = output_dir / PARQUET_CHUNK_PATH.format( + output_file = output_dir / DEFAULT_PARQUET_PATH.format( episode_chunk=ep_chunk, episode_index=ep_idx, total_episodes=total_episodes ) pq.write_table(ep_table, output_file) @@ -323,16 +321,16 @@ def move_videos( video_dirs = sorted(work_dir.glob("videos*/")) for ep_chunk in range(total_chunks): - ep_chunk_start = EPISODE_CHUNK_SIZE * ep_chunk - ep_chunk_end = 
min(EPISODE_CHUNK_SIZE * (ep_chunk + 1), total_episodes) + ep_chunk_start = DEFAULT_CHUNK_SIZE * ep_chunk + ep_chunk_end = min(DEFAULT_CHUNK_SIZE * (ep_chunk + 1), total_episodes) for vid_key in video_keys: - chunk_dir = "/".join(VIDEO_CHUNK_PATH.split("/")[:-1]).format( + chunk_dir = "/".join(DEFAULT_VIDEO_PATH.split("/")[:-1]).format( episode_chunk=ep_chunk, video_key=vid_key ) (work_dir / chunk_dir).mkdir(parents=True, exist_ok=True) for ep_idx in range(ep_chunk_start, ep_chunk_end): - target_path = VIDEO_CHUNK_PATH.format( + target_path = DEFAULT_VIDEO_PATH.format( episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_idx ) video_file = VIDEO_FILE.format(video_key=vid_key, episode_index=ep_idx) @@ -476,11 +474,12 @@ def _get_video_info(video_path: Path | str) -> dict: def get_videos_info(repo_id: str, local_dir: Path, video_keys: list[str], branch: str) -> dict: hub_api = HfApi() - videos_info_dict = {"videos_path": VIDEO_CHUNK_PATH} + videos_info_dict = {"videos_path": DEFAULT_VIDEO_PATH} # Assumes first episode video_files = [ - VIDEO_CHUNK_PATH.format(episode_chunk=0, video_key=vid_key, episode_index=0) for vid_key in video_keys + DEFAULT_VIDEO_PATH.format(episode_chunk=0, video_key=vid_key, episode_index=0) + for vid_key in video_keys ] hub_api.snapshot_download( repo_id=repo_id, repo_type="dataset", local_dir=local_dir, revision=branch, allow_patterns=video_files @@ -587,8 +586,8 @@ def convert_dataset( total_episodes = len(episode_indices) assert episode_indices == list(range(total_episodes)) total_videos = total_episodes * len(keys["video"]) - total_chunks = total_episodes // EPISODE_CHUNK_SIZE - if total_episodes % EPISODE_CHUNK_SIZE != 0: + total_chunks = total_episodes // DEFAULT_CHUNK_SIZE + if total_episodes % DEFAULT_CHUNK_SIZE != 0: total_chunks += 1 # Tasks @@ -670,14 +669,14 @@ def convert_dataset( # Assemble metadata v2.0 metadata_v2_0 = { "codebase_version": V20, - "data_path": PARQUET_CHUNK_PATH, + "data_path": DEFAULT_PARQUET_PATH, "robot_type": robot_type, "total_episodes": total_episodes, "total_frames": len(dataset), "total_tasks": len(tasks), "total_videos": total_videos, "total_chunks": total_chunks, - "chunks_size": EPISODE_CHUNK_SIZE, + "chunks_size": DEFAULT_CHUNK_SIZE, "fps": metadata_v1["fps"], "splits": {"train": f"0:{total_episodes}"}, "keys": keys["sequence"], From bce3dc3bfae585cd851c3088b7d4cc1f5445d512 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 18 Oct 2024 14:59:09 +0200 Subject: [PATCH 31/59] Add load_metadata --- lerobot/common/datasets/utils.py | 63 +++++++++++--------------------- 1 file changed, 21 insertions(+), 42 deletions(-) diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index fbf4dd5f..bebc3c6f 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -15,16 +15,16 @@ # limitations under the License. 
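For reference, the per-episode and per-task metadata produced by the v1.6 to v2.0 conversion above end up as small JSON records, one per line in meta/episodes.jsonl and, for tasks, in meta/tasks.json (moved to tasks.jsonl later in this series). A plausible single-task example of each, with made-up episode length and task text:

```python
# Illustrative record shapes only; one such JSON object per line on disk.
episode_record = {
    "episode_index": 0,
    "tasks": ["Pick up the cube and place it in the bin."],
    "length": 1150,
}
task_record = {"task_index": 0, "task": "Pick up the cube and place it in the bin."}
```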
import json import warnings -from functools import cache from itertools import accumulate from pathlib import Path from pprint import pformat from typing import Dict import datasets +import jsonlines import torch from datasets import load_dataset -from huggingface_hub import DatasetCard, HfApi, hf_hub_download +from huggingface_hub import DatasetCard, HfApi from PIL import Image as PILImage from torchvision import transforms @@ -96,7 +96,6 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): return items_dict -@cache def get_hub_safe_version(repo_id: str, version: str, enforce_v2: bool = True) -> str: num_version = float(version.strip("v")) if num_version < 2 and enforce_v2: @@ -144,50 +143,30 @@ def load_hf_dataset( return hf_dataset -def load_stats(repo_id: str, version: str, local_dir: Path) -> dict[str, dict[str, torch.Tensor]]: - """stats contains the statistics per modality computed over the full dataset, such as max, min, mean, std +def load_metadata(local_dir: Path) -> tuple[dict | list]: + """Loads metadata files from a dataset.""" + info_path = local_dir / "meta/info.json" + episodes_path = local_dir / "meta/episodes.jsonl" + stats_path = local_dir / "meta/stats.json" + tasks_path = local_dir / "meta/tasks.json" - Example: - ```python - normalized_action = (action - stats["action"]["mean"]) / stats["action"]["std"] - ``` - """ - fpath = hf_hub_download( - repo_id, filename="meta/stats.json", local_dir=local_dir, repo_type="dataset", revision=version - ) - with open(fpath) as f: + with open(info_path) as f: + info = json.load(f) + + with jsonlines.open(episodes_path, "r") as reader: + episode_dicts = list(reader) + + with open(stats_path) as f: stats = json.load(f) - stats = flatten_dict(stats) - stats = {key: torch.tensor(value) for key, value in stats.items()} - return unflatten_dict(stats) - - -def load_info(repo_id: str, version: str, local_dir: Path) -> dict: - """info contains structural information about the dataset. It should be the reference and - act as the 'source of thruth' for what's inside the dataset. 
- - Example: - ```python - print("frame per second used to collect the video", info["fps"]) - ``` - """ - fpath = hf_hub_download( - repo_id, filename="meta/info.json", local_dir=local_dir, repo_type="dataset", revision=version - ) - with open(fpath) as f: - return json.load(f) - - -def load_tasks(repo_id: str, version: str, local_dir: Path) -> dict: - """tasks contains all the tasks of the dataset, indexed by their task_index.""" - fpath = hf_hub_download( - repo_id, filename="meta/tasks.json", local_dir=local_dir, repo_type="dataset", revision=version - ) - with open(fpath) as f: + with open(tasks_path) as f: tasks = json.load(f) - return {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])} + stats = {key: torch.tensor(value) for key, value in flatten_dict(stats).items()} + stats = unflatten_dict(stats) + tasks = {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])} + + return info, episode_dicts, stats, tasks def get_episode_data_index(episodes: list, episode_dicts: list[dict]) -> dict[str, torch.Tensor]: From ac3798bd62e3f5fc888b4203a7a1c9e07abfc8fb Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 18 Oct 2024 17:53:25 +0200 Subject: [PATCH 32/59] Move default paths, use jsonlines for tasks --- lerobot/common/datasets/lerobot_dataset.py | 26 ++++++++------ lerobot/common/datasets/utils.py | 35 +++++++++++++++++-- .../datasets/v2/convert_dataset_v1_to_v2.py | 11 +++--- 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 4e100d1f..cda0412f 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
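A rough usage sketch of the consolidated `load_metadata` helper added above (the cache directory and repo id are illustrative, and the call assumes the dataset's meta/ files are already on disk):

```python
# Sketch only: reading the four metadata objects for a locally cached dataset.
from pathlib import Path

from lerobot.common.datasets.utils import load_metadata

local_dir = Path("~/.cache/huggingface/lerobot/lerobot/pusht").expanduser()  # illustrative repo id
info, episode_dicts, stats, tasks = load_metadata(local_dir)

print(info["fps"])              # collection frame rate, from meta/info.json
print(len(episode_dicts))       # one dict per episode, from meta/episodes.jsonl
print(stats["action"]["mean"])  # per-modality tensors rebuilt from meta/stats.json
print(tasks)                    # {task_index: task description}
```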
+import json import logging import os from pathlib import Path @@ -27,6 +28,7 @@ from lerobot.common.datasets.compute_stats import aggregate_stats from lerobot.common.datasets.utils import ( check_delta_timestamps, check_timestamps_sync, + create_dataset_info, get_delta_indices, get_episode_data_index, get_hub_safe_version, @@ -34,17 +36,12 @@ from lerobot.common.datasets.utils import ( load_metadata, ) from lerobot.common.datasets.video_utils import VideoFrame, decode_video_frames_torchvision +from lerobot.common.robot_devices.robots.utils import Robot # For maintainers, see lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md CODEBASE_VERSION = "v2.0" LEROBOT_HOME = Path(os.getenv("LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser() -DEFAULT_CHUNK_SIZE = 1000 -DEFAULT_VIDEO_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4" -DEFAULT_PARQUET_PATH = ( - "data/chunk-{episode_chunk:03d}/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" -) - class LeRobotDataset(torch.utils.data.Dataset): def __init__( @@ -400,6 +397,10 @@ class LeRobotDataset(torch.utils.data.Dataset): return item + def write_info(self) -> None: + with open(self.root / "meta/info.json", "w") as f: + json.dump(self.info, f, indent=4, ensure_ascii=False) + def __repr__(self): return ( f"{self.__class__.__name__}(\n" @@ -419,17 +420,22 @@ class LeRobotDataset(torch.utils.data.Dataset): def create( cls, repo_id: str, + fps: int, + robot: Robot, root: Path | None = None, - image_transforms: Callable | None = None, - delta_timestamps: dict[list[float]] | None = None, tolerance_s: float = 1e-4, - video_backend: str | None = None, ) -> "LeRobotDataset": """Create a LeRobot Dataset from scratch in order to record data.""" - # create an empty object of type LeRobotDataset obj = cls.__new__(cls) obj.repo_id = repo_id obj.root = root if root is not None else LEROBOT_HOME / repo_id + obj._version = CODEBASE_VERSION + + obj.root.mkdir(exist_ok=True, parents=True) + obj.info = create_dataset_info(obj._version, fps, robot) + obj.write_info() + obj.fps = fps + # obj.episodes = None # obj.image_transforms = None # obj.delta_timestamps = None diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index bebc3c6f..c80838e6 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -28,6 +28,13 @@ from huggingface_hub import DatasetCard, HfApi from PIL import Image as PILImage from torchvision import transforms +from lerobot.common.robot_devices.robots.utils import Robot + +DEFAULT_CHUNK_SIZE = 1000 # Max number of episodes per chunk +DEFAULT_VIDEO_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4" +DEFAULT_PARQUET_PATH = ( + "data/chunk-{episode_chunk:03d}/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" +) DATASET_CARD_TEMPLATE = """ --- # Metadata will go there @@ -145,7 +152,7 @@ def load_hf_dataset( def load_metadata(local_dir: Path) -> tuple[dict | list]: """Loads metadata files from a dataset.""" - info_path = local_dir / "meta/info.json" + info_path = local_dir / "meta/info.jsonl" episodes_path = local_dir / "meta/episodes.jsonl" stats_path = local_dir / "meta/stats.json" tasks_path = local_dir / "meta/tasks.json" @@ -159,8 +166,8 @@ def load_metadata(local_dir: Path) -> tuple[dict | list]: with open(stats_path) as f: stats = json.load(f) - with open(tasks_path) as f: - tasks = json.load(f) + with jsonlines.open(tasks_path, "r") as reader: + tasks = list(reader) stats = {key: 
torch.tensor(value) for key, value in flatten_dict(stats).items()} stats = unflatten_dict(stats) @@ -169,6 +176,28 @@ def load_metadata(local_dir: Path) -> tuple[dict | list]: return info, episode_dicts, stats, tasks +def create_dataset_info(codebase_version: str, fps: int, robot: Robot) -> dict: + return { + "codebase_version": codebase_version, + "data_path": DEFAULT_PARQUET_PATH, + "robot_type": robot.robot_type, + "total_episodes": 0, + "total_frames": 0, + "total_tasks": 0, + "total_videos": 0, + "total_chunks": 0, + "chunks_size": DEFAULT_CHUNK_SIZE, + "fps": fps, + "splits": {}, + # "keys": keys, + # "video_keys": video_keys, + # "image_keys": image_keys, + # "shapes": {**sequence_shapes, **video_shapes, **image_shapes}, + # "names": names, + # "videos": {"videos_path": DEFAULT_VIDEO_PATH} if video_keys else None, + } + + def get_episode_data_index(episodes: list, episode_dicts: list[dict]) -> dict[str, torch.Tensor]: episode_lengths = {ep_idx: ep_dict["length"] for ep_idx, ep_dict in enumerate(episode_dicts)} if episodes is not None: diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index a498f9c1..4342ad6c 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -120,12 +120,15 @@ from huggingface_hub.errors import EntryNotFoundError from PIL import Image from safetensors.torch import load_file -from lerobot.common.datasets.lerobot_dataset import ( +from lerobot.common.datasets.utils import ( DEFAULT_CHUNK_SIZE, DEFAULT_PARQUET_PATH, DEFAULT_VIDEO_PATH, + create_branch, + flatten_dict, + get_hub_safe_version, + unflatten_dict, ) -from lerobot.common.datasets.utils import create_branch, flatten_dict, get_hub_safe_version, unflatten_dict from lerobot.common.utils.utils import init_hydra_config from lerobot.scripts.push_dataset_to_hub import push_dataset_card_to_hub @@ -607,8 +610,8 @@ def convert_dataset( raise ValueError assert set(tasks) == {task for ep_tasks in tasks_by_episodes.values() for task in ep_tasks} - task_json = [{"task_index": task_idx, "task": task} for task_idx, task in enumerate(tasks)] - write_json(task_json, v20_dir / "meta" / "tasks.json") + tasks = [{"task_index": task_idx, "task": task} for task_idx, task in enumerate(tasks)] + write_jsonlines(tasks, v20_dir / "meta" / "tasks.json") # Shapes sequence_shapes = {key: dataset.features[key].length for key in keys["sequence"]} From 9316cf46ef4f7e2473d1d3e605d6fe1da4e6310d Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sun, 20 Oct 2024 14:00:19 +0200 Subject: [PATCH 33/59] Add file paths --- lerobot/common/datasets/lerobot_dataset.py | 70 ++++++++++++++++------ lerobot/common/datasets/utils.py | 43 +++---------- 2 files changed, 60 insertions(+), 53 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index cda0412f..43d8708d 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -22,6 +22,7 @@ from typing import Callable import datasets import torch import torch.utils +from datasets import load_dataset from huggingface_hub import snapshot_download from lerobot.common.datasets.compute_stats import aggregate_stats @@ -32,7 +33,7 @@ from lerobot.common.datasets.utils import ( get_delta_indices, get_episode_data_index, get_hub_safe_version, - load_hf_dataset, + hf_transform_to_torch, load_metadata, ) from lerobot.common.datasets.video_utils import 
VideoFrame, decode_video_frames_torchvision @@ -100,7 +101,7 @@ class LeRobotDataset(torch.utils.data.Dataset): │ ├── episodes.jsonl │ ├── info.json │ ├── stats.json - │ └── tasks.json + │ └── tasks.jsonl └── videos (optional) ├── chunk-000 │ ├── observation.images.laptop @@ -160,12 +161,12 @@ class LeRobotDataset(torch.utils.data.Dataset): # Load metadata self.root.mkdir(exist_ok=True, parents=True) self._version = get_hub_safe_version(repo_id, CODEBASE_VERSION) - self.download_metadata() + self.pull_from_repo(allow_patterns="meta/") self.info, self.episode_dicts, self.stats, self.tasks = load_metadata(self.root) # Load actual data self.download_episodes() - self.hf_dataset = load_hf_dataset(self.root, self.data_path, self.total_episodes, self.episodes) + self.hf_dataset = self.load_hf_dataset() self.episode_data_index = get_episode_data_index(self.episodes, self.episode_dicts) # Check timestamps @@ -187,13 +188,18 @@ class LeRobotDataset(torch.utils.data.Dataset): # - [ ] Update episode_index (arg update=True) # - [ ] Update info.json (arg update=True) - def download_metadata(self) -> None: + def pull_from_repo( + self, + allow_patterns: list[str] | str | None = None, + ignore_patterns: list[str] | str | None = None, + ) -> None: snapshot_download( self.repo_id, repo_type="dataset", revision=self._version, local_dir=self.root, - allow_patterns="meta/", + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, ) def download_episodes(self) -> None: @@ -207,26 +213,46 @@ class LeRobotDataset(torch.utils.data.Dataset): files = None ignore_patterns = None if self.download_videos else "videos/" if self.episodes is not None: - files = [ - self.data_path.format(episode_index=ep_idx, total_episodes=self.total_episodes) - for ep_idx in self.episodes - ] + files = [self.get_data_file_path(ep_idx) for ep_idx in self.episodes] if len(self.video_keys) > 0 and self.download_videos: video_files = [ - self.videos_path.format(video_key=vid_key, episode_index=ep_idx) + self.get_video_file_path(ep_idx, vid_key) for vid_key in self.video_keys for ep_idx in self.episodes ] files += video_files - snapshot_download( - self.repo_id, - repo_type="dataset", - revision=self._version, - local_dir=self.root, - allow_patterns=files, - ignore_patterns=ignore_patterns, + self.pull_from_repo(allow_patterns=files, ignore_patterns=ignore_patterns) + + def load_hf_dataset(self) -> datasets.Dataset: + """hf_dataset contains all the observations, states, actions, rewards, etc.""" + if self.episodes is None: + path = str(self.root / "data") + hf_dataset = load_dataset("parquet", data_dir=path, split="train") + else: + files = [self.get_data_file_path(ep_idx) for ep_idx in self.episodes] + hf_dataset = load_dataset("parquet", data_files=files, split="train") + + hf_dataset.set_transform(hf_transform_to_torch) + return hf_dataset + + def get_data_file_path(self, ep_index: int, return_str: bool = True) -> str | Path: + ep_chunk = self.get_episode_chunk(ep_index) + fpath = self.data_path.format( + episode_chunk=ep_chunk, episode_index=ep_index, total_episodes=self.total_episodes ) + return str(self.root / fpath) if return_str else self.root / fpath + + def get_video_file_path(self, ep_index: int, vid_key: str, return_str: bool = True) -> str | Path: + ep_chunk = self.get_episode_chunk(ep_index) + fpath = self.videos_path.format(episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_index) + return str(self.root / fpath) if return_str else self.root / fpath + + def get_episode_chunk(self, ep_index: int) -> int: + 
ep_chunk = ep_index // self.chunks_size + if ep_index > 0 and ep_index % self.chunks_size == 0: + ep_chunk -= 1 + return ep_chunk @property def data_path(self) -> str: @@ -355,7 +381,7 @@ class LeRobotDataset(torch.utils.data.Dataset): """ item = {} for vid_key, query_ts in query_timestamps.items(): - video_path = self.root / self.videos_path.format(video_key=vid_key, episode_index=ep_idx) + video_path = self.root / self.get_video_file_path(ep_idx, vid_key) frames = decode_video_frames_torchvision( video_path, query_ts, self.tolerance_s, self.video_backend ) @@ -436,6 +462,12 @@ class LeRobotDataset(torch.utils.data.Dataset): obj.write_info() obj.fps = fps + if not all(cam.fps == fps for cam in robot.cameras): + logging.warn( + f"Some cameras in your {robot.robot_type} robot don't have an fps matching the fps of your dataset." + "In this case, frames from lower fps cameras will be repeated to fill in the blanks" + ) + # obj.episodes = None # obj.image_transforms = None # obj.delta_timestamps = None diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index c80838e6..90bb35c1 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -23,7 +23,6 @@ from typing import Dict import datasets import jsonlines import torch -from datasets import load_dataset from huggingface_hub import DatasetCard, HfApi from PIL import Image as PILImage from torchvision import transforms @@ -87,15 +86,6 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): if isinstance(first_item, PILImage.Image): to_tensor = transforms.ToTensor() items_dict[key] = [to_tensor(img) for img in items_dict[key]] - # TODO(aliberts): remove this part as we'll be using task_index - elif isinstance(first_item, str): - # TODO (michel-aractingi): add str2embedding via language tokenizer - # For now we leave this part up to the user to choose how to address - # language conditioned tasks - pass - elif isinstance(first_item, dict) and "path" in first_item and "timestamp" in first_item: - # video frame will be processed downstream - pass elif first_item is None: pass else: @@ -130,32 +120,12 @@ def get_hub_safe_version(repo_id: str, version: str, enforce_v2: bool = True) -> return version -def load_hf_dataset( - local_dir: Path, - data_path: str, - total_episodes: int, - episodes: list[int] | None = None, - split: str = "train", -) -> datasets.Dataset: - """hf_dataset contains all the observations, states, actions, rewards, etc.""" - if episodes is None: - path = str(local_dir / "data") - hf_dataset = load_dataset("parquet", data_dir=path, split=split) - else: - files = [data_path.format(episode_index=ep_idx, total_episodes=total_episodes) for ep_idx in episodes] - files = [str(local_dir / fpath) for fpath in files] - hf_dataset = load_dataset("parquet", data_files=files, split=split) - - hf_dataset.set_transform(hf_transform_to_torch) - return hf_dataset - - def load_metadata(local_dir: Path) -> tuple[dict | list]: """Loads metadata files from a dataset.""" - info_path = local_dir / "meta/info.jsonl" + info_path = local_dir / "meta/info.json" episodes_path = local_dir / "meta/episodes.jsonl" stats_path = local_dir / "meta/stats.json" - tasks_path = local_dir / "meta/tasks.json" + tasks_path = local_dir / "meta/tasks.jsonl" with open(info_path) as f: info = json.load(f) @@ -499,12 +469,17 @@ def create_branch(repo_id, *, branch: str, repo_type: str | None = None): api.create_branch(repo_id, repo_type=repo_type, branch=branch) -def create_lerobot_dataset_card(tags: list | 
None = None, text: str | None = None) -> DatasetCard: +def create_lerobot_dataset_card( + tags: list | None = None, text: str | None = None, info: dict | None = None +) -> DatasetCard: card = DatasetCard(DATASET_CARD_TEMPLATE) card.data.task_categories = ["robotics"] card.data.tags = ["LeRobot"] if tags is not None: card.data.tags += tags if text is not None: - card.text += text + card.text += f"{text}\n" + if info is not None: + card.text += "[meta/info.json](meta/info.json)\n" + card.text += f"```json\n{json.dumps(info, indent=4)}\n```" return card From e46bdb9d3074454292c33892b828ad94204458e3 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sun, 20 Oct 2024 14:01:10 +0200 Subject: [PATCH 34/59] Change card creation --- lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index 4342ad6c..65a2061e 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -125,12 +125,12 @@ from lerobot.common.datasets.utils import ( DEFAULT_PARQUET_PATH, DEFAULT_VIDEO_PATH, create_branch, + create_lerobot_dataset_card, flatten_dict, get_hub_safe_version, unflatten_dict, ) from lerobot.common.utils.utils import init_hydra_config -from lerobot.scripts.push_dataset_to_hub import push_dataset_card_to_hub V16 = "v1.6" V20 = "v2.0" @@ -716,8 +716,9 @@ def convert_dataset( revision=branch, ) - card_text = f"[meta/info.json](meta/info.json)\n```json\n{json.dumps(metadata_v2_0, indent=4)}\n```" - push_dataset_card_to_hub(repo_id=repo_id, revision=branch, tags=repo_tags, text=card_text) + card = create_lerobot_dataset_card(tags=repo_tags, info=metadata_v2_0) + card.push_to_hub(repo_id=repo_id, repo_type="dataset", revision=branch) + if not test_branch: create_branch(repo_id=repo_id, branch=V20, repo_type="dataset") From 3b925c3dce5a3b2f741b4335ff075f4d52152697 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Mon, 21 Oct 2024 00:15:09 +0200 Subject: [PATCH 35/59] Add ImageWriter --- lerobot/common/datasets/image_writer.py | 130 ++++++++ lerobot/common/datasets/populate_dataset.py | 352 ++++++-------------- 2 files changed, 231 insertions(+), 251 deletions(-) create mode 100644 lerobot/common/datasets/image_writer.py diff --git a/lerobot/common/datasets/image_writer.py b/lerobot/common/datasets/image_writer.py new file mode 100644 index 00000000..c87e342b --- /dev/null +++ b/lerobot/common/datasets/image_writer.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
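The new module below centralizes asynchronous image writing during recording. As a small illustration, its default path template (defined just after the imports) resolves like this for the first frame of episode 0 (the camera key is an example):

```python
# Illustrative only: where the writer places images by default.
DEFAULT_IMAGE_PATH = "images/{image_key}/episode_{episode_index:06d}/frame_{frame_index:06d}.png"

print(DEFAULT_IMAGE_PATH.format(image_key="observation.images.laptop", episode_index=0, frame_index=0))
# images/observation.images.laptop/episode_000000/frame_000000.png
```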
+import multiprocessing +from concurrent.futures import ThreadPoolExecutor, wait +from pathlib import Path + +import torch +import tqdm +from PIL import Image + +DEFAULT_IMAGE_PATH = "images/{image_key}/episode_{episode_index:06d}/frame_{frame_index:06d}.png" + + +def safe_stop_image_writer(func): + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + dataset = kwargs.get("dataset", None) + image_writer = getattr(dataset, "image_writer", None) if dataset else None + if image_writer is not None: + print("Waiting for image writer to terminate...") + image_writer.stop() + raise e + + return wrapper + + +class ImageWriter: + """This class abstract away the initialisation of processes or/and threads to + save images on disk asynchrounously, which is critical to control a robot and record data + at a high frame rate. + + When `num_processes=0`, it creates a threads pool of size `num_threads`. + When `num_processes>0`, it creates processes pool of size `num_processes`, where each subprocess starts + their own threads pool of size `num_threads`. + + The optimal number of processes and threads depends on your computer capabilities. + We advise to use 4 threads per camera with 0 processes. If the fps is not stable, try to increase or lower + the number of threads. If it is still not stable, try to use 1 subprocess, or more. + """ + + def __init__(self, write_dir: Path, num_processes: int = 0, num_threads: int = 1): + self.dir = write_dir + self.image_path = DEFAULT_IMAGE_PATH + self.num_processes = num_processes + self.num_threads = self.num_threads_per_process = num_threads + + if self.num_processes <= 0: + self.type = "threads" + self.threads = ThreadPoolExecutor(max_workers=self.num_threads) + self.futures = [] + else: + self.type = "processes" + self.num_threads_per_process = self.num_threads + self.image_queue = multiprocessing.Queue() + self.processes: list[multiprocessing.Process] = [] + for _ in range(num_processes): + process = multiprocessing.Process(target=self._loop_to_save_images_in_threads) + process.start() + self.processes.append(process) + + def _loop_to_save_images_in_threads(self) -> None: + with ThreadPoolExecutor(max_workers=self.num_threads) as executor: + futures = [] + while True: + frame_data = self.image_queue.get() + if frame_data is None: + break + + image, file_path = frame_data + futures.append(executor.submit(self._save_image, image, file_path)) + + with tqdm.tqdm(total=len(futures), desc="Writing images") as progress_bar: + wait(futures) + progress_bar.update(len(futures)) + + def async_save_image(self, image: torch.Tensor, file_path: Path) -> None: + """Save an image asynchronously using threads or processes.""" + if self.type == "threads": + self.futures.append(self.threads.submit(self._save_image, image, file_path)) + else: + self.image_queue.put((image, file_path)) + + def _save_image(self, image: torch.Tensor, file_path: Path) -> None: + img = Image.fromarray(image.numpy()) + img.save(str(file_path), quality=100) + + def get_image_file_path( + self, episode_index: int, image_key: str, frame_index: int, return_str: bool = True + ) -> str | Path: + fpath = self.image_path.format( + image_key=image_key, episode_index=episode_index, frame_index=frame_index + ) + return str(self.dir / fpath) if return_str else self.dir / fpath + + def stop(self, timeout=20) -> None: + """Stop the image writer, waiting for all processes or threads to finish.""" + if self.type == "threads": + with tqdm.tqdm(total=len(self.futures), desc="Writing 
images") as progress_bar: + wait(self.futures, timeout=timeout) + progress_bar.update(len(self.futures)) + else: + self._stop_processes(self.processes, self.image_queue, timeout) + + def _stop_processes(self, timeout) -> None: + for _ in self.processes: + self.image_queue.put(None) + + for process in self.processes: + process.join(timeout=timeout) + + if process.is_alive(): + process.terminate() + + self.image_queue.close() + self.image_queue.join_thread() diff --git a/lerobot/common/datasets/populate_dataset.py b/lerobot/common/datasets/populate_dataset.py index df5d20e5..854b639e 100644 --- a/lerobot/common/datasets/populate_dataset.py +++ b/lerobot/common/datasets/populate_dataset.py @@ -1,16 +1,12 @@ """Functions to create an empty dataset, and populate it with frames.""" # TODO(rcadene, aliberts): to adapt as class methods of next version of LeRobotDataset -import concurrent import json import logging -import multiprocessing import shutil -from pathlib import Path import torch import tqdm -from PIL import Image from lerobot.common.datasets.compute_stats import compute_stats from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset @@ -26,277 +22,131 @@ from lerobot.scripts.push_dataset_to_hub import ( save_meta_data, ) -######################################################################################## -# Asynchrounous saving of images on disk -######################################################################################## - - -def safe_stop_image_writer(func): - # TODO(aliberts): Allow to pass custom exceptions - # (e.g. ThreadServiceExit, KeyboardInterrupt, SystemExit, UnpluggedError, DynamixelCommError) - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as e: - image_writer = kwargs.get("dataset", {}).get("image_writer") - if image_writer is not None: - print("Waiting for image writer to terminate...") - stop_image_writer(image_writer, timeout=20) - raise e - - return wrapper - - -def save_image(img_tensor, key, frame_index, episode_index, videos_dir: str): - img = Image.fromarray(img_tensor.numpy()) - path = Path(videos_dir) / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png" - path.parent.mkdir(parents=True, exist_ok=True) - img.save(str(path), quality=100) - - -def loop_to_save_images_in_threads(image_queue, num_threads): - if num_threads < 1: - raise NotImplementedError(f"Only `num_threads>=1` is supported for now, but {num_threads=} given.") - - with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor: - futures = [] - while True: - # Blocks until a frame is available - frame_data = image_queue.get() - - # As usually done, exit loop when receiving None to stop the worker - if frame_data is None: - break - - image, key, frame_index, episode_index, videos_dir = frame_data - futures.append(executor.submit(save_image, image, key, frame_index, episode_index, videos_dir)) - - # Before exiting function, wait for all threads to complete - with tqdm.tqdm(total=len(futures), desc="Writing images") as progress_bar: - concurrent.futures.wait(futures) - progress_bar.update(len(futures)) - - -def start_image_writer_processes(image_queue, num_processes, num_threads_per_process): - if num_processes < 1: - raise ValueError(f"Only `num_processes>=1` is supported, but {num_processes=} given.") - - if num_threads_per_process < 1: - raise NotImplementedError( - "Only `num_threads_per_process>=1` is supported for now, but {num_threads_per_process=} given." 
- ) - - processes = [] - for _ in range(num_processes): - process = multiprocessing.Process( - target=loop_to_save_images_in_threads, - args=(image_queue, num_threads_per_process), - ) - process.start() - processes.append(process) - return processes - - -def stop_processes(processes, queue, timeout): - # Send None to each process to signal them to stop - for _ in processes: - queue.put(None) - - # Wait maximum 20 seconds for all processes to terminate - for process in processes: - process.join(timeout=timeout) - - # If not terminated after 20 seconds, force termination - if process.is_alive(): - process.terminate() - - # Close the queue, no more items can be put in the queue - queue.close() - - # Ensure all background queue threads have finished - queue.join_thread() - - -def start_image_writer(num_processes, num_threads): - """This function abstract away the initialisation of processes or/and threads to - save images on disk asynchrounously, which is critical to control a robot and record data - at a high frame rate. - - When `num_processes=0`, it returns a dictionary containing a threads pool of size `num_threads`. - When `num_processes>0`, it returns a dictionary containing a processes pool of size `num_processes`, - where each subprocess starts their own threads pool of size `num_threads`. - - The optimal number of processes and threads depends on your computer capabilities. - We advise to use 4 threads per camera with 0 processes. If the fps is not stable, try to increase or lower - the number of threads. If it is still not stable, try to use 1 subprocess, or more. - """ - image_writer = {} - - if num_processes == 0: - futures = [] - threads_pool = concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) - image_writer["threads_pool"], image_writer["futures"] = threads_pool, futures - else: - # TODO(rcadene): When using num_processes>1, `multiprocessing.Manager().Queue()` - # might be better than `multiprocessing.Queue()`. Source: https://www.geeksforgeeks.org/python-multiprocessing-queue-vs-multiprocessing-manager-queue - image_queue = multiprocessing.Queue() - processes_pool = start_image_writer_processes( - image_queue, num_processes=num_processes, num_threads_per_process=num_threads - ) - image_writer["processes_pool"], image_writer["image_queue"] = processes_pool, image_queue - - return image_writer - - -def async_save_image(image_writer, image, key, frame_index, episode_index, videos_dir): - """This function abstract away the saving of an image on disk asynchrounously. It uses a dictionary - called image writer which contains either a pool of processes or a pool of threads. 
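These removed helpers are superseded by the `ImageWriter` class introduced above; a minimal usage sketch of the class in threads-only mode (the directory, camera key and image contents are illustrative):

```python
# Minimal sketch, assuming the lerobot package from this series is installed.
from pathlib import Path

import torch

from lerobot.common.datasets.image_writer import ImageWriter

writer = ImageWriter(write_dir=Path("outputs/example_images"), num_processes=0, num_threads=4)

image = (torch.rand(480, 640, 3) * 255).to(torch.uint8)  # HWC uint8, what PIL expects
fpath = writer.get_image_file_path(
    episode_index=0, image_key="observation.images.laptop", frame_index=0, return_str=False
)
fpath.parent.mkdir(parents=True, exist_ok=True)  # add_frame() does this on the first frame
writer.async_save_image(image, fpath)
writer.stop()  # wait for pending writes before exiting
```

Functions that run while such a writer is active can additionally be wrapped with `safe_stop_image_writer` so pending images are flushed if an exception is raised.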
- """ - if "threads_pool" in image_writer: - threads_pool, futures = image_writer["threads_pool"], image_writer["futures"] - futures.append(threads_pool.submit(save_image, image, key, frame_index, episode_index, videos_dir)) - else: - image_queue = image_writer["image_queue"] - image_queue.put((image, key, frame_index, episode_index, videos_dir)) - - -def stop_image_writer(image_writer, timeout): - if "threads_pool" in image_writer: - futures = image_writer["futures"] - # Before exiting function, wait for all threads to complete - with tqdm.tqdm(total=len(futures), desc="Writing images") as progress_bar: - concurrent.futures.wait(futures, timeout=timeout) - progress_bar.update(len(futures)) - else: - processes_pool, image_queue = image_writer["processes_pool"], image_writer["image_queue"] - stop_processes(processes_pool, image_queue, timeout=timeout) - - ######################################################################################## # Functions to initialize, resume and populate a dataset ######################################################################################## -def init_dataset( - repo_id, - root, - force_override, - fps, - video, - write_images, - num_image_writer_processes, - num_image_writer_threads, -): - local_dir = Path(root) / repo_id - if local_dir.exists() and force_override: - shutil.rmtree(local_dir) +# def init_dataset( +# repo_id, +# root, +# force_override, +# fps, +# video, +# write_images, +# num_image_writer_processes, +# num_image_writer_threads, +# ): +# local_dir = Path(root) / repo_id +# if local_dir.exists() and force_override: +# shutil.rmtree(local_dir) - episodes_dir = local_dir / "episodes" - episodes_dir.mkdir(parents=True, exist_ok=True) +# episodes_dir = local_dir / "episodes" +# episodes_dir.mkdir(parents=True, exist_ok=True) - videos_dir = local_dir / "videos" - videos_dir.mkdir(parents=True, exist_ok=True) +# videos_dir = local_dir / "videos" +# videos_dir.mkdir(parents=True, exist_ok=True) - # Logic to resume data recording - rec_info_path = episodes_dir / "data_recording_info.json" - if rec_info_path.exists(): - with open(rec_info_path) as f: - rec_info = json.load(f) - num_episodes = rec_info["last_episode_index"] + 1 - else: - num_episodes = 0 +# # Logic to resume data recording +# rec_info_path = episodes_dir / "data_recording_info.json" +# if rec_info_path.exists(): +# with open(rec_info_path) as f: +# rec_info = json.load(f) +# num_episodes = rec_info["last_episode_index"] + 1 +# else: +# num_episodes = 0 - dataset = { - "repo_id": repo_id, - "local_dir": local_dir, - "videos_dir": videos_dir, - "episodes_dir": episodes_dir, - "fps": fps, - "video": video, - "rec_info_path": rec_info_path, - "num_episodes": num_episodes, - } +# dataset = { +# "repo_id": repo_id, +# "local_dir": local_dir, +# "videos_dir": videos_dir, +# "episodes_dir": episodes_dir, +# "fps": fps, +# "video": video, +# "rec_info_path": rec_info_path, +# "num_episodes": num_episodes, +# } - if write_images: - # Initialize processes or/and threads dedicated to save images on disk asynchronously, - # which is critical to control a robot and record data at a high frame rate. - image_writer = start_image_writer( - num_processes=num_image_writer_processes, - num_threads=num_image_writer_threads, - ) - dataset["image_writer"] = image_writer +# if write_images: +# # Initialize processes or/and threads dedicated to save images on disk asynchronously, +# # which is critical to control a robot and record data at a high frame rate. 
+# image_writer = start_image_writer( +# num_processes=num_image_writer_processes, +# num_threads=num_image_writer_threads, +# ) +# dataset["image_writer"] = image_writer - return dataset +# return dataset -def add_frame(dataset, observation, action): - if "current_episode" not in dataset: - # initialize episode dictionary - ep_dict = {} - for key in observation: - if key not in ep_dict: - ep_dict[key] = [] - for key in action: - if key not in ep_dict: - ep_dict[key] = [] +# def add_frame(dataset, observation, action): +# if "current_episode" not in dataset: +# # initialize episode dictionary +# ep_dict = {} +# for key in observation: +# if key not in ep_dict: +# ep_dict[key] = [] +# for key in action: +# if key not in ep_dict: +# ep_dict[key] = [] - ep_dict["episode_index"] = [] - ep_dict["frame_index"] = [] - ep_dict["timestamp"] = [] - ep_dict["next.done"] = [] +# ep_dict["episode_index"] = [] +# ep_dict["frame_index"] = [] +# ep_dict["timestamp"] = [] +# ep_dict["next.done"] = [] - dataset["current_episode"] = ep_dict - dataset["current_frame_index"] = 0 +# dataset["current_episode"] = ep_dict +# dataset["current_frame_index"] = 0 - ep_dict = dataset["current_episode"] - episode_index = dataset["num_episodes"] - frame_index = dataset["current_frame_index"] - videos_dir = dataset["videos_dir"] - video = dataset["video"] - fps = dataset["fps"] +# ep_dict = dataset["current_episode"] +# episode_index = dataset["num_episodes"] +# frame_index = dataset["current_frame_index"] +# videos_dir = dataset["videos_dir"] +# video = dataset["video"] +# fps = dataset["fps"] - ep_dict["episode_index"].append(episode_index) - ep_dict["frame_index"].append(frame_index) - ep_dict["timestamp"].append(frame_index / fps) - ep_dict["next.done"].append(False) +# ep_dict["episode_index"].append(episode_index) +# ep_dict["frame_index"].append(frame_index) +# ep_dict["timestamp"].append(frame_index / fps) +# ep_dict["next.done"].append(False) - img_keys = [key for key in observation if "image" in key] - non_img_keys = [key for key in observation if "image" not in key] +# img_keys = [key for key in observation if "image" in key] +# non_img_keys = [key for key in observation if "image" not in key] - # Save all observed modalities except images - for key in non_img_keys: - ep_dict[key].append(observation[key]) +# # Save all observed modalities except images +# for key in non_img_keys: +# ep_dict[key].append(observation[key]) - # Save actions - for key in action: - ep_dict[key].append(action[key]) +# # Save actions +# for key in action: +# ep_dict[key].append(action[key]) - if "image_writer" not in dataset: - dataset["current_frame_index"] += 1 - return +# if "image_writer" not in dataset: +# dataset["current_frame_index"] += 1 +# return - # Save images - image_writer = dataset["image_writer"] - for key in img_keys: - imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}" - async_save_image( - image_writer, - image=observation[key], - key=key, - frame_index=frame_index, - episode_index=episode_index, - videos_dir=str(videos_dir), - ) +# # Save images +# image_writer = dataset["image_writer"] +# for key in img_keys: +# imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}" +# async_save_image( +# image_writer, +# image=observation[key], +# key=key, +# frame_index=frame_index, +# episode_index=episode_index, +# videos_dir=str(videos_dir), +# ) - if video: - fname = f"{key}_episode_{episode_index:06d}.mp4" - frame_info = {"path": f"videos/{fname}", "timestamp": frame_index / fps} - else: - frame_info = 
str(imgs_dir / f"frame_{frame_index:06d}.png") +# if video: +# fname = f"{key}_episode_{episode_index:06d}.mp4" +# frame_info = {"path": f"videos/{fname}", "timestamp": frame_index / fps} +# else: +# frame_info = str(imgs_dir / f"frame_{frame_index:06d}.png") - ep_dict[key].append(frame_info) +# ep_dict[key].append(frame_info) - dataset["current_frame_index"] += 1 +# dataset["current_frame_index"] += 1 def delete_current_episode(dataset): @@ -449,7 +299,7 @@ def create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_s if "image_writer" in dataset: logging.info("Waiting for image writer to terminate...") image_writer = dataset["image_writer"] - stop_image_writer(image_writer, timeout=20) + image_writer.stop() lerobot_dataset = from_dataset_to_lerobot_dataset(dataset, play_sounds) From c1232a01e2e2872e7250135d6a560f6cfef607b9 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Mon, 21 Oct 2024 00:16:52 +0200 Subject: [PATCH 36/59] Add add_frame, empty dataset creation --- lerobot/common/datasets/lerobot_dataset.py | 79 ++++++++++++++++--- lerobot/common/datasets/utils.py | 34 ++++++-- .../common/robot_devices/cameras/opencv.py | 4 + lerobot/common/robot_devices/control_utils.py | 8 +- .../robot_devices/robots/manipulator.py | 7 ++ lerobot/scripts/control_robot.py | 15 ++-- 6 files changed, 114 insertions(+), 33 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 43d8708d..61331c5a 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -13,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
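The changes below wire the writer into recording. As a sketch of the intended flow (the repo id, fps and write directory are illustrative, and `robot` is assumed to be an already-connected ManipulatorRobot whose teleop step returns observation/action dictionaries):

```python
# Sketch of the recording flow this patch builds toward; not runnable without
# a configured, connected robot.
from pathlib import Path

from lerobot.common.datasets.image_writer import ImageWriter
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

image_writer = ImageWriter(write_dir=Path("data/example"), num_processes=0, num_threads=4)
dataset = LeRobotDataset.create("my_user/example", fps=30, robot=robot, image_writer=image_writer)

observation, action = robot.teleop_step(record_data=True)
dataset.add_frame({**observation, **action})  # buffers low-dim data, queues images to the writer
```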
-import json import logging import os from pathlib import Path @@ -26,15 +25,17 @@ from datasets import load_dataset from huggingface_hub import snapshot_download from lerobot.common.datasets.compute_stats import aggregate_stats +from lerobot.common.datasets.image_writer import ImageWriter from lerobot.common.datasets.utils import ( check_delta_timestamps, check_timestamps_sync, - create_dataset_info, + create_empty_dataset_info, get_delta_indices, get_episode_data_index, get_hub_safe_version, hf_transform_to_torch, load_metadata, + write_json, ) from lerobot.common.datasets.video_utils import VideoFrame, decode_video_frames_torchvision from lerobot.common.robot_devices.robots.utils import Robot @@ -55,6 +56,7 @@ class LeRobotDataset(torch.utils.data.Dataset): tolerance_s: float = 1e-4, download_videos: bool = True, video_backend: str | None = None, + image_writer: ImageWriter | None = None, ): """LeRobotDataset encapsulates 3 main things: - metadata: @@ -156,6 +158,8 @@ class LeRobotDataset(torch.utils.data.Dataset): self.tolerance_s = tolerance_s self.download_videos = download_videos self.video_backend = video_backend if video_backend is not None else "pyav" + self.image_writer = image_writer + self.episode_buffer = {} self.delta_indices = None # Load metadata @@ -296,9 +300,14 @@ class LeRobotDataset(torch.utils.data.Dataset): @property def num_samples(self) -> int: - """Number of samples/frames.""" + """Number of samples/frames in selected episodes.""" return len(self.hf_dataset) + @property + def total_frames(self) -> int: + """Total number of frames saved in this dataset.""" + return self.info["total_frames"] + @property def num_episodes(self) -> int: """Number of episodes selected.""" @@ -423,10 +432,6 @@ class LeRobotDataset(torch.utils.data.Dataset): return item - def write_info(self) -> None: - with open(self.root / "meta/info.json", "w") as f: - json.dump(self.info, f, indent=4, ensure_ascii=False) - def __repr__(self): return ( f"{self.__class__.__name__}(\n" @@ -442,6 +447,49 @@ class LeRobotDataset(torch.utils.data.Dataset): f")" ) + def _create_episode_buffer(self) -> dict: + # TODO(aliberts): Handle resume + return { + "chunk": self.total_chunks, + "episode_index": self.total_episodes, + "size": 0, + "frame_index": [], + "timestamp": [], + "next.done": [], + **{key: [] for key in self.keys}, + } + + def add_frame(self, frame: dict) -> None: + frame_index = self.episode_buffer["size"] + self.episode_buffer["frame_index"].append(frame_index) + self.episode_buffer["timestamp"].append(frame_index / self.fps) + self.episode_buffer["next.done"].append(False) + + # Save all observed modalities except images + for key in self.keys: + self.episode_buffer[key].append(frame[key]) + + self.episode_buffer["size"] += 1 + + if self.image_writer is None: + return + + # Save images + for cam_key in self.camera_keys: + img_path = self.image_writer.get_image_file_path( + episode_index=self.episode_buffer["episode_index"], + image_key=cam_key, + frame_index=frame_index, + return_str=False, + ) + if frame_index == 0: + img_path.parent.mkdir(parents=True, exist_ok=True) + + self.image_writer.async_save_image( + image=frame[cam_key], + file_path=img_path, + ) + @classmethod def create( cls, @@ -450,24 +498,29 @@ class LeRobotDataset(torch.utils.data.Dataset): robot: Robot, root: Path | None = None, tolerance_s: float = 1e-4, + image_writer: ImageWriter | None = None, + use_videos: bool = True, ) -> "LeRobotDataset": """Create a LeRobot Dataset from scratch in order to record data.""" obj = 
cls.__new__(cls) obj.repo_id = repo_id obj.root = root if root is not None else LEROBOT_HOME / repo_id obj._version = CODEBASE_VERSION + obj.tolerance_s = tolerance_s + obj.image_writer = image_writer - obj.root.mkdir(exist_ok=True, parents=True) - obj.info = create_dataset_info(obj._version, fps, robot) - obj.write_info() - obj.fps = fps - - if not all(cam.fps == fps for cam in robot.cameras): + if not all(cam.fps == fps for cam in robot.cameras.values()): logging.warn( f"Some cameras in your {robot.robot_type} robot don't have an fps matching the fps of your dataset." "In this case, frames from lower fps cameras will be repeated to fill in the blanks" ) + obj.info = create_empty_dataset_info(obj._version, fps, robot, use_videos) + write_json(obj.info, obj.root / "meta/info.json") + + # TODO(aliberts, rcadene, alexander-soare): Merge this with OnlineBuffer/DataBuffer + obj.episode_buffer = obj._create_episode_buffer() + # obj.episodes = None # obj.image_transforms = None # obj.delta_timestamps = None diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 90bb35c1..79459882 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -75,6 +75,12 @@ def unflatten_dict(d, sep="/"): return outdict +def write_json(data: dict, fpath: Path) -> None: + fpath.parent.mkdir(exist_ok=True, parents=True) + with open(fpath, "w") as f: + json.dump(data, f, indent=4, ensure_ascii=False) + + def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): """Get a transform function that convert items from Hugging Face dataset (pyarrow) to torch tensors. Importantly, images are converted from PIL, which corresponds to @@ -146,7 +152,16 @@ def load_metadata(local_dir: Path) -> tuple[dict | list]: return info, episode_dicts, stats, tasks -def create_dataset_info(codebase_version: str, fps: int, robot: Robot) -> dict: +def create_empty_dataset_info(codebase_version: str, fps: int, robot: Robot, use_videos: bool = True) -> dict: + shapes = {key: len(names) for key, names in robot.names.items()} + camera_shapes = {} + for key, cam in robot.cameras.items(): + video_key = f"observation.images.{key}" + camera_shapes[video_key] = { + "width": cam.width, + "height": cam.height, + "channels": cam.channels, + } return { "codebase_version": codebase_version, "data_path": DEFAULT_PARQUET_PATH, @@ -159,12 +174,12 @@ def create_dataset_info(codebase_version: str, fps: int, robot: Robot) -> dict: "chunks_size": DEFAULT_CHUNK_SIZE, "fps": fps, "splits": {}, - # "keys": keys, - # "video_keys": video_keys, - # "image_keys": image_keys, - # "shapes": {**sequence_shapes, **video_shapes, **image_shapes}, - # "names": names, - # "videos": {"videos_path": DEFAULT_VIDEO_PATH} if video_keys else None, + "keys": list(robot.names), + "video_keys": list(camera_shapes) if use_videos else [], + "image_keys": [] if use_videos else list(camera_shapes), + "shapes": {**shapes, **camera_shapes}, + "names": robot.names, + "videos": {"videos_path": DEFAULT_VIDEO_PATH} if use_videos else None, } @@ -270,6 +285,7 @@ def get_delta_indices(delta_timestamps: dict[str, list[float]], fps: int) -> dic return delta_indices +# TODO(aliberts): remove def load_previous_and_future_frames( item: dict[str, torch.Tensor], hf_dataset: datasets.Dataset, @@ -363,6 +379,7 @@ def load_previous_and_future_frames( return item +# TODO(aliberts): remove def calculate_episode_data_index(hf_dataset: datasets.Dataset) -> Dict[str, torch.Tensor]: """ Calculate episode data index for the provided HuggingFace 
Dataset. Relies on episode_index column of hf_dataset. @@ -417,6 +434,7 @@ def calculate_episode_data_index(hf_dataset: datasets.Dataset) -> Dict[str, torc return episode_data_index +# TODO(aliberts): remove def reset_episode_index(hf_dataset: datasets.Dataset) -> datasets.Dataset: """Reset the `episode_index` of the provided HuggingFace Dataset. @@ -454,7 +472,7 @@ def cycle(iterable): iterator = iter(iterable) -def create_branch(repo_id, *, branch: str, repo_type: str | None = None): +def create_branch(repo_id, *, branch: str, repo_type: str | None = None) -> None: """Create a branch on a existing Hugging Face repo. Delete the branch if it already exists before creating it. """ diff --git a/lerobot/common/robot_devices/cameras/opencv.py b/lerobot/common/robot_devices/cameras/opencv.py index 2d8b12c9..d284cf55 100644 --- a/lerobot/common/robot_devices/cameras/opencv.py +++ b/lerobot/common/robot_devices/cameras/opencv.py @@ -192,6 +192,7 @@ class OpenCVCameraConfig: width: int | None = None height: int | None = None color_mode: str = "rgb" + channels: int | None = None rotation: int | None = None mock: bool = False @@ -201,6 +202,8 @@ class OpenCVCameraConfig: f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided." ) + self.channels = 3 + if self.rotation not in [-90, None, 90, 180]: raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})") @@ -268,6 +271,7 @@ class OpenCVCamera: self.fps = config.fps self.width = config.width self.height = config.height + self.channels = config.channels self.color_mode = config.color_mode self.mock = config.mock diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index 08bcec2e..6a8805dc 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -15,7 +15,8 @@ import torch import tqdm from termcolor import colored -from lerobot.common.datasets.populate_dataset import add_frame, safe_stop_image_writer +from lerobot.common.datasets.image_writer import safe_stop_image_writer +from lerobot.common.datasets.lerobot_dataset import LeRobotDataset from lerobot.common.policies.factory import make_policy from lerobot.common.robot_devices.robots.utils import Robot from lerobot.common.robot_devices.utils import busy_wait @@ -227,7 +228,7 @@ def control_loop( control_time_s=None, teleoperate=False, display_cameras=False, - dataset=None, + dataset: LeRobotDataset | None = None, events=None, policy=None, device=None, @@ -268,7 +269,8 @@ def control_loop( action = {"action": action} if dataset is not None: - add_frame(dataset, observation, action) + frame = {**observation, **action} + dataset.add_frame(frame) if display_cameras and not is_headless(): image_keys = [key for key in observation if "image" in key] diff --git a/lerobot/common/robot_devices/robots/manipulator.py b/lerobot/common/robot_devices/robots/manipulator.py index 20969c30..6ee2cae7 100644 --- a/lerobot/common/robot_devices/robots/manipulator.py +++ b/lerobot/common/robot_devices/robots/manipulator.py @@ -349,6 +349,13 @@ class ManipulatorRobot: self.is_connected = False self.logs = {} + action_names = [f"{arm}_{motor}" for arm, bus in self.leader_arms.items() for motor in bus.motors] + state_names = [f"{arm}_{motor}" for arm, bus in self.follower_arms.items() for motor in bus.motors] + self.names = { + "action": action_names, + "observation.state": state_names, + } + @property def has_camera(self): return len(self.cameras) > 0 diff --git 
a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 425247e6..3d9073b0 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -105,11 +105,11 @@ from pathlib import Path from typing import List # from safetensors.torch import load_file, save_file +from lerobot.common.datasets.image_writer import ImageWriter from lerobot.common.datasets.lerobot_dataset import LeRobotDataset from lerobot.common.datasets.populate_dataset import ( create_lerobot_dataset, delete_current_episode, - init_dataset, save_current_episode, ) from lerobot.common.robot_devices.control_utils import ( @@ -233,16 +233,12 @@ def record( # Create empty dataset or load existing saved episodes sanity_check_dataset_name(repo_id, policy) - dataset = init_dataset( - repo_id, - root, - force_override, - fps, - video, - write_images=robot.has_camera, + image_writer = ImageWriter( + write_dir=root, num_image_writer_processes=num_image_writer_processes, num_image_writer_threads=num_image_writer_threads_per_camera * robot.num_cameras, ) + dataset = LeRobotDataset.create(repo_id, fps, robot, image_writer=image_writer) if not robot.is_connected: robot.connect() @@ -260,8 +256,9 @@ def record( if has_method(robot, "teleop_safety_stop"): robot.teleop_safety_stop() + recorded_episodes = 0 while True: - if dataset["num_episodes"] >= num_episodes: + if recorded_episodes >= num_episodes: break episode_index = dataset["num_episodes"] From 299451af81e268eae963134e7ae9b6c8213c3ed8 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Mon, 21 Oct 2024 19:30:20 +0200 Subject: [PATCH 37/59] Add add_episode & task logic --- lerobot/common/datasets/lerobot_dataset.py | 179 +++++++++++++++++- lerobot/common/datasets/utils.py | 5 + lerobot/common/robot_devices/control_utils.py | 2 +- lerobot/scripts/control_robot.py | 35 +++- 4 files changed, 203 insertions(+), 18 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 61331c5a..53b3c4af 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -19,16 +19,19 @@ from pathlib import Path from typing import Callable import datasets +import pyarrow.parquet as pq import torch import torch.utils from datasets import load_dataset -from huggingface_hub import snapshot_download +from huggingface_hub import snapshot_download, upload_folder from lerobot.common.datasets.compute_stats import aggregate_stats from lerobot.common.datasets.image_writer import ImageWriter from lerobot.common.datasets.utils import ( + append_jsonl, check_delta_timestamps, check_timestamps_sync, + create_branch, create_empty_dataset_info, get_delta_indices, get_episode_data_index, @@ -160,6 +163,7 @@ class LeRobotDataset(torch.utils.data.Dataset): self.video_backend = video_backend if video_backend is not None else "pyav" self.image_writer = image_writer self.episode_buffer = {} + self.consolidated = True self.delta_indices = None # Load metadata @@ -192,6 +196,24 @@ class LeRobotDataset(torch.utils.data.Dataset): # - [ ] Update episode_index (arg update=True) # - [ ] Update info.json (arg update=True) + def push_to_repo(self, push_videos: bool = True) -> None: + if not self.consolidated: + raise RuntimeError( + "You are trying to upload to the hub a LeRobotDataset that has not been consolidated yet." + "Please use the '.consolidate()' method first." 
+ ) + ignore_patterns = ["images/"] + if not push_videos: + ignore_patterns.append("videos/") + + upload_folder( + repo_id=self.repo_id, + folder_path=self.root, + repo_type="dataset", + ignore_patterns=ignore_patterns, + ) + create_branch(repo_id=self.repo_id, branch=CODEBASE_VERSION, repo_type="dataset") + def pull_from_repo( self, allow_patterns: list[str] | str | None = None, @@ -303,11 +325,6 @@ class LeRobotDataset(torch.utils.data.Dataset): """Number of samples/frames in selected episodes.""" return len(self.hf_dataset) - @property - def total_frames(self) -> int: - """Total number of frames saved in this dataset.""" - return self.info["total_frames"] - @property def num_episodes(self) -> int: """Number of episodes selected.""" @@ -318,6 +335,16 @@ class LeRobotDataset(torch.utils.data.Dataset): """Total number of episodes available.""" return self.info["total_episodes"] + @property + def total_frames(self) -> int: + """Total number of frames saved in this dataset.""" + return self.info["total_frames"] + + @property + def total_tasks(self) -> int: + """Total number of different tasks performed in this dataset.""" + return self.info["total_tasks"] + @property def total_chunks(self) -> int: """Total number of chunks (groups of episodes).""" @@ -331,7 +358,46 @@ class LeRobotDataset(torch.utils.data.Dataset): @property def shapes(self) -> dict: """Shapes for the different features.""" - self.info.get("shapes") + return self.info["shapes"] + + @property + def features(self) -> datasets.Features: + """Shapes for the different features.""" + if self.hf_dataset is not None: + return self.hf_dataset.features + elif self.episode_buffer is None: + raise NotImplementedError( + "Dataset features must be infered from an existing hf_dataset or episode_buffer." + ) + + features = {} + for key in self.episode_buffer: + if key in ["episode_index", "frame_index", "index", "task_index"]: + features[key] = datasets.Value(dtype="int64") + elif key in ["next.done", "next.success"]: + features[key] = datasets.Value(dtype="bool") + elif key in ["timestamp", "next.reward"]: + features[key] = datasets.Value(dtype="float32") + elif key in self.image_keys: + features[key] = datasets.Image() + elif key in self.keys: + features[key] = datasets.Sequence( + length=self.shapes[key], feature=datasets.Value(dtype="float32") + ) + + return datasets.Features(features) + + @property + def task_to_task_index(self) -> dict: + return {task: task_idx for task_idx, task in self.tasks.items()} + + def get_task_index(self, task: str) -> int: + """ + Given a task in natural language, returns its task_index if the task already exists in the dataset, + otherwise creates a new task_index. 
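        Illustrative example (hypothetical task strings): if self.tasks == {0: "Do something."}
        and self.total_tasks == 1, then get_task_index("Do something.") returns 0, while a task
        not seen before, e.g. get_task_index("Pick up the cube."), returns 1, i.e. the next
        available index.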
+ """ + task_index = self.task_to_task_index.get(task, None) + return task_index if task_index is not None else self.total_tasks def current_episode_index(self, idx: int) -> int: episode_index = self.hf_dataset["episode_index"][idx] @@ -447,12 +513,12 @@ class LeRobotDataset(torch.utils.data.Dataset): f")" ) - def _create_episode_buffer(self) -> dict: + def _create_episode_buffer(self, episode_index: int | None = None) -> dict: # TODO(aliberts): Handle resume return { - "chunk": self.total_chunks, - "episode_index": self.total_episodes, "size": 0, + "episode_index": self.total_episodes if episode_index is None else episode_index, + "task_index": None, "frame_index": [], "timestamp": [], "next.done": [], @@ -490,6 +556,92 @@ class LeRobotDataset(torch.utils.data.Dataset): file_path=img_path, ) + def add_episode(self, task: str, encode_videos: bool = False) -> None: + """ + This will save to disk the current episode in self.episode_buffer. Note that since it affects files on + disk, it sets self.consolidated to False to ensure proper consolidation later on before uploading to + the hub. + + Use encode_videos if you want to encode videos during the saving of each episode. Otherwise, + you can do it later during dataset.consolidate(). This is to give more flexibility on when to spend + time for video encoding. + """ + episode_length = self.episode_buffer.pop("size") + episode_index = self.episode_buffer["episode_index"] + task_index = self.get_task_index(task) + self.episode_buffer["next.done"][-1] = True + + for key in self.episode_buffer: + if key in self.keys: + self.episode_buffer[key] = torch.stack(self.episode_buffer[key]) + elif key == "episode_index": + self.episode_buffer[key] = torch.full((episode_length,), episode_index) + elif key == "task_index": + self.episode_buffer[key] = torch.full((episode_length,), task_index) + else: + self.episode_buffer[key] = torch.tensor(self.episode_buffer[key]) + + self._save_episode_to_metadata(episode_index, episode_length, task, task_index) + self._save_episode_table(episode_index) + + if encode_videos: + pass # TODO + + # Reset the buffer + self.episode_buffer = self._create_episode_buffer() + self.consolidated = False + + def _save_episode_table(self, episode_index: int) -> None: + features = self.features + ep_dataset = datasets.Dataset.from_dict(self.episode_buffer, features=features, split="train") + ep_table = ep_dataset._data.table + ep_data_path = self.get_data_file_path(ep_index=episode_index, return_str=False) + ep_data_path.parent.mkdir(parents=True, exist_ok=True) + pq.write_table(ep_table, ep_data_path) + + def _save_episode_to_metadata( + self, episode_index: int, episode_length: int, task: str, task_index: int + ) -> None: + self.info["total_episodes"] += 1 + self.info["total_frames"] += episode_length + + if task_index not in self.tasks: + self.info["total_tasks"] += 1 + self.tasks[task_index] = task + task_dict = { + "task_index": task_index, + "task": task, + } + append_jsonl(task_dict, self.root / "meta/tasks.jsonl") + + chunk = self.get_episode_chunk(episode_index) + if chunk >= self.total_chunks: + self.info["total_chunks"] += 1 + + self.info["splits"] = {"train": f"0:{self.info['total_episodes']}"} + self.info["total_videos"] += len(self.video_keys) + write_json(self.info, self.root / "meta/info.json") + + episode_dict = { + "episode_index": episode_index, + "tasks": [task], + "length": episode_length, + } + append_jsonl(episode_dict, self.root / "meta/episodes.jsonl") + + def delete_episode(self) -> None: + pass # TODO + + 
def consolidate(self) -> None: + pass # TODO + # Sanity checks: + # - [ ] shapes + # - [ ] ep_lenghts + # - [ ] number of files + # - [ ] names of files (e.g. parquet 00000-of-00001 and 00001-of-00002) + # - [ ] no remaining self.image_writer.dir + self.consolidated = True + @classmethod def create( cls, @@ -508,6 +660,7 @@ class LeRobotDataset(torch.utils.data.Dataset): obj._version = CODEBASE_VERSION obj.tolerance_s = tolerance_s obj.image_writer = image_writer + obj.hf_dataset = None if not all(cam.fps == fps for cam in robot.cameras.values()): logging.warn( @@ -515,12 +668,18 @@ class LeRobotDataset(torch.utils.data.Dataset): "In this case, frames from lower fps cameras will be repeated to fill in the blanks" ) + obj.tasks = {} obj.info = create_empty_dataset_info(obj._version, fps, robot, use_videos) write_json(obj.info, obj.root / "meta/info.json") # TODO(aliberts, rcadene, alexander-soare): Merge this with OnlineBuffer/DataBuffer obj.episode_buffer = obj._create_episode_buffer() + # This bool indicates that the current LeRobotDataset instance is in sync with the files on disk. + # It is used to know when certain operations are need (for instance, computing dataset statistics). + # In order to be able to push the dataset to the hub, it needs to be consolidation first. + obj.consolidated = True + # obj.episodes = None # obj.image_transforms = None # obj.delta_timestamps = None diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 79459882..8985e449 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -81,6 +81,11 @@ def write_json(data: dict, fpath: Path) -> None: json.dump(data, f, indent=4, ensure_ascii=False) +def append_jsonl(data: dict, fpath: Path) -> None: + with jsonlines.open(fpath, "a") as writer: + writer.write(data) + + def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): """Get a transform function that convert items from Hugging Face dataset (pyarrow) to torch tensors. 
Importantly, images are converted from PIL, which corresponds to diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index 6a8805dc..9bcdaea3 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -248,7 +248,7 @@ def control_loop( if teleoperate and policy is not None: raise ValueError("When `teleoperate` is True, `policy` should be None.") - if dataset is not None and fps is not None and dataset["fps"] != fps: + if dataset is not None and fps is not None and dataset.fps != fps: raise ValueError(f"The dataset fps should be equal to requested fps ({dataset['fps']} != {fps}).") timestamp = 0 diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 3d9073b0..86233251 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -109,8 +109,6 @@ from lerobot.common.datasets.image_writer import ImageWriter from lerobot.common.datasets.lerobot_dataset import LeRobotDataset from lerobot.common.datasets.populate_dataset import ( create_lerobot_dataset, - delete_current_episode, - save_current_episode, ) from lerobot.common.robot_devices.control_utils import ( control_loop, @@ -195,6 +193,7 @@ def record( robot: Robot, root: str, repo_id: str, + single_task: str, pretrained_policy_name_or_path: str | None = None, policy_overrides: List[str] | None = None, fps: int | None = None, @@ -219,6 +218,11 @@ def record( device = None use_amp = None + if single_task: + task = single_task + else: + raise NotImplementedError("Only single-task recording is supported for now") + # Load pretrained policy if pretrained_policy_name_or_path is not None: policy, policy_fps, device, use_amp = init_policy(pretrained_policy_name_or_path, policy_overrides) @@ -235,8 +239,8 @@ def record( sanity_check_dataset_name(repo_id, policy) image_writer = ImageWriter( write_dir=root, - num_image_writer_processes=num_image_writer_processes, - num_image_writer_threads=num_image_writer_threads_per_camera * robot.num_cameras, + num_processes=num_image_writer_processes, + num_threads=num_image_writer_threads_per_camera * robot.num_cameras, ) dataset = LeRobotDataset.create(repo_id, fps, robot, image_writer=image_writer) @@ -261,7 +265,12 @@ def record( if recorded_episodes >= num_episodes: break - episode_index = dataset["num_episodes"] + # TODO(aliberts): add task prompt for multitask here. Might need to temporarily disable event if + # input() messes with them. 
+ # if multi_task: + # task = input("Enter your task description: ") + + episode_index = dataset.episode_buffer["episode_index"] log_say(f"Recording episode {episode_index}", play_sounds) record_episode( dataset=dataset, @@ -289,11 +298,11 @@ def record( log_say("Re-record episode", play_sounds) events["rerecord_episode"] = False events["exit_early"] = False - delete_current_episode(dataset) + dataset.delete_episode() continue # Increment by one dataset["current_episode_index"] - save_current_episode(dataset) + dataset.add_episode(task) if events["stop_recording"]: break @@ -378,9 +387,21 @@ if __name__ == "__main__": ) parser_record = subparsers.add_parser("record", parents=[base_parser]) + task_args = parser_record.add_mutually_exclusive_group(required=True) parser_record.add_argument( "--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)" ) + task_args.add_argument( + "--single-task", + type=str, + help="A short but accurate description of the task performed during the recording.", + ) + # TODO(aliberts): add multi-task support + # task_args.add_argument( + # "--multi-task", + # type=int, + # help="You will need to enter the task performed at the start of each episode.", + # ) parser_record.add_argument( "--root", type=Path, From c4c0a43de76c61c118bf07a4b52b605abf883fd3 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Mon, 21 Oct 2024 20:10:13 +0200 Subject: [PATCH 38/59] add delete_episode, WIP on consolidate --- lerobot/common/datasets/image_writer.py | 6 ++++++ lerobot/common/datasets/lerobot_dataset.py | 20 +++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/lerobot/common/datasets/image_writer.py b/lerobot/common/datasets/image_writer.py index c87e342b..7bdefc64 100644 --- a/lerobot/common/datasets/image_writer.py +++ b/lerobot/common/datasets/image_writer.py @@ -107,6 +107,12 @@ class ImageWriter: ) return str(self.dir / fpath) if return_str else self.dir / fpath + def get_episode_dir(self, episode_index: int, image_key: str, return_str: bool = True) -> str | Path: + dir_path = self.get_image_file_path( + episode_index=episode_index, image_key=image_key, frame_index=0, return_str=False + ).parent + return str(dir_path) if return_str else dir_path + def stop(self, timeout=20) -> None: """Stop the image writer, waiting for all processes or threads to finish.""" if self.type == "threads": diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 53b3c4af..6d68946e 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -15,6 +15,7 @@ # limitations under the License. 
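# A minimal, self-contained sketch of the episode-buffer lifecycle that add_frame()
# and add_episode() implement in the diffs above (simplified here to a single
# low-dimensional key and no cameras; shapes and values are illustrative).
import torch

fps = 30
buffer = {
    "size": 0,
    "episode_index": 0,
    "frame_index": [],
    "timestamp": [],
    "next.done": [],
    "observation.state": [],
}

# add_frame(): append one frame worth of data and grow the buffer.
for _ in range(5):
    i = buffer["size"]
    buffer["frame_index"].append(i)
    buffer["timestamp"].append(i / fps)
    buffer["next.done"].append(False)
    buffer["observation.state"].append(torch.zeros(6))
    buffer["size"] += 1

# add_episode(): mark the last frame done, stack lists into tensors of shape
# (episode_length, ...), then reset the buffer for the next episode.
buffer["next.done"][-1] = True
episode_length = buffer.pop("size")
stacked = {
    "observation.state": torch.stack(buffer["observation.state"]),
    "timestamp": torch.tensor(buffer["timestamp"]),
    "frame_index": torch.tensor(buffer["frame_index"]),
    "next.done": torch.tensor(buffer["next.done"]),
    "episode_index": torch.full((episode_length,), buffer["episode_index"]),
}
assert stacked["observation.state"].shape == (5, 6)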
import logging import os +import shutil from pathlib import Path from typing import Callable @@ -25,7 +26,7 @@ import torch.utils from datasets import load_dataset from huggingface_hub import snapshot_download, upload_folder -from lerobot.common.datasets.compute_stats import aggregate_stats +from lerobot.common.datasets.compute_stats import aggregate_stats, compute_stats from lerobot.common.datasets.image_writer import ImageWriter from lerobot.common.datasets.utils import ( append_jsonl, @@ -630,9 +631,22 @@ class LeRobotDataset(torch.utils.data.Dataset): append_jsonl(episode_dict, self.root / "meta/episodes.jsonl") def delete_episode(self) -> None: - pass # TODO + episode_index = self.episode_buffer["episode_index"] + if self.image_writer is not None: + for cam_key in self.camera_keys: + cam_dir = self.image_writer.get_episode_dir(episode_index, cam_key) + if cam_dir.is_dir(): + shutil.rmtree(cam_dir) - def consolidate(self) -> None: + # Reset the buffer + self.episode_buffer = self._create_episode_buffer() + + def consolidate(self, run_compute_stats: bool = True) -> None: + if run_compute_stats: + logging.info("Computing dataset statistics") + self.hf_dataset = self.load_hf_dataset() + self.stats = compute_stats(self) + write_json() pass # TODO # Sanity checks: # - [ ] shapes From e991a310614a533284abc64d0f2e6f49682d8809 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 22 Oct 2024 00:19:25 +0200 Subject: [PATCH 39/59] Improve consistency between __init__() and create(), WIP on consolidate --- lerobot/common/datasets/lerobot_dataset.py | 69 ++++++++++++++-------- lerobot/scripts/control_robot.py | 20 ++++--- 2 files changed, 55 insertions(+), 34 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 6d68946e..ffbcf0fb 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
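# An illustrative sketch of the JSONL metadata appended by _save_episode_to_metadata()
# in the diffs above, based on the task_dict / episode_dict literals and the
# TASKS_PATH / EPISODES_PATH constants ("meta/tasks.jsonl", "meta/episodes.jsonl").
# The task string and episode length are placeholder values.
import json

task_dict = {"task_index": 0, "task": "Do something."}
episode_dict = {"episode_index": 0, "tasks": ["Do something."], "length": 237}

# append_jsonl() writes one JSON object per line, so after the first episode:
# meta/tasks.jsonl    -> {"task_index": 0, "task": "Do something."}
# meta/episodes.jsonl -> {"episode_index": 0, "tasks": ["Do something."], "length": 237}
print(json.dumps(task_dict))
print(json.dumps(episode_dict))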
+import json import logging import os import shutil @@ -39,6 +40,7 @@ from lerobot.common.datasets.utils import ( get_hub_safe_version, hf_transform_to_torch, load_metadata, + unflatten_dict, write_json, ) from lerobot.common.datasets.video_utils import VideoFrame, decode_video_frames_torchvision @@ -163,9 +165,9 @@ class LeRobotDataset(torch.utils.data.Dataset): self.download_videos = download_videos self.video_backend = video_backend if video_backend is not None else "pyav" self.image_writer = image_writer - self.episode_buffer = {} - self.consolidated = True self.delta_indices = None + self.consolidated = True + self.episode_buffer = {} # Load metadata self.root.mkdir(exist_ok=True, parents=True) @@ -501,17 +503,12 @@ class LeRobotDataset(torch.utils.data.Dataset): def __repr__(self): return ( - f"{self.__class__.__name__}(\n" + f"{self.__class__.__name__}\n" f" Repository ID: '{self.repo_id}',\n" - f" Number of Samples: {self.num_samples},\n" - f" Number of Episodes: {self.num_episodes},\n" - f" Type: {'video (.mp4)' if self.video else 'image (.png)'},\n" - f" Recorded Frames per Second: {self.fps},\n" - f" Camera Keys: {self.camera_keys},\n" - f" Video Frame Keys: {self.camera_keys if self.video else 'N/A'},\n" - f" Transformations: {self.image_transforms},\n" - f" Codebase Version: {self.info.get('codebase_version', '< v1.6')},\n" - f")" + f" Selected episodes: {self.episodes},\n" + f" Number of selected episodes: {self.num_episodes},\n" + f" Number of selected samples: {self.num_samples},\n" + f"\n{json.dumps(self.info, indent=4)}\n" ) def _create_episode_buffer(self, episode_index: int | None = None) -> dict: @@ -563,12 +560,16 @@ class LeRobotDataset(torch.utils.data.Dataset): disk, it sets self.consolidated to False to ensure proper consolidation later on before uploading to the hub. - Use encode_videos if you want to encode videos during the saving of each episode. Otherwise, - you can do it later during dataset.consolidate(). This is to give more flexibility on when to spend + Use 'encode_videos' if you want to encode videos during the saving of each episode. Otherwise, + you can do it later with dataset.consolidate(). This is to give more flexibility on when to spend time for video encoding. """ episode_length = self.episode_buffer.pop("size") episode_index = self.episode_buffer["episode_index"] + if episode_index != self.total_episodes: + # TODO(aliberts): Add option to use existing episode_index + raise NotImplementedError() + task_index = self.get_task_index(task) self.episode_buffer["next.done"][-1] = True @@ -641,12 +642,30 @@ class LeRobotDataset(torch.utils.data.Dataset): # Reset the buffer self.episode_buffer = self._create_episode_buffer() + def _update_data_file_names(self) -> None: + # TODO(aliberts): remove the need for this hack by removing total_episodes part in data file names. + # Must first investigate if this doesn't break hub/datasets features like viewer etc. 
+ for ep_idx in range(self.total_episodes): + ep_chunk = self.get_episode_chunk(ep_idx) + current_file_name = self.data_path.replace("{total_episodes:05d}", "*") + current_file_name = current_file_name.format(episode_chunk=ep_chunk, episode_index=ep_idx) + current_file_name = list(self.root.glob(current_file_name))[0] + updated_file_name = self.get_data_file_path(ep_idx) + current_file_name.rename(updated_file_name) + def consolidate(self, run_compute_stats: bool = True) -> None: + self._update_data_file_names() if run_compute_stats: logging.info("Computing dataset statistics") self.hf_dataset = self.load_hf_dataset() self.stats = compute_stats(self) - write_json() + serialized_stats = {key: value.tolist() for key, value in self.stats.items()} + serialized_stats = unflatten_dict(serialized_stats) + write_json(serialized_stats, self.root / "meta/stats.json") + else: + logging.warning("Skipping computation of the dataset statistics.") + + self.episode_data_index = get_episode_data_index(self.episodes, self.episode_dicts) pass # TODO # Sanity checks: # - [ ] shapes @@ -666,6 +685,7 @@ class LeRobotDataset(torch.utils.data.Dataset): tolerance_s: float = 1e-4, image_writer: ImageWriter | None = None, use_videos: bool = True, + video_backend: str | None = None, ) -> "LeRobotDataset": """Create a LeRobot Dataset from scratch in order to record data.""" obj = cls.__new__(cls) @@ -674,15 +694,14 @@ class LeRobotDataset(torch.utils.data.Dataset): obj._version = CODEBASE_VERSION obj.tolerance_s = tolerance_s obj.image_writer = image_writer - obj.hf_dataset = None if not all(cam.fps == fps for cam in robot.cameras.values()): - logging.warn( + logging.warning( f"Some cameras in your {robot.robot_type} robot don't have an fps matching the fps of your dataset." "In this case, frames from lower fps cameras will be repeated to fill in the blanks" ) - obj.tasks = {} + obj.tasks, obj.stats, obj.episode_dicts = {}, {}, [] obj.info = create_empty_dataset_info(obj._version, fps, robot, use_videos) write_json(obj.info, obj.root / "meta/info.json") @@ -694,14 +713,12 @@ class LeRobotDataset(torch.utils.data.Dataset): # In order to be able to push the dataset to the hub, it needs to be consolidation first. 
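# Editor's sketch of what get_episode_data_index() is assumed to return (its body is
# not part of these diffs): per-episode "from"/"to" frame boundaries derived from the
# "length" field of episodes.jsonl, matching usages such as
# episode_data_index["from"][episode].item() elsewhere in the codebase.
import torch

episode_dicts = [
    {"episode_index": 0, "tasks": ["Do something."], "length": 3},
    {"episode_index": 1, "tasks": ["Do something."], "length": 5},
]
lengths = torch.tensor([ep["length"] for ep in episode_dicts])
cumsum = torch.cumsum(lengths, dim=0)
episode_data_index = {
    "from": torch.cat([torch.tensor([0]), cumsum[:-1]]),
    "to": cumsum,
}
# Episode 1 spans frames [3, 8) of the concatenated dataset.
assert episode_data_index["from"][1] == 3 and episode_data_index["to"][1] == 8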
obj.consolidated = True - # obj.episodes = None - # obj.image_transforms = None - # obj.delta_timestamps = None - # obj.episode_data_index = episode_data_index - # obj.stats = stats - # obj.info = info if info is not None else {} - # obj.videos_dir = videos_dir - # obj.video_backend = video_backend if video_backend is not None else "pyav" + obj.episodes = None + obj.hf_dataset = None + obj.image_transforms = None + obj.delta_timestamps = None + obj.episode_data_index = None + obj.video_backend = video_backend if video_backend is not None else "pyav" return obj diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 86233251..62d6760b 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -107,9 +107,6 @@ from typing import List # from safetensors.torch import load_file, save_file from lerobot.common.datasets.image_writer import ImageWriter from lerobot.common.datasets.lerobot_dataset import LeRobotDataset -from lerobot.common.datasets.populate_dataset import ( - create_lerobot_dataset, -) from lerobot.common.robot_devices.control_utils import ( control_loop, has_method, @@ -210,7 +207,7 @@ def record( force_override=False, display_cameras=True, play_sounds=True, -): +) -> LeRobotDataset: # TODO(rcadene): Add option to record logs listener = None events = None @@ -242,7 +239,7 @@ def record( num_processes=num_image_writer_processes, num_threads=num_image_writer_threads_per_camera * robot.num_cameras, ) - dataset = LeRobotDataset.create(repo_id, fps, robot, image_writer=image_writer) + dataset = LeRobotDataset.create(repo_id, fps, robot, root=root, image_writer=image_writer) if not robot.is_connected: robot.connect() @@ -301,8 +298,8 @@ def record( dataset.delete_episode() continue - # Increment by one dataset["current_episode_index"] dataset.add_episode(task) + recorded_episodes += 1 if events["stop_recording"]: break @@ -310,10 +307,17 @@ def record( log_say("Stop recording", play_sounds, blocking=True) stop_recording(robot, listener, display_cameras) - lerobot_dataset = create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_sounds) + logging.info("Waiting for image writer to terminate...") + dataset.image_writer.stop() + + dataset.consolidate(run_compute_stats) + + # lerobot_dataset = create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_sounds) + if push_to_hub: + dataset.push_to_repo() log_say("Exiting", play_sounds) - return lerobot_dataset + return dataset @safe_disconnect From a805458c7eb57b93522610c7f3fa79e204567725 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 22 Oct 2024 19:57:52 +0200 Subject: [PATCH 40/59] Add local_files_only, encode_videos, fix bugs to pass tests (WIP) --- lerobot/common/datasets/lerobot_dataset.py | 127 ++++++++++++++++----- lerobot/common/datasets/utils.py | 66 ++++++++--- lerobot/common/datasets/video_utils.py | 4 +- lerobot/scripts/control_robot.py | 66 ++++++----- 4 files changed, 183 insertions(+), 80 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index ffbcf0fb..ad5a37cf 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -17,6 +17,7 @@ import json import logging import os import shutil +from functools import cached_property from pathlib import Path from typing import Callable @@ -30,20 +31,32 @@ from huggingface_hub import snapshot_download, upload_folder from lerobot.common.datasets.compute_stats import 
aggregate_stats, compute_stats from lerobot.common.datasets.image_writer import ImageWriter from lerobot.common.datasets.utils import ( + EPISODES_PATH, + INFO_PATH, + TASKS_PATH, append_jsonl, check_delta_timestamps, check_timestamps_sync, + check_version_compatibility, create_branch, create_empty_dataset_info, + flatten_dict, get_delta_indices, get_episode_data_index, get_hub_safe_version, hf_transform_to_torch, - load_metadata, + load_episode_dicts, + load_info, + load_stats, + load_tasks, unflatten_dict, write_json, ) -from lerobot.common.datasets.video_utils import VideoFrame, decode_video_frames_torchvision +from lerobot.common.datasets.video_utils import ( + VideoFrame, + decode_video_frames_torchvision, + encode_video_frames, +) from lerobot.common.robot_devices.robots.utils import Robot # For maintainers, see lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md @@ -61,6 +74,7 @@ class LeRobotDataset(torch.utils.data.Dataset): delta_timestamps: dict[list[float]] | None = None, tolerance_s: float = 1e-4, download_videos: bool = True, + local_files_only: bool = False, video_backend: str | None = None, image_writer: ImageWriter | None = None, ): @@ -162,21 +176,26 @@ class LeRobotDataset(torch.utils.data.Dataset): self.delta_timestamps = delta_timestamps self.episodes = episodes self.tolerance_s = tolerance_s - self.download_videos = download_videos self.video_backend = video_backend if video_backend is not None else "pyav" self.image_writer = image_writer self.delta_indices = None self.consolidated = True self.episode_buffer = {} + self.local_files_only = local_files_only # Load metadata self.root.mkdir(exist_ok=True, parents=True) - self._version = get_hub_safe_version(repo_id, CODEBASE_VERSION) self.pull_from_repo(allow_patterns="meta/") - self.info, self.episode_dicts, self.stats, self.tasks = load_metadata(self.root) + self.info = load_info(self.root) + self.stats = load_stats(self.root) + self.tasks = load_tasks(self.root) + self.episode_dicts = load_episode_dicts(self.root) + + # Check version + check_version_compatibility(self.repo_id, self._version, CODEBASE_VERSION) # Load actual data - self.download_episodes() + self.download_episodes(download_videos) self.hf_dataset = self.load_hf_dataset() self.episode_data_index = get_episode_data_index(self.episodes, self.episode_dicts) @@ -199,6 +218,15 @@ class LeRobotDataset(torch.utils.data.Dataset): # - [ ] Update episode_index (arg update=True) # - [ ] Update info.json (arg update=True) + @cached_property + def _hub_version(self) -> str | None: + return None if self.local_files_only else get_hub_safe_version(self.repo_id, CODEBASE_VERSION) + + @property + def _version(self) -> str: + """Codebase version used to create this dataset.""" + return self.info["codebase_version"] + def push_to_repo(self, push_videos: bool = True) -> None: if not self.consolidated: raise RuntimeError( @@ -225,13 +253,14 @@ class LeRobotDataset(torch.utils.data.Dataset): snapshot_download( self.repo_id, repo_type="dataset", - revision=self._version, + revision=self._hub_version, local_dir=self.root, allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, + local_files_only=self.local_files_only, ) - def download_episodes(self) -> None: + def download_episodes(self, download_videos: bool = True) -> None: """Downloads the dataset from the given 'repo_id' at the provided version. If 'episodes' is given, this will only download those episodes (selected by their episode_index). If 'episodes' is None, the whole dataset will be downloaded. 
Thanks to the behavior of snapshot_download, if the files are already present @@ -240,10 +269,10 @@ class LeRobotDataset(torch.utils.data.Dataset): # TODO(rcadene, aliberts): implement faster transfer # https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads files = None - ignore_patterns = None if self.download_videos else "videos/" + ignore_patterns = None if download_videos else "videos/" if self.episodes is not None: files = [self.get_data_file_path(ep_idx) for ep_idx in self.episodes] - if len(self.video_keys) > 0 and self.download_videos: + if len(self.video_keys) > 0 and download_videos: video_files = [ self.get_video_file_path(ep_idx, vid_key) for vid_key in self.video_keys @@ -495,7 +524,7 @@ class LeRobotDataset(torch.utils.data.Dataset): item = {**video_frames, **item} if self.image_transforms is not None: - image_keys = self.camera_keys if self.download_videos else self.image_keys + image_keys = self.camera_keys for cam in image_keys: item[cam] = self.image_transforms(item[cam]) @@ -521,6 +550,7 @@ class LeRobotDataset(torch.utils.data.Dataset): "timestamp": [], "next.done": [], **{key: [] for key in self.keys}, + **{key: [] for key in self.image_keys}, } def add_frame(self, frame: dict) -> None: @@ -553,6 +583,8 @@ class LeRobotDataset(torch.utils.data.Dataset): image=frame[cam_key], file_path=img_path, ) + if cam_key in self.image_keys: + self.episode_buffer[cam_key].append(str(img_path)) def add_episode(self, task: str, encode_videos: bool = False) -> None: """ @@ -574,6 +606,8 @@ class LeRobotDataset(torch.utils.data.Dataset): self.episode_buffer["next.done"][-1] = True for key in self.episode_buffer: + if key in self.image_keys: + continue if key in self.keys: self.episode_buffer[key] = torch.stack(self.episode_buffer[key]) elif key == "episode_index": @@ -583,11 +617,12 @@ class LeRobotDataset(torch.utils.data.Dataset): else: self.episode_buffer[key] = torch.tensor(self.episode_buffer[key]) + self.episode_buffer["index"] = torch.arange(self.total_frames, self.total_frames + episode_length) self._save_episode_to_metadata(episode_index, episode_length, task, task_index) self._save_episode_table(episode_index) - if encode_videos: - pass # TODO + if encode_videos and len(self.video_keys) > 0: + self.encode_videos() # Reset the buffer self.episode_buffer = self._create_episode_buffer() @@ -614,7 +649,7 @@ class LeRobotDataset(torch.utils.data.Dataset): "task_index": task_index, "task": task, } - append_jsonl(task_dict, self.root / "meta/tasks.jsonl") + append_jsonl(task_dict, self.root / TASKS_PATH) chunk = self.get_episode_chunk(episode_index) if chunk >= self.total_chunks: @@ -622,22 +657,23 @@ class LeRobotDataset(torch.utils.data.Dataset): self.info["splits"] = {"train": f"0:{self.info['total_episodes']}"} self.info["total_videos"] += len(self.video_keys) - write_json(self.info, self.root / "meta/info.json") + write_json(self.info, self.root / INFO_PATH) episode_dict = { "episode_index": episode_index, "tasks": [task], "length": episode_length, } - append_jsonl(episode_dict, self.root / "meta/episodes.jsonl") + self.episode_dicts.append(episode_dict) + append_jsonl(episode_dict, self.root / EPISODES_PATH) def delete_episode(self) -> None: episode_index = self.episode_buffer["episode_index"] if self.image_writer is not None: for cam_key in self.camera_keys: - cam_dir = self.image_writer.get_episode_dir(episode_index, cam_key) - if cam_dir.is_dir(): - shutil.rmtree(cam_dir) + img_dir = self.image_writer.get_episode_dir(episode_index, cam_key, 
return_str=False) + if img_dir.is_dir(): + shutil.rmtree(img_dir) # Reset the buffer self.episode_buffer = self._create_episode_buffer() @@ -653,27 +689,54 @@ class LeRobotDataset(torch.utils.data.Dataset): updated_file_name = self.get_data_file_path(ep_idx) current_file_name.rename(updated_file_name) + def _remove_image_writer(self) -> None: + if self.image_writer is not None: + self.image_writer = None + + def encode_videos(self) -> None: + # Use ffmpeg to convert frames stored as png into mp4 videos + for episode_index in range(self.num_episodes): + for key in self.video_keys: + # TODO: create video_buffer to store the state of encoded/unencoded videos and remove the need + # to call self.image_writer here + tmp_imgs_dir = self.image_writer.get_episode_dir(episode_index, key) + video_path = self.get_video_file_path(episode_index, key, return_str=False) + if video_path.is_file(): + # Skip if video is already encoded. Could be the case when resuming data recording. + continue + # note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding, + # since video encoding with ffmpeg is already using multithreading. + encode_video_frames(tmp_imgs_dir, video_path, self.fps, overwrite=True) + shutil.rmtree(tmp_imgs_dir) + def consolidate(self, run_compute_stats: bool = True) -> None: self._update_data_file_names() + self.hf_dataset = self.load_hf_dataset() + self.episode_data_index = get_episode_data_index(self.episodes, self.episode_dicts) + check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) + + if len(self.video_keys) > 0: + self.encode_videos() + if run_compute_stats: logging.info("Computing dataset statistics") - self.hf_dataset = self.load_hf_dataset() + self._remove_image_writer() self.stats = compute_stats(self) - serialized_stats = {key: value.tolist() for key, value in self.stats.items()} + serialized_stats = flatten_dict(self.stats) + serialized_stats = {key: value.tolist() for key, value in serialized_stats.items()} serialized_stats = unflatten_dict(serialized_stats) write_json(serialized_stats, self.root / "meta/stats.json") + self.consolidated = True else: logging.warning("Skipping computation of the dataset statistics.") - self.episode_data_index = get_episode_data_index(self.episodes, self.episode_dicts) - pass # TODO + # TODO(aliberts) # Sanity checks: # - [ ] shapes # - [ ] ep_lenghts # - [ ] number of files # - [ ] names of files (e.g. parquet 00000-of-00001 and 00001-of-00002) # - [ ] no remaining self.image_writer.dir - self.consolidated = True @classmethod def create( @@ -691,7 +754,6 @@ class LeRobotDataset(torch.utils.data.Dataset): obj = cls.__new__(cls) obj.repo_id = repo_id obj.root = root if root is not None else LEROBOT_HOME / repo_id - obj._version = CODEBASE_VERSION obj.tolerance_s = tolerance_s obj.image_writer = image_writer @@ -702,21 +764,26 @@ class LeRobotDataset(torch.utils.data.Dataset): ) obj.tasks, obj.stats, obj.episode_dicts = {}, {}, [] - obj.info = create_empty_dataset_info(obj._version, fps, robot, use_videos) - write_json(obj.info, obj.root / "meta/info.json") + obj.info = create_empty_dataset_info(CODEBASE_VERSION, fps, robot, use_videos) + write_json(obj.info, obj.root / INFO_PATH) # TODO(aliberts, rcadene, alexander-soare): Merge this with OnlineBuffer/DataBuffer obj.episode_buffer = obj._create_episode_buffer() - # This bool indicates that the current LeRobotDataset instance is in sync with the files on disk. 
- # It is used to know when certain operations are need (for instance, computing dataset statistics). - # In order to be able to push the dataset to the hub, it needs to be consolidation first. + # This bool indicates that the current LeRobotDataset instance is in sync with the files on disk. It + # is used to know when certain operations are need (for instance, computing dataset statistics). In + # order to be able to push the dataset to the hub, it needs to be consolidated first by calling + # self.consolidate(). obj.consolidated = True + obj.local_files_only = True + obj.download_videos = False + obj.episodes = None obj.hf_dataset = None obj.image_transforms = None obj.delta_timestamps = None + obj.delta_indices = None obj.episode_data_index = None obj.video_backend = video_backend if video_backend is not None else "pyav" return obj diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 8985e449..8625808e 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -30,6 +30,12 @@ from torchvision import transforms from lerobot.common.robot_devices.robots.utils import Robot DEFAULT_CHUNK_SIZE = 1000 # Max number of episodes per chunk + +INFO_PATH = "meta/info.json" +EPISODES_PATH = "meta/episodes.jsonl" +STATS_PATH = "meta/stats.json" +TASKS_PATH = "meta/tasks.jsonl" + DEFAULT_VIDEO_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4" DEFAULT_PARQUET_PATH = ( "data/chunk-{episode_chunk:03d}/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" @@ -104,6 +110,32 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): return items_dict +def _get_major_minor(version: str) -> tuple[int]: + split = version.strip("v").split(".") + return int(split[0]), int(split[1]) + + +def check_version_compatibility( + repo_id: str, version_to_check: str, current_version: str, enforce_breaking_major: bool = True +) -> None: + current_major, _ = _get_major_minor(current_version) + major_to_check, _ = _get_major_minor(version_to_check) + if major_to_check < current_major and enforce_breaking_major: + raise ValueError( + f"""The dataset you requested ({repo_id}) is in {version_to_check} format. We introduced a new + format with v2.0 that is not backward compatible. Please use our conversion script + first (convert_dataset_v1_to_v2.py) to convert your dataset to this new format.""" + ) + elif float(version_to_check.strip("v")) < float(current_version.strip("v")): + warnings.warn( + f"""The dataset you requested ({repo_id}) was created with a previous version ({version_to_check}) of the + codebase. The current codebase version is {current_version}. You should be fine since + backward compatibility is maintained. 
If you encounter a problem, contact LeRobot maintainers on + Discord ('https://discord.com/invite/s3KuuzsPFb') or open an issue on github.""", + stacklevel=1, + ) + + def get_hub_safe_version(repo_id: str, version: str, enforce_v2: bool = True) -> str: num_version = float(version.strip("v")) if num_version < 2 and enforce_v2: @@ -131,30 +163,28 @@ def get_hub_safe_version(repo_id: str, version: str, enforce_v2: bool = True) -> return version -def load_metadata(local_dir: Path) -> tuple[dict | list]: - """Loads metadata files from a dataset.""" - info_path = local_dir / "meta/info.json" - episodes_path = local_dir / "meta/episodes.jsonl" - stats_path = local_dir / "meta/stats.json" - tasks_path = local_dir / "meta/tasks.jsonl" +def load_info(local_dir: Path) -> dict: + with open(local_dir / INFO_PATH) as f: + return json.load(f) - with open(info_path) as f: - info = json.load(f) - with jsonlines.open(episodes_path, "r") as reader: - episode_dicts = list(reader) - - with open(stats_path) as f: +def load_stats(local_dir: Path) -> dict: + with open(local_dir / STATS_PATH) as f: stats = json.load(f) + stats = {key: torch.tensor(value) for key, value in flatten_dict(stats).items()} + return unflatten_dict(stats) - with jsonlines.open(tasks_path, "r") as reader: + +def load_tasks(local_dir: Path) -> dict: + with jsonlines.open(local_dir / TASKS_PATH, "r") as reader: tasks = list(reader) - stats = {key: torch.tensor(value) for key, value in flatten_dict(stats).items()} - stats = unflatten_dict(stats) - tasks = {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])} + return {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])} - return info, episode_dicts, stats, tasks + +def load_episode_dicts(local_dir: Path) -> dict: + with jsonlines.open(local_dir / EPISODES_PATH, "r") as reader: + return list(reader) def create_empty_dataset_info(codebase_version: str, fps: int, robot: Robot, use_videos: bool = True) -> dict: @@ -229,7 +259,7 @@ def check_timestamps_sync( # Track original indices before masking original_indices = torch.arange(len(diffs)) filtered_indices = original_indices[mask] - outside_tolerance_filtered_indices = torch.nonzero(~filtered_within_tolerance).squeeze() + outside_tolerance_filtered_indices = torch.nonzero(~filtered_within_tolerance) # .squeeze() outside_tolerance_indices = filtered_indices[outside_tolerance_filtered_indices] episode_indices = torch.stack(hf_dataset["episode_index"]) diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 6a606415..b5d634ba 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -126,8 +126,8 @@ def decode_video_frames_torchvision( def encode_video_frames( - imgs_dir: Path, - video_path: Path, + imgs_dir: Path | str, + video_path: Path | str, fps: int, vcodec: str = "libsvtav1", pix_fmt: str = "yuv420p", diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 62d6760b..5bf427f4 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -194,19 +194,17 @@ def record( pretrained_policy_name_or_path: str | None = None, policy_overrides: List[str] | None = None, fps: int | None = None, - warmup_time_s=2, - episode_time_s=10, - reset_time_s=5, - num_episodes=50, - video=True, - run_compute_stats=True, - push_to_hub=True, - tags=None, - num_image_writer_processes=0, - num_image_writer_threads_per_camera=4, - force_override=False, - 
display_cameras=True, - play_sounds=True, + warmup_time_s: int | float = 2, + episode_time_s: int | float = 10, + reset_time_s: int | float = 5, + num_episodes: int = 50, + video: bool = True, + run_compute_stats: bool = True, + push_to_hub: bool = True, + num_image_writer_processes: int = 0, + num_image_writer_threads_per_camera: int = 4, + display_cameras: bool = True, + play_sounds: bool = True, ) -> LeRobotDataset: # TODO(rcadene): Add option to record logs listener = None @@ -234,12 +232,18 @@ def record( # Create empty dataset or load existing saved episodes sanity_check_dataset_name(repo_id, policy) - image_writer = ImageWriter( - write_dir=root, - num_processes=num_image_writer_processes, - num_threads=num_image_writer_threads_per_camera * robot.num_cameras, + if len(robot.cameras) > 0: + image_writer = ImageWriter( + write_dir=root, + num_processes=num_image_writer_processes, + num_threads=num_image_writer_threads_per_camera * robot.num_cameras, + ) + else: + image_writer = None + + dataset = LeRobotDataset.create( + repo_id, fps, robot, root=root, image_writer=image_writer, use_videos=video ) - dataset = LeRobotDataset.create(repo_id, fps, robot, root=root, image_writer=image_writer) if not robot.is_connected: robot.connect() @@ -307,8 +311,9 @@ def record( log_say("Stop recording", play_sounds, blocking=True) stop_recording(robot, listener, display_cameras) - logging.info("Waiting for image writer to terminate...") - dataset.image_writer.stop() + if dataset.image_writer is not None: + logging.info("Waiting for image writer to terminate...") + dataset.image_writer.stop() dataset.consolidate(run_compute_stats) @@ -322,27 +327,28 @@ def record( @safe_disconnect def replay( - robot: Robot, episode: int, fps: int | None = None, root="data", repo_id="lerobot/debug", play_sounds=True + robot: Robot, + root: Path, + repo_id: str, + episode: int, + fps: int | None = None, + play_sounds: bool = True, + local_files_only: bool = True, ): # TODO(rcadene, aliberts): refactor with control_loop, once `dataset` is an instance of LeRobotDataset # TODO(rcadene): Add option to record logs - local_dir = Path(root) / repo_id - if not local_dir.exists(): - raise ValueError(local_dir) - dataset = LeRobotDataset(repo_id, root=root) - items = dataset.hf_dataset.select_columns("action") - from_idx = dataset.episode_data_index["from"][episode].item() - to_idx = dataset.episode_data_index["to"][episode].item() + dataset = LeRobotDataset(repo_id, root=root, episodes=[episode], local_files_only=local_files_only) + actions = dataset.hf_dataset.select_columns("action") if not robot.is_connected: robot.connect() log_say("Replaying episode", play_sounds, blocking=True) - for idx in range(from_idx, to_idx): + for idx in range(dataset.num_samples): start_episode_t = time.perf_counter() - action = items[idx]["action"] + action = actions[idx]["action"] robot.send_action(action) dt_s = time.perf_counter() - start_episode_t From ee52b8b7825764a286656fe2081ed096ad2d4d76 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 22 Oct 2024 20:07:11 +0200 Subject: [PATCH 41/59] Add channels to intelrealsense --- lerobot/common/robot_devices/cameras/intelrealsense.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lerobot/common/robot_devices/cameras/intelrealsense.py b/lerobot/common/robot_devices/cameras/intelrealsense.py index 66c7fe5c..684774fa 100644 --- a/lerobot/common/robot_devices/cameras/intelrealsense.py +++ b/lerobot/common/robot_devices/cameras/intelrealsense.py @@ -168,6 +168,7 @@ class 
IntelRealSenseCameraConfig: width: int | None = None height: int | None = None color_mode: str = "rgb" + channels: int | None = None use_depth: bool = False force_hardware_reset: bool = True rotation: int | None = None @@ -179,6 +180,8 @@ class IntelRealSenseCameraConfig: f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided." ) + self.channels = 3 + at_least_one_is_not_none = self.fps is not None or self.width is not None or self.height is not None at_least_one_is_none = self.fps is None or self.width is None or self.height is None if at_least_one_is_not_none and at_least_one_is_none: @@ -254,6 +257,7 @@ class IntelRealSenseCamera: self.fps = config.fps self.width = config.width self.height = config.height + self.channels = config.channels self.color_mode = config.color_mode self.use_depth = config.use_depth self.force_hardware_reset = config.force_hardware_reset From b46db7ea738f922458889a8ec2190782149043c3 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 22 Oct 2024 20:14:06 +0200 Subject: [PATCH 42/59] Fix tests --- lerobot/common/datasets/image_writer.py | 2 +- tests/test_control_robot.py | 111 ++++++++++++------------ 2 files changed, 58 insertions(+), 55 deletions(-) diff --git a/lerobot/common/datasets/image_writer.py b/lerobot/common/datasets/image_writer.py index 7bdefc64..b86a7cdf 100644 --- a/lerobot/common/datasets/image_writer.py +++ b/lerobot/common/datasets/image_writer.py @@ -120,7 +120,7 @@ class ImageWriter: wait(self.futures, timeout=timeout) progress_bar.update(len(self.futures)) else: - self._stop_processes(self.processes, self.image_queue, timeout) + self._stop_processes(timeout) def _stop_processes(self, timeout) -> None: for _ in self.processes: diff --git a/tests/test_control_robot.py b/tests/test_control_robot.py index 2c0bca9b..efdba0d0 100644 --- a/tests/test_control_robot.py +++ b/tests/test_control_robot.py @@ -29,7 +29,6 @@ from unittest.mock import patch import pytest -from lerobot.common.datasets.populate_dataset import add_frame, init_dataset from lerobot.common.logger import Logger from lerobot.common.policies.factory import make_policy from lerobot.common.utils.utils import init_hydra_config @@ -91,8 +90,9 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock): calibration_dir = Path(tmpdir) / robot_type overrides.append(f"calibration_dir={calibration_dir}") - root = Path(tmpdir) / "data" repo_id = "lerobot/debug" + root = Path(tmpdir) / "data" / repo_id + single_task = "Do something." robot = make_robot(robot_type, overrides=overrides, mock=mock) record( @@ -100,6 +100,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock): fps=30, root=root, repo_id=repo_id, + single_task=single_task, warmup_time_s=1, episode_time_s=1, num_episodes=2, @@ -129,17 +130,18 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): env_name = "koch_real" policy_name = "act_koch_real" - root = tmpdir / "data" repo_id = "lerobot/debug" - eval_repo_id = "lerobot/eval_debug" + root = tmpdir / "data" / repo_id + single_task = "Do something." 
robot = make_robot(robot_type, overrides=overrides, mock=mock) dataset = record( robot, root, repo_id, - fps=1, - warmup_time_s=1, + single_task, + fps=5, + warmup_time_s=0.5, episode_time_s=1, reset_time_s=1, num_episodes=2, @@ -150,10 +152,10 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): display_cameras=False, play_sounds=False, ) - assert dataset.num_episodes == 2 - assert len(dataset) == 2 + assert dataset.total_episodes == 2 + assert len(dataset) == 10 - replay(robot, episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False) + replay(robot, episode=0, fps=5, root=root, repo_id=repo_id, play_sounds=False) # TODO(rcadene, aliberts): rethink this design if robot_type == "aloha": @@ -216,10 +218,14 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): else: num_image_writer_processes = 0 - record( + eval_repo_id = "lerobot/eval_debug" + eval_root = tmpdir / "data" / eval_repo_id + + dataset = record( robot, - root, + eval_root, eval_repo_id, + single_task, pretrained_policy_name_or_path, warmup_time_s=1, episode_time_s=1, @@ -255,13 +261,15 @@ def test_resume_record(tmpdir, request, robot_type, mock): robot = make_robot(robot_type, overrides=overrides, mock=mock) - root = Path(tmpdir) / "data" repo_id = "lerobot/debug" + root = Path(tmpdir) / "data" / repo_id + single_task = "Do something." dataset = record( robot, root, repo_id, + single_task, fps=1, warmup_time_s=0, episode_time_s=1, @@ -274,32 +282,33 @@ def test_resume_record(tmpdir, request, robot_type, mock): ) assert len(dataset) == 1, "`dataset` should contain only 1 frame" - init_dataset_return_value = {} + # init_dataset_return_value = {} - def wrapped_init_dataset(*args, **kwargs): - nonlocal init_dataset_return_value - init_dataset_return_value = init_dataset(*args, **kwargs) - return init_dataset_return_value + # def wrapped_init_dataset(*args, **kwargs): + # nonlocal init_dataset_return_value + # init_dataset_return_value = init_dataset(*args, **kwargs) + # return init_dataset_return_value - with patch("lerobot.scripts.control_robot.init_dataset", wraps=wrapped_init_dataset): - dataset = record( - robot, - root, - repo_id, - fps=1, - warmup_time_s=0, - episode_time_s=1, - num_episodes=2, - push_to_hub=False, - video=False, - display_cameras=False, - play_sounds=False, - run_compute_stats=False, - ) - assert len(dataset) == 2, "`dataset` should contain only 1 frame" - assert ( - init_dataset_return_value["num_episodes"] == 2 - ), "`init_dataset` should load the previous episode" + # with patch("lerobot.scripts.control_robot.init_dataset", wraps=wrapped_init_dataset): + dataset = record( + robot, + root, + repo_id, + single_task, + fps=1, + warmup_time_s=0, + episode_time_s=1, + num_episodes=2, + push_to_hub=False, + video=False, + display_cameras=False, + play_sounds=False, + run_compute_stats=False, + ) + assert len(dataset) == 2, "`dataset` should contain only 1 frame" + # assert ( + # init_dataset_return_value["num_episodes"] == 2 + # ), "`init_dataset` should load the previous episode" @pytest.mark.parametrize("robot_type, mock", [("koch", True)]) @@ -317,23 +326,22 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock): overrides = [] robot = make_robot(robot_type, overrides=overrides, mock=mock) - with ( - patch("lerobot.scripts.control_robot.init_keyboard_listener") as mock_listener, - patch("lerobot.common.robot_devices.control_utils.add_frame", wraps=add_frame) as mock_add_frame, - ): + with 
patch("lerobot.scripts.control_robot.init_keyboard_listener") as mock_listener: mock_events = {} mock_events["exit_early"] = True mock_events["rerecord_episode"] = True mock_events["stop_recording"] = False mock_listener.return_value = (None, mock_events) - root = Path(tmpdir) / "data" repo_id = "lerobot/debug" + root = Path(tmpdir) / "data" / repo_id + single_task = "Do something." dataset = record( robot, root, repo_id, + single_task, fps=1, warmup_time_s=0, episode_time_s=1, @@ -347,7 +355,6 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock): assert not mock_events["rerecord_episode"], "`rerecord_episode` wasn't properly reset to False" assert not mock_events["exit_early"], "`exit_early` wasn't properly reset to False" - assert mock_add_frame.call_count == 2, "`add_frame` should have been called 2 times" assert len(dataset) == 1, "`dataset` should contain only 1 frame" @@ -366,23 +373,22 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock): overrides = [] robot = make_robot(robot_type, overrides=overrides, mock=mock) - with ( - patch("lerobot.scripts.control_robot.init_keyboard_listener") as mock_listener, - patch("lerobot.common.robot_devices.control_utils.add_frame", wraps=add_frame) as mock_add_frame, - ): + with patch("lerobot.scripts.control_robot.init_keyboard_listener") as mock_listener: mock_events = {} mock_events["exit_early"] = True mock_events["rerecord_episode"] = False mock_events["stop_recording"] = False mock_listener.return_value = (None, mock_events) - root = Path(tmpdir) / "data" repo_id = "lerobot/debug" + root = Path(tmpdir) / "data" / repo_id + single_task = "Do something." dataset = record( robot, fps=2, root=root, + single_task=single_task, repo_id=repo_id, warmup_time_s=0, episode_time_s=1, @@ -395,7 +401,6 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock): ) assert not mock_events["exit_early"], "`exit_early` wasn't properly reset to False" - assert mock_add_frame.call_count == 1, "`add_frame` should have been called 1 time" assert len(dataset) == 1, "`dataset` should contain only 1 frame" @@ -416,23 +421,22 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num overrides = [] robot = make_robot(robot_type, overrides=overrides, mock=mock) - with ( - patch("lerobot.scripts.control_robot.init_keyboard_listener") as mock_listener, - patch("lerobot.common.robot_devices.control_utils.add_frame", wraps=add_frame) as mock_add_frame, - ): + with patch("lerobot.scripts.control_robot.init_keyboard_listener") as mock_listener: mock_events = {} mock_events["exit_early"] = True mock_events["rerecord_episode"] = False mock_events["stop_recording"] = True mock_listener.return_value = (None, mock_events) - root = Path(tmpdir) / "data" repo_id = "lerobot/debug" + root = Path(tmpdir) / "data" / repo_id + single_task = "Do something." 
dataset = record( robot, root, repo_id, + single_task=single_task, fps=1, warmup_time_s=0, episode_time_s=1, @@ -446,5 +450,4 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num ) assert not mock_events["exit_early"], "`exit_early` wasn't properly reset to False" - assert mock_add_frame.call_count == 1, "`add_frame` should have been called 1 time" assert len(dataset) == 1, "`dataset` should contain only 1 frame" From 6c2cb6e10737e797f71420922c763235026b3b22 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 22 Oct 2024 20:21:26 +0200 Subject: [PATCH 43/59] Remove populate dataset --- lerobot/common/datasets/populate_dataset.py | 318 -------------------- 1 file changed, 318 deletions(-) delete mode 100644 lerobot/common/datasets/populate_dataset.py diff --git a/lerobot/common/datasets/populate_dataset.py b/lerobot/common/datasets/populate_dataset.py deleted file mode 100644 index 854b639e..00000000 --- a/lerobot/common/datasets/populate_dataset.py +++ /dev/null @@ -1,318 +0,0 @@ -"""Functions to create an empty dataset, and populate it with frames.""" -# TODO(rcadene, aliberts): to adapt as class methods of next version of LeRobotDataset - -import json -import logging -import shutil - -import torch -import tqdm - -from lerobot.common.datasets.compute_stats import compute_stats -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset -from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import to_hf_dataset -from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, get_default_encoding -from lerobot.common.datasets.utils import calculate_episode_data_index, create_branch -from lerobot.common.datasets.video_utils import encode_video_frames -from lerobot.common.utils.utils import log_say -from lerobot.scripts.push_dataset_to_hub import ( - push_dataset_card_to_hub, - push_meta_data_to_hub, - push_videos_to_hub, - save_meta_data, -) - -######################################################################################## -# Functions to initialize, resume and populate a dataset -######################################################################################## - - -# def init_dataset( -# repo_id, -# root, -# force_override, -# fps, -# video, -# write_images, -# num_image_writer_processes, -# num_image_writer_threads, -# ): -# local_dir = Path(root) / repo_id -# if local_dir.exists() and force_override: -# shutil.rmtree(local_dir) - -# episodes_dir = local_dir / "episodes" -# episodes_dir.mkdir(parents=True, exist_ok=True) - -# videos_dir = local_dir / "videos" -# videos_dir.mkdir(parents=True, exist_ok=True) - -# # Logic to resume data recording -# rec_info_path = episodes_dir / "data_recording_info.json" -# if rec_info_path.exists(): -# with open(rec_info_path) as f: -# rec_info = json.load(f) -# num_episodes = rec_info["last_episode_index"] + 1 -# else: -# num_episodes = 0 - -# dataset = { -# "repo_id": repo_id, -# "local_dir": local_dir, -# "videos_dir": videos_dir, -# "episodes_dir": episodes_dir, -# "fps": fps, -# "video": video, -# "rec_info_path": rec_info_path, -# "num_episodes": num_episodes, -# } - -# if write_images: -# # Initialize processes or/and threads dedicated to save images on disk asynchronously, -# # which is critical to control a robot and record data at a high frame rate. 
-# image_writer = start_image_writer( -# num_processes=num_image_writer_processes, -# num_threads=num_image_writer_threads, -# ) -# dataset["image_writer"] = image_writer - -# return dataset - - -# def add_frame(dataset, observation, action): -# if "current_episode" not in dataset: -# # initialize episode dictionary -# ep_dict = {} -# for key in observation: -# if key not in ep_dict: -# ep_dict[key] = [] -# for key in action: -# if key not in ep_dict: -# ep_dict[key] = [] - -# ep_dict["episode_index"] = [] -# ep_dict["frame_index"] = [] -# ep_dict["timestamp"] = [] -# ep_dict["next.done"] = [] - -# dataset["current_episode"] = ep_dict -# dataset["current_frame_index"] = 0 - -# ep_dict = dataset["current_episode"] -# episode_index = dataset["num_episodes"] -# frame_index = dataset["current_frame_index"] -# videos_dir = dataset["videos_dir"] -# video = dataset["video"] -# fps = dataset["fps"] - -# ep_dict["episode_index"].append(episode_index) -# ep_dict["frame_index"].append(frame_index) -# ep_dict["timestamp"].append(frame_index / fps) -# ep_dict["next.done"].append(False) - -# img_keys = [key for key in observation if "image" in key] -# non_img_keys = [key for key in observation if "image" not in key] - -# # Save all observed modalities except images -# for key in non_img_keys: -# ep_dict[key].append(observation[key]) - -# # Save actions -# for key in action: -# ep_dict[key].append(action[key]) - -# if "image_writer" not in dataset: -# dataset["current_frame_index"] += 1 -# return - -# # Save images -# image_writer = dataset["image_writer"] -# for key in img_keys: -# imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}" -# async_save_image( -# image_writer, -# image=observation[key], -# key=key, -# frame_index=frame_index, -# episode_index=episode_index, -# videos_dir=str(videos_dir), -# ) - -# if video: -# fname = f"{key}_episode_{episode_index:06d}.mp4" -# frame_info = {"path": f"videos/{fname}", "timestamp": frame_index / fps} -# else: -# frame_info = str(imgs_dir / f"frame_{frame_index:06d}.png") - -# ep_dict[key].append(frame_info) - -# dataset["current_frame_index"] += 1 - - -def delete_current_episode(dataset): - del dataset["current_episode"] - del dataset["current_frame_index"] - - # delete temporary images - episode_index = dataset["num_episodes"] - videos_dir = dataset["videos_dir"] - for tmp_imgs_dir in videos_dir.glob(f"*_episode_{episode_index:06d}"): - shutil.rmtree(tmp_imgs_dir) - - -def save_current_episode(dataset): - episode_index = dataset["num_episodes"] - ep_dict = dataset["current_episode"] - episodes_dir = dataset["episodes_dir"] - rec_info_path = dataset["rec_info_path"] - - ep_dict["next.done"][-1] = True - - for key in ep_dict: - if "observation" in key and "image" not in key: - ep_dict[key] = torch.stack(ep_dict[key]) - - ep_dict["action"] = torch.stack(ep_dict["action"]) - ep_dict["episode_index"] = torch.tensor(ep_dict["episode_index"]) - ep_dict["frame_index"] = torch.tensor(ep_dict["frame_index"]) - ep_dict["timestamp"] = torch.tensor(ep_dict["timestamp"]) - ep_dict["next.done"] = torch.tensor(ep_dict["next.done"]) - - ep_path = episodes_dir / f"episode_{episode_index}.pth" - torch.save(ep_dict, ep_path) - - rec_info = { - "last_episode_index": episode_index, - } - with open(rec_info_path, "w") as f: - json.dump(rec_info, f) - - # force re-initialization of episode dictionnary during add_frame - del dataset["current_episode"] - - dataset["num_episodes"] += 1 - - -def encode_videos(dataset, image_keys, play_sounds): - log_say("Encoding videos", 
play_sounds) - - num_episodes = dataset["num_episodes"] - videos_dir = dataset["videos_dir"] - local_dir = dataset["local_dir"] - fps = dataset["fps"] - - # Use ffmpeg to convert frames stored as png into mp4 videos - for episode_index in tqdm.tqdm(range(num_episodes)): - for key in image_keys: - # key = f"observation.images.{name}" - tmp_imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}" - fname = f"{key}_episode_{episode_index:06d}.mp4" - video_path = local_dir / "videos" / fname - if video_path.exists(): - # Skip if video is already encoded. Could be the case when resuming data recording. - continue - # note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding, - # since video encoding with ffmpeg is already using multithreading. - encode_video_frames(tmp_imgs_dir, video_path, fps, overwrite=True) - shutil.rmtree(tmp_imgs_dir) - - -def from_dataset_to_lerobot_dataset(dataset, play_sounds): - log_say("Consolidate episodes", play_sounds) - - num_episodes = dataset["num_episodes"] - episodes_dir = dataset["episodes_dir"] - videos_dir = dataset["videos_dir"] - video = dataset["video"] - fps = dataset["fps"] - repo_id = dataset["repo_id"] - - ep_dicts = [] - for episode_index in tqdm.tqdm(range(num_episodes)): - ep_path = episodes_dir / f"episode_{episode_index}.pth" - ep_dict = torch.load(ep_path) - ep_dicts.append(ep_dict) - data_dict = concatenate_episodes(ep_dicts) - - if video: - image_keys = [key for key in data_dict if "image" in key] - encode_videos(dataset, image_keys, play_sounds) - - hf_dataset = to_hf_dataset(data_dict, video) - episode_data_index = calculate_episode_data_index(hf_dataset) - - info = { - "codebase_version": CODEBASE_VERSION, - "fps": fps, - "video": video, - } - if video: - info["encoding"] = get_default_encoding() - - lerobot_dataset = LeRobotDataset.from_preloaded( - repo_id=repo_id, - hf_dataset=hf_dataset, - episode_data_index=episode_data_index, - info=info, - videos_dir=videos_dir, - ) - - return lerobot_dataset - - -def save_lerobot_dataset_on_disk(lerobot_dataset): - hf_dataset = lerobot_dataset.hf_dataset - info = lerobot_dataset.info - stats = lerobot_dataset.stats - episode_data_index = lerobot_dataset.episode_data_index - local_dir = lerobot_dataset.videos_dir.parent - meta_data_dir = local_dir / "meta_data" - - hf_dataset = hf_dataset.with_format(None) # to remove transforms that cant be saved - hf_dataset.save_to_disk(str(local_dir / "train")) - - save_meta_data(info, stats, episode_data_index, meta_data_dir) - - -def push_lerobot_dataset_to_hub(lerobot_dataset, tags): - hf_dataset = lerobot_dataset.hf_dataset - local_dir = lerobot_dataset.videos_dir.parent - videos_dir = lerobot_dataset.videos_dir - repo_id = lerobot_dataset.repo_id - video = lerobot_dataset.video - meta_data_dir = local_dir / "meta_data" - - if not (local_dir / "train").exists(): - raise ValueError( - "You need to run `save_lerobot_dataset_on_disk(lerobot_dataset)` before pushing to the hub." 
- ) - - hf_dataset.push_to_hub(repo_id, revision="main") - push_meta_data_to_hub(repo_id, meta_data_dir, revision="main") - push_dataset_card_to_hub(repo_id, revision="main", tags=tags) - if video: - push_videos_to_hub(repo_id, videos_dir, revision="main") - create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION) - - -def create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_sounds): - if "image_writer" in dataset: - logging.info("Waiting for image writer to terminate...") - image_writer = dataset["image_writer"] - image_writer.stop() - - lerobot_dataset = from_dataset_to_lerobot_dataset(dataset, play_sounds) - - if run_compute_stats: - log_say("Computing dataset statistics", play_sounds) - lerobot_dataset.stats = compute_stats(lerobot_dataset) - else: - logging.info("Skipping computation of the dataset statistics") - lerobot_dataset.stats = {} - - save_lerobot_dataset_on_disk(lerobot_dataset) - - if push_to_hub: - push_lerobot_dataset_to_hub(lerobot_dataset, tags) - - return lerobot_dataset From 237a484be0704160ca32ade58eb07b2eed0db5fb Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Tue, 22 Oct 2024 22:46:34 +0200 Subject: [PATCH 44/59] Fix paths & add add_frame doc --- lerobot/common/datasets/image_writer.py | 13 ++++----- lerobot/common/datasets/lerobot_dataset.py | 34 +++++++++++----------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/lerobot/common/datasets/image_writer.py b/lerobot/common/datasets/image_writer.py index b86a7cdf..09f803e2 100644 --- a/lerobot/common/datasets/image_writer.py +++ b/lerobot/common/datasets/image_writer.py @@ -99,19 +99,16 @@ class ImageWriter: img = Image.fromarray(image.numpy()) img.save(str(file_path), quality=100) - def get_image_file_path( - self, episode_index: int, image_key: str, frame_index: int, return_str: bool = True - ) -> str | Path: + def get_image_file_path(self, episode_index: int, image_key: str, frame_index: int) -> Path: fpath = self.image_path.format( image_key=image_key, episode_index=episode_index, frame_index=frame_index ) - return str(self.dir / fpath) if return_str else self.dir / fpath + return self.dir / fpath - def get_episode_dir(self, episode_index: int, image_key: str, return_str: bool = True) -> str | Path: - dir_path = self.get_image_file_path( - episode_index=episode_index, image_key=image_key, frame_index=0, return_str=False + def get_episode_dir(self, episode_index: int, image_key: str) -> Path: + return self.get_image_file_path( + episode_index=episode_index, image_key=image_key, frame_index=0 ).parent - return str(dir_path) if return_str else dir_path def stop(self, timeout=20) -> None: """Stop the image writer, waiting for all processes or threads to finish.""" diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index ad5a37cf..1f01d9f0 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -271,10 +271,10 @@ class LeRobotDataset(torch.utils.data.Dataset): files = None ignore_patterns = None if download_videos else "videos/" if self.episodes is not None: - files = [self.get_data_file_path(ep_idx) for ep_idx in self.episodes] + files = [str(self.get_data_file_path(ep_idx)) for ep_idx in self.episodes] if len(self.video_keys) > 0 and download_videos: video_files = [ - self.get_video_file_path(ep_idx, vid_key) + str(self.get_video_file_path(ep_idx, vid_key)) for vid_key in self.video_keys for ep_idx in self.episodes ] @@ -288,23 +288,21 @@ class 
LeRobotDataset(torch.utils.data.Dataset): path = str(self.root / "data") hf_dataset = load_dataset("parquet", data_dir=path, split="train") else: - files = [self.get_data_file_path(ep_idx) for ep_idx in self.episodes] + files = [str(self.root / self.get_data_file_path(ep_idx)) for ep_idx in self.episodes] hf_dataset = load_dataset("parquet", data_files=files, split="train") hf_dataset.set_transform(hf_transform_to_torch) return hf_dataset - def get_data_file_path(self, ep_index: int, return_str: bool = True) -> str | Path: + def get_data_file_path(self, ep_index: int) -> Path: ep_chunk = self.get_episode_chunk(ep_index) - fpath = self.data_path.format( + return self.data_path.format( episode_chunk=ep_chunk, episode_index=ep_index, total_episodes=self.total_episodes ) - return str(self.root / fpath) if return_str else self.root / fpath - def get_video_file_path(self, ep_index: int, vid_key: str, return_str: bool = True) -> str | Path: + def get_video_file_path(self, ep_index: int, vid_key: str) -> Path: ep_chunk = self.get_episode_chunk(ep_index) - fpath = self.videos_path.format(episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_index) - return str(self.root / fpath) if return_str else self.root / fpath + return self.videos_path.format(episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_index) def get_episode_chunk(self, ep_index: int) -> int: ep_chunk = ep_index // self.chunks_size @@ -554,6 +552,11 @@ class LeRobotDataset(torch.utils.data.Dataset): } def add_frame(self, frame: dict) -> None: + """ + This function only adds the frame to the episode_buffer. Apart from images — which are written in a + temporary directory — nothing is written to disk. To save those frames, the 'add_episode()' method + then needs to be called. + """ frame_index = self.episode_buffer["size"] self.episode_buffer["frame_index"].append(frame_index) self.episode_buffer["timestamp"].append(frame_index / self.fps) @@ -571,10 +574,7 @@ class LeRobotDataset(torch.utils.data.Dataset): # Save images for cam_key in self.camera_keys: img_path = self.image_writer.get_image_file_path( - episode_index=self.episode_buffer["episode_index"], - image_key=cam_key, - frame_index=frame_index, - return_str=False, + episode_index=self.episode_buffer["episode_index"], image_key=cam_key, frame_index=frame_index ) if frame_index == 0: img_path.parent.mkdir(parents=True, exist_ok=True) @@ -632,7 +632,7 @@ class LeRobotDataset(torch.utils.data.Dataset): features = self.features ep_dataset = datasets.Dataset.from_dict(self.episode_buffer, features=features, split="train") ep_table = ep_dataset._data.table - ep_data_path = self.get_data_file_path(ep_index=episode_index, return_str=False) + ep_data_path = self.root / self.get_data_file_path(ep_index=episode_index) ep_data_path.parent.mkdir(parents=True, exist_ok=True) pq.write_table(ep_table, ep_data_path) @@ -671,7 +671,7 @@ class LeRobotDataset(torch.utils.data.Dataset): episode_index = self.episode_buffer["episode_index"] if self.image_writer is not None: for cam_key in self.camera_keys: - img_dir = self.image_writer.get_episode_dir(episode_index, cam_key, return_str=False) + img_dir = self.image_writer.get_episode_dir(episode_index, cam_key) if img_dir.is_dir(): shutil.rmtree(img_dir) @@ -686,7 +686,7 @@ class LeRobotDataset(torch.utils.data.Dataset): current_file_name = self.data_path.replace("{total_episodes:05d}", "*") current_file_name = current_file_name.format(episode_chunk=ep_chunk, episode_index=ep_idx) current_file_name = 
list(self.root.glob(current_file_name))[0] - updated_file_name = self.get_data_file_path(ep_idx) + updated_file_name = self.root / self.get_data_file_path(ep_idx) current_file_name.rename(updated_file_name) def _remove_image_writer(self) -> None: @@ -700,7 +700,7 @@ class LeRobotDataset(torch.utils.data.Dataset): # TODO: create video_buffer to store the state of encoded/unencoded videos and remove the need # to call self.image_writer here tmp_imgs_dir = self.image_writer.get_episode_dir(episode_index, key) - video_path = self.get_video_file_path(episode_index, key, return_str=False) + video_path = self.root / self.get_video_file_path(episode_index, key) if video_path.is_file(): # Skip if video is already encoded. Could be the case when resuming data recording. continue From c72dc23c437c4bcd46b2b3b044064f160e57c724 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 00:03:30 +0200 Subject: [PATCH 45/59] Remove total_episodes from default parquet path --- lerobot/common/datasets/lerobot_dataset.py | 20 ++++---------------- lerobot/common/datasets/utils.py | 6 +++--- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 1f01d9f0..acde3b92 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -296,13 +296,13 @@ class LeRobotDataset(torch.utils.data.Dataset): def get_data_file_path(self, ep_index: int) -> Path: ep_chunk = self.get_episode_chunk(ep_index) - return self.data_path.format( - episode_chunk=ep_chunk, episode_index=ep_index, total_episodes=self.total_episodes - ) + fpath = self.data_path.format(episode_chunk=ep_chunk, episode_index=ep_index) + return Path(fpath) def get_video_file_path(self, ep_index: int, vid_key: str) -> Path: ep_chunk = self.get_episode_chunk(ep_index) - return self.videos_path.format(episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_index) + fpath = self.videos_path.format(episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_index) + return Path(fpath) def get_episode_chunk(self, ep_index: int) -> int: ep_chunk = ep_index // self.chunks_size @@ -678,17 +678,6 @@ class LeRobotDataset(torch.utils.data.Dataset): # Reset the buffer self.episode_buffer = self._create_episode_buffer() - def _update_data_file_names(self) -> None: - # TODO(aliberts): remove the need for this hack by removing total_episodes part in data file names. - # Must first investigate if this doesn't break hub/datasets features like viewer etc. 
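The method being deleted here (its body continues below) only existed because the old parquet filename embedded `total_episodes`, which forced a rename of every data file after each recorded episode. With the per-episode template introduced by this patch (see the `utils.py` hunk further down), a file path depends only on the episode index and its chunk. A small sketch of that mapping — the chunk size of 1000 is an assumption read off the directory tree documented in PATCH 48, not stated explicitly in the diff:

```python
from pathlib import Path

# Path templates as they appear in lerobot/common/datasets/utils.py after this patch.
DEFAULT_PARQUET_PATH = "data/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.parquet"
DEFAULT_VIDEO_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4"
CHUNKS_SIZE = 1000  # assumed value of DEFAULT_CHUNK_SIZE


def get_data_file_path(ep_index: int) -> Path:
    # Plain integer division, matching get_episode_chunk() once PATCH 47 drops the off-by-one branch.
    ep_chunk = ep_index // CHUNKS_SIZE
    return Path(DEFAULT_PARQUET_PATH.format(episode_chunk=ep_chunk, episode_index=ep_index))


def get_video_file_path(ep_index: int, vid_key: str) -> Path:
    ep_chunk = ep_index // CHUNKS_SIZE
    return Path(DEFAULT_VIDEO_PATH.format(episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_index))


print(get_data_file_path(999))   # data/chunk-000/episode_000999.parquet
print(get_data_file_path(1000))  # data/chunk-001/episode_001000.parquet
print(get_video_file_path(1000, "observation.images.laptop"))
# videos/chunk-001/observation.images.laptop/episode_001000.mp4
```

Because the filename no longer changes as the dataset grows, files written for earlier episodes never need to be renamed when new episodes are added.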
- for ep_idx in range(self.total_episodes): - ep_chunk = self.get_episode_chunk(ep_idx) - current_file_name = self.data_path.replace("{total_episodes:05d}", "*") - current_file_name = current_file_name.format(episode_chunk=ep_chunk, episode_index=ep_idx) - current_file_name = list(self.root.glob(current_file_name))[0] - updated_file_name = self.root / self.get_data_file_path(ep_idx) - current_file_name.rename(updated_file_name) - def _remove_image_writer(self) -> None: if self.image_writer is not None: self.image_writer = None @@ -710,7 +699,6 @@ class LeRobotDataset(torch.utils.data.Dataset): shutil.rmtree(tmp_imgs_dir) def consolidate(self, run_compute_stats: bool = True) -> None: - self._update_data_file_names() self.hf_dataset = self.load_hf_dataset() self.episode_data_index = get_episode_data_index(self.episodes, self.episode_dicts) check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 8625808e..aa9c0c04 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -37,9 +37,8 @@ STATS_PATH = "meta/stats.json" TASKS_PATH = "meta/tasks.jsonl" DEFAULT_VIDEO_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4" -DEFAULT_PARQUET_PATH = ( - "data/chunk-{episode_chunk:03d}/train-{episode_index:05d}-of-{total_episodes:05d}.parquet" -) +DEFAULT_PARQUET_PATH = "data/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.parquet" + DATASET_CARD_TEMPLATE = """ --- # Metadata will go there @@ -88,6 +87,7 @@ def write_json(data: dict, fpath: Path) -> None: def append_jsonl(data: dict, fpath: Path) -> None: + fpath.parent.mkdir(exist_ok=True, parents=True) with jsonlines.open(fpath, "a") as writer: writer.write(data) From c3c0141738d133546022632f4a29ed27dd7c87c2 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 00:05:31 +0200 Subject: [PATCH 46/59] Update & fix conversion script --- .../datasets/v2/convert_dataset_v1_to_v2.py | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index 65a2061e..7ab5ae14 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -124,19 +124,26 @@ from lerobot.common.datasets.utils import ( DEFAULT_CHUNK_SIZE, DEFAULT_PARQUET_PATH, DEFAULT_VIDEO_PATH, + EPISODES_PATH, + INFO_PATH, + STATS_PATH, + TASKS_PATH, create_branch, create_lerobot_dataset_card, flatten_dict, get_hub_safe_version, unflatten_dict, ) +from lerobot.common.datasets.video_utils import VideoFrame # noqa: F401 from lerobot.common.utils.utils import init_hydra_config V16 = "v1.6" V20 = "v2.0" GITATTRIBUTES_REF = "aliberts/gitattributes_reference" -VIDEO_FILE = "{video_key}_episode_{episode_index:06d}.mp4" +V1_VIDEO_FILE = "{video_key}_episode_{episode_index:06d}.mp4" +V1_INFO_PATH = "meta_data/info.json" +V1_STATS_PATH = "meta_data/stats.safetensors" def parse_robot_config(config_path: Path, config_overrides: list[str] | None = None) -> tuple[str, dict]: @@ -180,17 +187,18 @@ def write_json(data: dict, fpath: Path) -> None: def write_jsonlines(data: dict, fpath: Path) -> None: + fpath.parent.mkdir(exist_ok=True, parents=True) with jsonlines.open(fpath, "w") as writer: writer.write_all(data) -def convert_stats_to_json(input_dir: Path, output_dir: Path) -> None: - safetensor_path = input_dir 
/ "stats.safetensors" +def convert_stats_to_json(v1_dir: Path, v2_dir: Path) -> None: + safetensor_path = v1_dir / V1_STATS_PATH stats = load_file(safetensor_path) serialized_stats = {key: value.tolist() for key, value in stats.items()} serialized_stats = unflatten_dict(serialized_stats) - json_path = output_dir / "stats.json" + json_path = v2_dir / STATS_PATH json_path.parent.mkdir(exist_ok=True, parents=True) with open(json_path, "w") as f: json.dump(serialized_stats, f, indent=4) @@ -279,7 +287,7 @@ def split_parquet_by_episodes( ep_table = table.filter(pc.equal(table["episode_index"], ep_idx)) episode_lengths.insert(ep_idx, len(ep_table)) output_file = output_dir / DEFAULT_PARQUET_PATH.format( - episode_chunk=ep_chunk, episode_index=ep_idx, total_episodes=total_episodes + episode_chunk=ep_chunk, episode_index=ep_idx ) pq.write_table(ep_table, output_file) @@ -336,7 +344,7 @@ def move_videos( target_path = DEFAULT_VIDEO_PATH.format( episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_idx ) - video_file = VIDEO_FILE.format(video_key=vid_key, episode_index=ep_idx) + video_file = V1_VIDEO_FILE.format(video_key=vid_key, episode_index=ep_idx) if len(video_dirs) == 1: video_path = video_dirs[0] / video_file else: @@ -572,7 +580,7 @@ def convert_dataset( branch = test_branch create_branch(repo_id=repo_id, branch=test_branch, repo_type="dataset") - metadata_v1 = load_json(v1x_dir / "meta_data" / "info.json") + metadata_v1 = load_json(v1x_dir / V1_INFO_PATH) dataset = datasets.load_dataset("parquet", data_dir=v1x_dir / "data", split="train") keys = get_keys(dataset) @@ -611,7 +619,7 @@ def convert_dataset( assert set(tasks) == {task for ep_tasks in tasks_by_episodes.values() for task in ep_tasks} tasks = [{"task_index": task_idx, "task": task} for task_idx, task in enumerate(tasks)] - write_jsonlines(tasks, v20_dir / "meta" / "tasks.json") + write_jsonlines(tasks, v20_dir / TASKS_PATH) # Shapes sequence_shapes = {key: dataset.features[key].length for key in keys["sequence"]} @@ -667,7 +675,7 @@ def convert_dataset( {"episode_index": ep_idx, "tasks": tasks_by_episodes[ep_idx], "length": episode_lengths[ep_idx]} for ep_idx in episode_indices ] - write_jsonlines(episodes, v20_dir / "meta" / "episodes.jsonl") + write_jsonlines(episodes, v20_dir / EPISODES_PATH) # Assemble metadata v2.0 metadata_v2_0 = { @@ -689,8 +697,8 @@ def convert_dataset( "names": names, "videos": videos_info, } - write_json(metadata_v2_0, v20_dir / "meta" / "info.json") - convert_stats_to_json(v1x_dir / "meta_data", v20_dir / "meta") + write_json(metadata_v2_0, v20_dir / INFO_PATH) + convert_stats_to_json(v1x_dir, v20_dir) with contextlib.suppress(EntryNotFoundError): hub_api.delete_folder(repo_id=repo_id, path_in_repo="data", repo_type="dataset", revision=branch) From 9dca233d7eb4d2e7842256a0068c36904ae5816f Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 00:27:14 +0200 Subject: [PATCH 47/59] Fix episode chunk --- lerobot/common/datasets/lerobot_dataset.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index acde3b92..014d2783 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -305,10 +305,7 @@ class LeRobotDataset(torch.utils.data.Dataset): return Path(fpath) def get_episode_chunk(self, ep_index: int) -> int: - ep_chunk = ep_index // self.chunks_size - if ep_index > 0 and ep_index % self.chunks_size == 0: - ep_chunk -= 1 - return 
ep_chunk + return ep_index // self.chunks_size @property def data_path(self) -> str: From fb73cdb9a4ca9d03aeddd8777c16d108b81c898d Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 00:32:28 +0200 Subject: [PATCH 48/59] Update dataset doc --- lerobot/common/datasets/lerobot_dataset.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 014d2783..9721cd62 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -109,14 +109,14 @@ class LeRobotDataset(torch.utils.data.Dataset): . ├── data │ ├── chunk-000 - │ │ ├── train-00000-of-03603.parquet - │ │ ├── train-00001-of-03603.parquet - │ │ ├── train-00002-of-03603.parquet + │ │ ├── episode_000000.parquet + │ │ ├── episode_000001.parquet + │ │ ├── episode_000002.parquet │ │ └── ... │ ├── chunk-001 - │ │ ├── train-01000-of-03603.parquet - │ │ ├── train-01001-of-03603.parquet - │ │ ├── train-01002-of-03603.parquet + │ │ ├── episode_001000.parquet + │ │ ├── episode_001001.parquet + │ │ ├── episode_001002.parquet │ │ └── ... │ └── ... ├── meta From a2a8538ac97407d3fa65da4842a33ddbd7f78e82 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 11:38:07 +0200 Subject: [PATCH 49/59] add write_stats, changes names, add some typing --- lerobot/common/datasets/factory.py | 3 +- lerobot/common/datasets/image_writer.py | 5 +-- lerobot/common/datasets/lerobot_dataset.py | 36 ++++++++++------------ lerobot/common/datasets/utils.py | 10 ++++-- lerobot/scripts/control_robot.py | 5 ++- 5 files changed, 33 insertions(+), 26 deletions(-) diff --git a/lerobot/common/datasets/factory.py b/lerobot/common/datasets/factory.py index 96a353fb..04b6e57b 100644 --- a/lerobot/common/datasets/factory.py +++ b/lerobot/common/datasets/factory.py @@ -91,9 +91,9 @@ def make_dataset(cfg, split: str = "train") -> LeRobotDataset | MultiLeRobotData ) if isinstance(cfg.dataset_repo_id, str): + # TODO (aliberts): add 'episodes' arg from config after removing hydra dataset = LeRobotDataset( cfg.dataset_repo_id, - split=split, delta_timestamps=cfg.training.get("delta_timestamps"), image_transforms=image_transforms, video_backend=cfg.video_backend, @@ -101,7 +101,6 @@ def make_dataset(cfg, split: str = "train") -> LeRobotDataset | MultiLeRobotData else: dataset = MultiLeRobotDataset( cfg.dataset_repo_id, - split=split, delta_timestamps=cfg.training.get("delta_timestamps"), image_transforms=image_transforms, video_backend=cfg.video_backend, diff --git a/lerobot/common/datasets/image_writer.py b/lerobot/common/datasets/image_writer.py index 09f803e2..0900d910 100644 --- a/lerobot/common/datasets/image_writer.py +++ b/lerobot/common/datasets/image_writer.py @@ -21,7 +21,7 @@ import torch import tqdm from PIL import Image -DEFAULT_IMAGE_PATH = "images/{image_key}/episode_{episode_index:06d}/frame_{frame_index:06d}.png" +DEFAULT_IMAGE_PATH = "{image_key}/episode_{episode_index:06d}/frame_{frame_index:06d}.png" def safe_stop_image_writer(func): @@ -54,7 +54,8 @@ class ImageWriter: """ def __init__(self, write_dir: Path, num_processes: int = 0, num_threads: int = 1): - self.dir = write_dir + self.dir = write_dir / "images" + self.dir.mkdir(parents=True, exist_ok=True) self.image_path = DEFAULT_IMAGE_PATH self.num_processes = num_processes self.num_threads = self.num_threads_per_process = num_threads diff --git a/lerobot/common/datasets/lerobot_dataset.py 
b/lerobot/common/datasets/lerobot_dataset.py index 9721cd62..0c62756e 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -33,6 +33,7 @@ from lerobot.common.datasets.image_writer import ImageWriter from lerobot.common.datasets.utils import ( EPISODES_PATH, INFO_PATH, + STATS_PATH, TASKS_PATH, append_jsonl, check_delta_timestamps, @@ -40,7 +41,6 @@ from lerobot.common.datasets.utils import ( check_version_compatibility, create_branch, create_empty_dataset_info, - flatten_dict, get_delta_indices, get_episode_data_index, get_hub_safe_version, @@ -49,8 +49,8 @@ from lerobot.common.datasets.utils import ( load_info, load_stats, load_tasks, - unflatten_dict, write_json, + write_stats, ) from lerobot.common.datasets.video_utils import ( VideoFrame, @@ -227,11 +227,11 @@ class LeRobotDataset(torch.utils.data.Dataset): """Codebase version used to create this dataset.""" return self.info["codebase_version"] - def push_to_repo(self, push_videos: bool = True) -> None: + def push_to_hub(self, push_videos: bool = True) -> None: if not self.consolidated: raise RuntimeError( "You are trying to upload to the hub a LeRobotDataset that has not been consolidated yet." - "Please use the '.consolidate()' method first." + "Please call the dataset 'consolidate()' method first." ) ignore_patterns = ["images/"] if not push_videos: @@ -675,7 +675,9 @@ class LeRobotDataset(torch.utils.data.Dataset): # Reset the buffer self.episode_buffer = self._create_episode_buffer() - def _remove_image_writer(self) -> None: + def read_mode(self) -> None: + """Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first.""" + # TODO(aliberts, rcadene): find better api/interface for this. if self.image_writer is not None: self.image_writer = None @@ -693,9 +695,8 @@ class LeRobotDataset(torch.utils.data.Dataset): # note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding, # since video encoding with ffmpeg is already using multithreading. 
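In the hunks above, `push_to_hub()` (renamed from `push_to_repo()`) refuses a dataset that has not been consolidated, while `encode_videos()` — whose body continues just below — is what `consolidate()` runs to turn the temporary PNGs into mp4 files. A hedged end-to-end sketch of the resulting ordering; the frame dictionaries are placeholders keyed like the dataset features:

```python
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset


def record_and_publish(dataset: LeRobotDataset, episodes: list[list[dict]], task: str) -> None:
    """Sketch of the add_frame -> add_episode -> consolidate -> push_to_hub ordering."""
    for episode_frames in episodes:
        for frame in episode_frames:
            # Buffered in memory; camera images are written to a temporary images/
            # directory by the ImageWriter, nothing else touches disk yet.
            dataset.add_frame(frame)
        # Commits the buffer: writes the episode parquet file and appends the
        # episode/task entries to the meta/ jsonl files.
        dataset.add_episode(task)

    # Encodes videos from the temporary images, computes stats and flips the
    # `consolidated` flag that push_to_hub() checks before uploading.
    dataset.consolidate(run_compute_stats=True)
    dataset.push_to_hub(push_videos=True)
```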
encode_video_frames(tmp_imgs_dir, video_path, self.fps, overwrite=True) - shutil.rmtree(tmp_imgs_dir) - def consolidate(self, run_compute_stats: bool = True) -> None: + def consolidate(self, run_compute_stats: bool = True, keep_image_files: bool = False) -> None: self.hf_dataset = self.load_hf_dataset() self.episode_data_index = get_episode_data_index(self.episodes, self.episode_dicts) check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) @@ -703,14 +704,13 @@ class LeRobotDataset(torch.utils.data.Dataset): if len(self.video_keys) > 0: self.encode_videos() + if not keep_image_files: + shutil.rmtree(self.image_writer.dir) + if run_compute_stats: - logging.info("Computing dataset statistics") - self._remove_image_writer() + self.read_mode() self.stats = compute_stats(self) - serialized_stats = flatten_dict(self.stats) - serialized_stats = {key: value.tolist() for key, value in serialized_stats.items()} - serialized_stats = unflatten_dict(serialized_stats) - write_json(serialized_stats, self.root / "meta/stats.json") + write_stats(self.stats, self.root / STATS_PATH) self.consolidated = True else: logging.warning("Skipping computation of the dataset statistics.") @@ -784,8 +784,8 @@ class MultiLeRobotDataset(torch.utils.data.Dataset): def __init__( self, repo_ids: list[str], - root: Path | None = LEROBOT_HOME, - split: str = "train", + root: Path | None = None, + episodes: dict | None = None, image_transforms: Callable | None = None, delta_timestamps: dict[list[float]] | None = None, video_backend: str | None = None, @@ -797,8 +797,8 @@ class MultiLeRobotDataset(torch.utils.data.Dataset): self._datasets = [ LeRobotDataset( repo_id, - root=root, - split=split, + root=root / repo_id if root is not None else None, + episodes=episodes[repo_id] if episodes is not None else None, delta_timestamps=delta_timestamps, image_transforms=image_transforms, video_backend=video_backend, @@ -834,7 +834,6 @@ class MultiLeRobotDataset(torch.utils.data.Dataset): self.disabled_data_keys.update(extra_keys) self.root = root - self.split = split self.image_transforms = image_transforms self.delta_timestamps = delta_timestamps self.stats = aggregate_stats(self._datasets) @@ -948,7 +947,6 @@ class MultiLeRobotDataset(torch.utils.data.Dataset): return ( f"{self.__class__.__name__}(\n" f" Repository IDs: '{self.repo_ids}',\n" - f" Split: '{self.split}',\n" f" Number of Samples: {self.num_samples},\n" f" Number of Episodes: {self.num_episodes},\n" f" Type: {'video (.mp4)' if self.video else 'image (.png)'},\n" diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index aa9c0c04..394723c0 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -48,7 +48,7 @@ This dataset was created using [LeRobot](https://github.com/huggingface/lerobot) """ -def flatten_dict(d, parent_key="", sep="/"): +def flatten_dict(d: dict, parent_key: str = "", sep: str = "/") -> dict: """Flatten a nested dictionary structure by collapsing nested keys into one key with a separator. 
For example: @@ -67,7 +67,7 @@ def flatten_dict(d, parent_key="", sep="/"): return dict(items) -def unflatten_dict(d, sep="/"): +def unflatten_dict(d: dict, sep: str = "/") -> dict: outdict = {} for key, value in d.items(): parts = key.split(sep) @@ -92,6 +92,12 @@ def append_jsonl(data: dict, fpath: Path) -> None: writer.write(data) +def write_stats(stats: dict[str, torch.Tensor | dict], fpath: Path) -> None: + serialized_stats = {key: value.tolist() for key, value in flatten_dict(stats).items()} + serialized_stats = unflatten_dict(serialized_stats) + write_json(serialized_stats, fpath) + + def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): """Get a transform function that convert items from Hugging Face dataset (pyarrow) to torch tensors. Importantly, images are converted from PIL, which corresponds to diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 5bf427f4..9ef50ced 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -315,11 +315,14 @@ def record( logging.info("Waiting for image writer to terminate...") dataset.image_writer.stop() + if run_compute_stats: + logging.info("Computing dataset statistics") + dataset.consolidate(run_compute_stats) # lerobot_dataset = create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_sounds) if push_to_hub: - dataset.push_to_repo() + dataset.push_to_hub() log_say("Exiting", play_sounds) return dataset From 7ae8d05326430398517b342cef35e8baf545b62b Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 14:20:27 +0200 Subject: [PATCH 50/59] Fix visualization --- lerobot/scripts/visualize_dataset_html.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/lerobot/scripts/visualize_dataset_html.py b/lerobot/scripts/visualize_dataset_html.py index d9d153a0..ec7e4b1f 100644 --- a/lerobot/scripts/visualize_dataset_html.py +++ b/lerobot/scripts/visualize_dataset_html.py @@ -97,14 +97,13 @@ def run_server( "num_episodes": dataset.num_episodes, "fps": dataset.fps, } - video_paths = get_episode_video_paths(dataset, episode_id) - language_instruction = get_episode_language_instruction(dataset, episode_id) + video_paths = [dataset.get_video_file_path(episode_id, key) for key in dataset.video_keys] + tasks = dataset.episode_dicts[episode_id]["tasks"] videos_info = [ - {"url": url_for("static", filename=video_path), "filename": Path(video_path).name} + {"url": url_for("static", filename=video_path), "filename": video_path.name} for video_path in video_paths ] - if language_instruction: - videos_info[0]["language_instruction"] = language_instruction + videos_info[0]["language_instruction"] = tasks ep_csv_url = url_for("static", filename=get_ep_csv_fname(episode_id)) return render_template( @@ -137,10 +136,10 @@ def write_episode_data_csv(output_dir, file_name, episode_index, dataset): # init header of csv with state and action names header = ["timestamp"] if has_state: - dim_state = len(dataset.hf_dataset["observation.state"][0]) + dim_state = dataset.shapes["observation.state"] header += [f"state_{i}" for i in range(dim_state)] if has_action: - dim_action = len(dataset.hf_dataset["action"][0]) + dim_action = dataset.shapes["action"] header += [f"action_{i}" for i in range(dim_action)] columns = ["timestamp"] @@ -171,7 +170,7 @@ def get_episode_video_paths(dataset: LeRobotDataset, ep_index: int) -> list[str] # get first frame of episode (hack to get video_path of the episode) first_frame_idx = 
dataset.episode_data_index["from"][ep_index].item() return [ - dataset.hf_dataset.select_columns(key)[first_frame_idx][key]["path"] for key in dataset.camera_keys + dataset.hf_dataset.select_columns(key)[first_frame_idx][key]["path"] for key in dataset.video_keys ] @@ -203,8 +202,8 @@ def visualize_dataset_html( dataset = LeRobotDataset(repo_id, root=root) - if not dataset.video: - raise NotImplementedError(f"Image datasets ({dataset.video=}) are currently not supported.") + if len(dataset.image_keys) > 0: + raise NotImplementedError(f"Image keys ({dataset.image_keys=}) are currently not supported.") if output_dir is None: output_dir = f"outputs/visualize_dataset_html/{repo_id}" @@ -224,7 +223,7 @@ def visualize_dataset_html( static_dir.mkdir(parents=True, exist_ok=True) ln_videos_dir = static_dir / "videos" if not ln_videos_dir.exists(): - ln_videos_dir.symlink_to(dataset.videos_dir.resolve()) + ln_videos_dir.symlink_to((dataset.root / "videos").resolve()) template_dir = Path(__file__).resolve().parent.parent / "templates" From b8bdbc1c5be93364541a0f627b4e7c21be3742b5 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 18:17:56 +0200 Subject: [PATCH 51/59] Fix check_delta_timestamps --- lerobot/common/datasets/utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 394723c0..ccb57197 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -298,10 +298,11 @@ def check_delta_timestamps( """ outside_tolerance = {} for key, delta_ts in delta_timestamps.items(): - abs_delta_ts = torch.abs(torch.tensor(delta_ts)) - within_tolerance = (abs_delta_ts % (1 / fps)) <= tolerance_s - if not torch.all(within_tolerance): - outside_tolerance[key] = torch.tensor(delta_ts)[~within_tolerance] + within_tolerance = [abs(ts * fps - round(ts * fps)) <= tolerance_s for ts in delta_ts] + if not all(within_tolerance): + outside_tolerance[key] = [ + ts for ts, is_within in zip(delta_ts, within_tolerance, strict=True) if not is_within + ] if len(outside_tolerance) > 0: if raise_value_error: From 07570f867f03b2dfef58b46e9b63153507769f96 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 18:18:28 +0200 Subject: [PATCH 52/59] Fix _query_videos return shapes --- lerobot/common/datasets/lerobot_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 0c62756e..b5e18964 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -487,7 +487,7 @@ class LeRobotDataset(torch.utils.data.Dataset): frames = decode_video_frames_torchvision( video_path, query_ts, self.tolerance_s, self.video_backend ) - item[vid_key] = frames + item[vid_key] = frames.squeeze(0) return item From 1aba80d93fef70846aba438d0ef4ad089bd9c9de Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 18:45:59 +0200 Subject: [PATCH 53/59] Fix consolidate --- lerobot/common/datasets/lerobot_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index b5e18964..6caec09c 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -704,7 +704,7 @@ class LeRobotDataset(torch.utils.data.Dataset): if len(self.video_keys) > 0: self.encode_videos() - if not keep_image_files: + 
if not keep_image_files and self.image_writer is not None: shutil.rmtree(self.image_writer.dir) if run_compute_stats: From 0098bd264ec073cb97bab3916755a97143698451 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 20:55:54 +0200 Subject: [PATCH 54/59] Nits --- lerobot/common/datasets/lerobot_dataset.py | 4 ++-- lerobot/scripts/control_robot.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 6caec09c..b32e1008 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -389,7 +389,7 @@ class LeRobotDataset(torch.utils.data.Dataset): @property def features(self) -> datasets.Features: - """Shapes for the different features.""" + """Features of the hf_dataset.""" if self.hf_dataset is not None: return self.hf_dataset.features elif self.episode_buffer is None: @@ -664,7 +664,7 @@ class LeRobotDataset(torch.utils.data.Dataset): self.episode_dicts.append(episode_dict) append_jsonl(episode_dict, self.root / EPISODES_PATH) - def delete_episode(self) -> None: + def clear_episode_buffer(self) -> None: episode_index = self.episode_buffer["episode_index"] if self.image_writer is not None: for cam_key in self.camera_keys: diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 9ef50ced..1185db20 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -299,7 +299,7 @@ def record( log_say("Re-record episode", play_sounds) events["rerecord_episode"] = False events["exit_early"] = False - dataset.delete_episode() + dataset.clear_episode_buffer() continue dataset.add_episode(task) From 0d77be90ee0871b16fbfbbe10f9024aae4ba83a8 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 23 Oct 2024 23:12:44 +0200 Subject: [PATCH 55/59] Move ImageWriter creation inside the dataset --- lerobot/common/datasets/lerobot_dataset.py | 54 +++++++++++++++------- lerobot/scripts/control_robot.py | 18 +++----- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index b32e1008..6a1d3719 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -177,11 +177,13 @@ class LeRobotDataset(torch.utils.data.Dataset): self.episodes = episodes self.tolerance_s = tolerance_s self.video_backend = video_backend if video_backend is not None else "pyav" - self.image_writer = image_writer self.delta_indices = None - self.consolidated = True - self.episode_buffer = {} self.local_files_only = local_files_only + self.consolidated = True + + # Unused attributes + self.image_writer = None + self.episode_buffer = {} # Load metadata self.root.mkdir(exist_ok=True, parents=True) @@ -626,8 +628,7 @@ class LeRobotDataset(torch.utils.data.Dataset): self.consolidated = False def _save_episode_table(self, episode_index: int) -> None: - features = self.features - ep_dataset = datasets.Dataset.from_dict(self.episode_buffer, features=features, split="train") + ep_dataset = datasets.Dataset.from_dict(self.episode_buffer, features=self.features, split="train") ep_table = ep_dataset._data.table ep_data_path = self.root / self.get_data_file_path(ep_index=episode_index) ep_data_path.parent.mkdir(parents=True, exist_ok=True) @@ -675,10 +676,25 @@ class LeRobotDataset(torch.utils.data.Dataset): # Reset the buffer self.episode_buffer = self._create_episode_buffer() - def 
read_mode(self) -> None: - """Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first.""" - # TODO(aliberts, rcadene): find better api/interface for this. + def start_image_writter(self, num_processes: int = 0, num_threads: int = 1) -> None: + if isinstance(self.image_writer, ImageWriter): + logging.warning( + "You are starting a new ImageWriter that is replacing an already exising one in the dataset." + ) + + self.image_writer = ImageWriter( + write_dir=self.root, + num_processes=num_processes, + num_threads=num_threads, + ) + + def stop_image_writter(self) -> None: + """ + Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first to + remove the image_write in order for the LeRobotDataset object to be pickleable and parallelized. + """ if self.image_writer is not None: + self.image_writer.stop() self.image_writer = None def encode_videos(self) -> None: @@ -708,20 +724,20 @@ class LeRobotDataset(torch.utils.data.Dataset): shutil.rmtree(self.image_writer.dir) if run_compute_stats: - self.read_mode() + self.stop_image_writter() self.stats = compute_stats(self) write_stats(self.stats, self.root / STATS_PATH) self.consolidated = True else: - logging.warning("Skipping computation of the dataset statistics.") + logging.warning( + "Skipping computation of the dataset statistics, dataset is not fully consolidated." + ) # TODO(aliberts) # Sanity checks: # - [ ] shapes # - [ ] ep_lenghts # - [ ] number of files - # - [ ] names of files (e.g. parquet 00000-of-00001 and 00001-of-00002) - # - [ ] no remaining self.image_writer.dir @classmethod def create( @@ -731,7 +747,8 @@ class LeRobotDataset(torch.utils.data.Dataset): robot: Robot, root: Path | None = None, tolerance_s: float = 1e-4, - image_writer: ImageWriter | None = None, + image_writer_processes: int = 0, + image_writer_threads_per_camera: int = 0, use_videos: bool = True, video_backend: str | None = None, ) -> "LeRobotDataset": @@ -740,7 +757,6 @@ class LeRobotDataset(torch.utils.data.Dataset): obj.repo_id = repo_id obj.root = root if root is not None else LEROBOT_HOME / repo_id obj.tolerance_s = tolerance_s - obj.image_writer = image_writer if not all(cam.fps == fps for cam in robot.cameras.values()): logging.warning( @@ -755,20 +771,24 @@ class LeRobotDataset(torch.utils.data.Dataset): # TODO(aliberts, rcadene, alexander-soare): Merge this with OnlineBuffer/DataBuffer obj.episode_buffer = obj._create_episode_buffer() + obj.image_writer = None + if len(robot.cameras) > 0 and (image_writer_processes or image_writer_threads_per_camera): + obj.start_image_writter( + image_writer_processes, image_writer_threads_per_camera * robot.num_cameras + ) + # This bool indicates that the current LeRobotDataset instance is in sync with the files on disk. It # is used to know when certain operations are need (for instance, computing dataset statistics). In # order to be able to push the dataset to the hub, it needs to be consolidated first by calling # self.consolidate(). 
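The `create()` diff continuing just below moves `ImageWriter` ownership into the dataset: instead of building a writer in `control_robot.record()` and passing it in, callers now hand over `image_writer_processes` / `image_writer_threads_per_camera`, and the dataset calls `start_image_writter()` (spelling as in the patch) when it is given a robot with cameras. A hedged sketch of the new call site — the thread count is illustrative; both writer arguments default to 0, meaning no writer is started:

```python
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset


def create_recording_dataset(repo_id: str, fps: int, robot) -> LeRobotDataset:
    """Sketch only: `robot` is a connected Robot instance with at least one camera."""
    dataset = LeRobotDataset.create(
        repo_id,
        fps,
        robot=robot,
        # Threads only: the writer is started with
        # num_threads = image_writer_threads_per_camera * robot.num_cameras.
        image_writer_processes=0,
        image_writer_threads_per_camera=4,
        use_videos=True,
    )
    # ... add_frame() / add_episode() calls go here ...

    # Required before wrapping the dataset in a parallelized DataLoader,
    # per the stop_image_writter() docstring above.
    dataset.stop_image_writter()
    return dataset
```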
obj.consolidated = True - obj.local_files_only = True - obj.download_videos = False - obj.episodes = None obj.hf_dataset = None obj.image_transforms = None obj.delta_timestamps = None obj.delta_indices = None + obj.local_files_only = True obj.episode_data_index = None obj.video_backend = video_backend if video_backend is not None else "pyav" return obj diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 1185db20..02975148 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -105,7 +105,6 @@ from pathlib import Path from typing import List # from safetensors.torch import load_file, save_file -from lerobot.common.datasets.image_writer import ImageWriter from lerobot.common.datasets.lerobot_dataset import LeRobotDataset from lerobot.common.robot_devices.control_utils import ( control_loop, @@ -232,17 +231,14 @@ def record( # Create empty dataset or load existing saved episodes sanity_check_dataset_name(repo_id, policy) - if len(robot.cameras) > 0: - image_writer = ImageWriter( - write_dir=root, - num_processes=num_image_writer_processes, - num_threads=num_image_writer_threads_per_camera * robot.num_cameras, - ) - else: - image_writer = None - dataset = LeRobotDataset.create( - repo_id, fps, robot, root=root, image_writer=image_writer, use_videos=video + repo_id, + fps, + robot, + root=root, + image_writer_processes=num_image_writer_processes, + image_writer_threads_per_camera=num_image_writer_threads_per_camera, + use_videos=video, ) if not robot.is_connected: From 60865e8980034d5957472a94e6aab2bd3f366def Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 24 Oct 2024 00:13:21 +0200 Subject: [PATCH 56/59] Allow dataset creation without robot --- lerobot/common/datasets/image_writer.py | 2 +- lerobot/common/datasets/lerobot_dataset.py | 51 ++++++++++++++++------ lerobot/common/datasets/utils.py | 36 +++++++++++---- 3 files changed, 66 insertions(+), 23 deletions(-) diff --git a/lerobot/common/datasets/image_writer.py b/lerobot/common/datasets/image_writer.py index 0900d910..6801bc5d 100644 --- a/lerobot/common/datasets/image_writer.py +++ b/lerobot/common/datasets/image_writer.py @@ -54,7 +54,7 @@ class ImageWriter: """ def __init__(self, write_dir: Path, num_processes: int = 0, num_threads: int = 1): - self.dir = write_dir / "images" + self.dir = write_dir self.dir.mkdir(parents=True, exist_ok=True) self.image_path = DEFAULT_IMAGE_PATH self.num_processes = num_processes diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 6a1d3719..e95f53c9 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -35,6 +35,7 @@ from lerobot.common.datasets.utils import ( INFO_PATH, STATS_PATH, TASKS_PATH, + _get_info_from_robot, append_jsonl, check_delta_timestamps, check_timestamps_sync, @@ -683,7 +684,7 @@ class LeRobotDataset(torch.utils.data.Dataset): ) self.image_writer = ImageWriter( - write_dir=self.root, + write_dir=self.root / "images", num_processes=num_processes, num_threads=num_threads, ) @@ -734,6 +735,7 @@ class LeRobotDataset(torch.utils.data.Dataset): ) # TODO(aliberts) + # - [ ] add video info in info.json # Sanity checks: # - [ ] shapes # - [ ] ep_lenghts @@ -744,8 +746,14 @@ class LeRobotDataset(torch.utils.data.Dataset): cls, repo_id: str, fps: int, - robot: Robot, root: Path | None = None, + robot: Robot | None = None, + robot_type: str | None = None, + keys: list[str] | None = None, + image_keys: list[str] | 
None = None, + video_keys: list[str] = None, + shapes: dict | None = None, + names: dict | None = None, tolerance_s: float = 1e-4, image_writer_processes: int = 0, image_writer_threads_per_camera: int = 0, @@ -757,26 +765,41 @@ class LeRobotDataset(torch.utils.data.Dataset): obj.repo_id = repo_id obj.root = root if root is not None else LEROBOT_HOME / repo_id obj.tolerance_s = tolerance_s + obj.image_writer = None - if not all(cam.fps == fps for cam in robot.cameras.values()): - logging.warning( - f"Some cameras in your {robot.robot_type} robot don't have an fps matching the fps of your dataset." - "In this case, frames from lower fps cameras will be repeated to fill in the blanks" - ) + if robot is not None: + robot_type, keys, image_keys, video_keys, shapes, names = _get_info_from_robot(robot, use_videos) + if not all(cam.fps == fps for cam in robot.cameras.values()): + logging.warning( + f"Some cameras in your {robot.robot_type} robot don't have an fps matching the fps of your dataset." + "In this case, frames from lower fps cameras will be repeated to fill in the blanks" + ) + if len(robot.cameras) > 0 and (image_writer_processes or image_writer_threads_per_camera): + obj.start_image_writter( + image_writer_processes, image_writer_threads_per_camera * robot.num_cameras + ) + elif ( + robot_type is None + or keys is None + or image_keys is None + or video_keys is None + or shapes is None + or names is None + ): + raise ValueError() + + if len(video_keys) > 0 and not use_videos: + raise ValueError obj.tasks, obj.stats, obj.episode_dicts = {}, {}, [] - obj.info = create_empty_dataset_info(CODEBASE_VERSION, fps, robot, use_videos) + obj.info = create_empty_dataset_info( + CODEBASE_VERSION, fps, robot_type, keys, image_keys, video_keys, shapes, names + ) write_json(obj.info, obj.root / INFO_PATH) # TODO(aliberts, rcadene, alexander-soare): Merge this with OnlineBuffer/DataBuffer obj.episode_buffer = obj._create_episode_buffer() - obj.image_writer = None - if len(robot.cameras) > 0 and (image_writer_processes or image_writer_threads_per_camera): - obj.start_image_writter( - image_writer_processes, image_writer_threads_per_camera * robot.num_cameras - ) - # This bool indicates that the current LeRobotDataset instance is in sync with the files on disk. It # is used to know when certain operations are need (for instance, computing dataset statistics). 
From 450eae310be9c4c4675573a51ef4408782754f3e Mon Sep 17 00:00:00 2001
From: Simon Alibert
Date: Thu, 24 Oct 2024 00:13:53 +0200
Subject: [PATCH 57/59] Add error msg

---
 lerobot/common/datasets/lerobot_dataset.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py
index e95f53c9..513a931b 100644
--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
@@ -786,7 +786,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
             or shapes is None
             or names is None
         ):
-            raise ValueError()
+            raise ValueError(
+                "Dataset info (robot_type, keys, shapes...) must either come from a Robot or explicitly passed upon creation."
+            )

         if len(video_keys) > 0 and not use_videos:
             raise ValueError
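A quick illustration of the path this message covers (sketch; the call is intentionally incomplete):

```python
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

# Neither a robot nor the explicit metadata is passed, so create() fails fast
# with the descriptive error added above instead of a bare ValueError.
LeRobotDataset.create(repo_id="aliberts/koch_tutorial", fps=30)  # raises ValueError
```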
From 615894d3fbfa3289ee019467f4ca590923fe9869 Mon Sep 17 00:00:00 2001
From: Simon Alibert
Date: Thu, 24 Oct 2024 11:37:44 +0200
Subject: [PATCH 58/59] Add test_same_attributes_defined

---
 tests/test_datasets.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 1316df78..56b25d6d 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -42,7 +42,27 @@ from lerobot.common.datasets.utils import (
     unflatten_dict,
 )
 from lerobot.common.utils.utils import init_hydra_config, seeded_context
-from tests.utils import DEFAULT_CONFIG_PATH, DEVICE
+from tests.utils import DEFAULT_CONFIG_PATH, DEVICE, make_robot
+
+TEST_REPO_ID = "aliberts/koch_tutorial"
+
+
+def test_same_attributes_defined():
+    # TODO(aliberts): test with keys, shapes, names etc. provided instead of robot
+    robot = make_robot("koch", mock=True)
+
+    # Instantiate both ways
+    dataset_init = LeRobotDataset(repo_id=TEST_REPO_ID)
+    dataset_create = LeRobotDataset.create(repo_id=TEST_REPO_ID, fps=30, robot=robot)
+
+    # Access the '_hub_version' cached_property in both instances to force its creation
+    _ = dataset_init._hub_version
+    _ = dataset_create._hub_version
+
+    init_attr = set(vars(dataset_init).keys())
+    create_attr = set(vars(dataset_create).keys())
+
+    assert init_attr == create_attr, "Attribute sets do not match between __init__ and .create()"


 @pytest.mark.parametrize(
From 8bcf81fa24ea07c23274898b6d475d319fa3489a Mon Sep 17 00:00:00 2001
From: Simon Alibert
Date: Thu, 24 Oct 2024 11:38:32 +0200
Subject: [PATCH 59/59] Add todo

---
 tests/test_datasets.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 56b25d6d..02875d3b 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -44,6 +44,7 @@ from lerobot.common.datasets.utils import (
 from lerobot.common.utils.utils import init_hydra_config, seeded_context
 from tests.utils import DEFAULT_CONFIG_PATH, DEVICE, make_robot

+# TODO(aliberts): create proper test repo
 TEST_REPO_ID = "aliberts/koch_tutorial"
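One possible direction for the TODO above, purely as a sketch (the fixture name, dummy repo id and use of pytest's `tmp_path` are assumptions, not part of these patches): build the dataset under a temporary root so the test does not depend on a personal hub repo.

```python
import pytest

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from tests.utils import make_robot


@pytest.fixture
def tmp_dataset(tmp_path):
    # Hypothetical fixture: an empty dataset created in a temporary directory
    # instead of relying on the "aliberts/koch_tutorial" hub repo.
    robot = make_robot("koch", mock=True)
    return LeRobotDataset.create(
        repo_id="lerobot/test_dummy_dataset",  # never pushed; placeholder id
        fps=30,
        robot=robot,
        root=tmp_path / "test_dummy_dataset",
    )
```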