Loads episode_data_index and stats during dataset __init__ (#85)

Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
2024-04-23 14:13:25 +02:00
parent e2168163cd
commit 1030ea0070
89 changed files with 1008 additions and 432 deletions
--- a/examples/1_load_hugging_face_dataset.py
+++ b/examples/1_load_hugging_face_dataset.py
@@ -10,10 +10,13 @@ As an example, this script saves frames of episode number 5 of the PushT dataset
 This script supports several Hugging Face datasets, among which:
 1. [Pusht](https://huggingface.co/datasets/lerobot/pusht)
 2. [Xarm Lift Medium](https://huggingface.co/datasets/lerobot/xarm_lift_medium)
-3. [Aloha Sim Insertion Human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human)
-4. [Aloha Sim Insertion Scripted](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_scripted)
-5. [Aloha Sim Transfer Cube Human](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_human)
-6. [Aloha Sim Transfer Cube Scripted](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_scripted)
+3. [Xarm Lift Medium Replay](https://huggingface.co/datasets/lerobot/xarm_lift_medium_replay)
+4. [Xarm Push Medium](https://huggingface.co/datasets/lerobot/xarm_push_medium)
+5. [Xarm Push Medium Replay](https://huggingface.co/datasets/lerobot/xarm_push_medium_replay)
+6. [Aloha Sim Insertion Human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human)
+7. [Aloha Sim Insertion Scripted](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_scripted)
+8. [Aloha Sim Transfer Cube Human](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_human)
+9. [Aloha Sim Transfer Cube Scripted](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_scripted)

 To try a different Hugging Face dataset, you can replace this line:
 ```python
@@ -22,12 +25,16 @@ hf_dataset, fps = load_dataset("lerobot/pusht", split="train"), 10
 by one of these:
 ```python
 hf_dataset, fps = load_dataset("lerobot/xarm_lift_medium", split="train"), 15
+hf_dataset, fps = load_dataset("lerobot/xarm_lift_medium_replay", split="train"), 15
+hf_dataset, fps = load_dataset("lerobot/xarm_push_medium", split="train"), 15
+hf_dataset, fps = load_dataset("lerobot/xarm_push_medium_replay", split="train"), 15
 hf_dataset, fps = load_dataset("lerobot/aloha_sim_insertion_human", split="train"), 50
 hf_dataset, fps = load_dataset("lerobot/aloha_sim_insertion_scripted", split="train"), 50
 hf_dataset, fps = load_dataset("lerobot/aloha_sim_transfer_cube_human", split="train"), 50
 hf_dataset, fps = load_dataset("lerobot/aloha_sim_transfer_cube_scripted", split="train"), 50
 ```
 """
+# TODO(rcadene): remove this example file of using hf_dataset

 from pathlib import Path

@@ -37,19 +44,22 @@ from datasets import load_dataset
 # TODO(rcadene): list available datasets on lerobot page using `datasets`

 # download/load hugging face dataset in pyarrow format
-hf_dataset, fps = load_dataset("lerobot/pusht", revision="v1.0", split="train"), 10
+hf_dataset, fps = load_dataset("lerobot/pusht", split="train"), 10

 # display name of dataset and its features
+# TODO(rcadene): update to make the print pretty
 print(f"{hf_dataset=}")
 print(f"{hf_dataset.features=}")

 # display useful statistics about frames and episodes, which are sequences of frames from the same video
 print(f"number of frames: {len(hf_dataset)=}")
-print(f"number of episodes: {len(hf_dataset.unique('episode_id'))=}")
-print(f"average number of frames per episode: {len(hf_dataset) / len(hf_dataset.unique('episode_id')):.3f}")
+print(f"number of episodes: {len(hf_dataset.unique('episode_index'))=}")
+print(
+    f"average number of frames per episode: {len(hf_dataset) / len(hf_dataset.unique('episode_index')):.3f}"
+)

 # select the frames belonging to episode number 5
-hf_dataset = hf_dataset.filter(lambda frame: frame["episode_id"] == 5)
+hf_dataset = hf_dataset.filter(lambda frame: frame["episode_index"] == 5)

 # load all frames of episode 5 in RAM in PIL format
 frames = hf_dataset["observation.image"]
--- a/examples/2_load_lerobot_dataset.py
+++ b/examples/2_load_lerobot_dataset.py
@@ -18,7 +18,10 @@ dataset = PushtDataset()
 ```
 by one of these:
 ```python
-dataset = XarmDataset()
+dataset = XarmDataset("xarm_lift_medium")
+dataset = XarmDataset("xarm_lift_medium_replay")
+dataset = XarmDataset("xarm_push_medium")
+dataset = XarmDataset("xarm_push_medium_replay")
 dataset = AlohaDataset("aloha_sim_insertion_human")
 dataset = AlohaDataset("aloha_sim_insertion_scripted")
 dataset = AlohaDataset("aloha_sim_transfer_cube_human")
@@ -44,6 +47,7 @@ from lerobot.common.datasets.pusht import PushtDataset
 dataset = PushtDataset()

 # All LeRobot datasets are actually a thin wrapper around an underlying Hugging Face dataset  (see https://huggingface.co/docs/datasets/index for more information).
+# TODO(rcadene): update to make the print pretty
 print(f"{dataset=}")
 print(f"{dataset.hf_dataset=}")

@@ -55,13 +59,16 @@ print(f"frames per second used during data collection: {dataset.fps=}")
 print(f"keys to access images from cameras: {dataset.image_keys=}")

 # While the LeRobot dataset adds helpers for working within our library, we still expose the underlying Hugging Face dataset. It may be freely replaced or modified in place. Here we use the filtering to keep only frames from episode 5.
-dataset.hf_dataset = dataset.hf_dataset.filter(lambda frame: frame["episode_id"] == 5)
+# TODO(rcadene): remove this example of accessing hf_dataset
+dataset.hf_dataset = dataset.hf_dataset.filter(lambda frame: frame["episode_index"] == 5)

-# LeRobot datsets actually subclass PyTorch datasets. So you can do everything you know and love from working with the latter, for example: iterating through the dataset. Here we grap all the image frames.
+# LeRobot datasets actually subclass PyTorch datasets. So you can do everything you know and love from working with the latter, for example: iterating through the dataset. Here we grab all the image frames.
 frames = [sample["observation.image"] for sample in dataset]

-# but frames are now channel first to follow pytorch convention,
-# to view them, we convert to channel last
+# but frames are now float32 range [0,1] channel first (c,h,w) to follow pytorch convention,
+# to view them, we convert to uint8 range [0,255]
+frames = [(frame * 255).type(torch.uint8) for frame in frames]
+# and to channel last (h,w,c)
 frames = [frame.permute((1, 2, 0)).numpy() for frame in frames]

 # and finally save them to a mp4 video