Add UMI-gripper dataset (#83)

Co-authored-by: Remi <re.cadene@gmail.com>
2024-04-28 18:41:07 +02:00
parent a4b6c5e3b1
commit 81e490d46f
11 changed files with 706 additions and 29 deletions
--- a/download_and_upload_dataset.py
+++ b/download_and_upload_dataset.py
@@ -30,10 +30,31 @@ def download_and_upload(root, revision, dataset_id):
        download_and_upload_xarm(root, revision, dataset_id)
    elif "aloha" in dataset_id:
        download_and_upload_aloha(root, revision, dataset_id)
+    elif "umi" in dataset_id:
+        download_and_upload_umi(root, revision, dataset_id)
    else:
        raise ValueError(dataset_id)


+def concatenate_episodes(ep_dicts):
+    """Merge a list of per-episode dicts into one dataset-wide dict.
+
+    The keys are taken from the first episode. A key whose first element (in the
+    first episode) is a tensor is concatenated with `torch.cat` across episodes;
+    any other key is flattened into a single Python list, episode after episode.
+    An "index" entry counting frames from 0 is added (or overwritten) at the end.
+
+    Args:
+        ep_dicts: Non-empty list of dicts sharing the same keys. "frame_index"
+            must be one of the tensor-valued keys, since its first dimension
+            gives the total number of frames.
+
+    Returns:
+        The merged dict, including the extra "index" tensor.
+    """
+    first_episode = ep_dicts[0]
+    merged = {}
+
+    for name in first_episode:
+        if not torch.is_tensor(first_episode[name][0]):
+            # Non-tensor values are chained into one flat list.
+            merged[name] = [item for episode in ep_dicts for item in episode[name]]
+            continue
+        merged[name] = torch.cat([episode[name] for episode in ep_dicts])
+
+    num_frames = merged["frame_index"].shape[0]
+    merged["index"] = torch.arange(0, num_frames, 1)
+    return merged
+
+
 def download_and_extract_zip(url: str, destination_folder: Path) -> bool:
    import zipfile

@@ -62,25 +83,6 @@ def download_and_extract_zip(url: str, destination_folder: Path) -> bool:
        return False


-def concatenate_episodes(ep_dicts):
-    data_dict = {}
-
-    keys = ep_dicts[0].keys()
-    for key in keys:
-        if torch.is_tensor(ep_dicts[0][key][0]):
-            data_dict[key] = torch.cat([ep_dict[key] for ep_dict in ep_dicts])
-        else:
-            if key not in data_dict:
-                data_dict[key] = []
-            for ep_dict in ep_dicts:
-                for x in ep_dict[key]:
-                    data_dict[key].append(x)
-
-    total_frames = data_dict["frame_index"].shape[0]
-    data_dict["index"] = torch.arange(0, total_frames, 1)
-    return data_dict
-
-
 def push_to_hub(hf_dataset, episode_data_index, info, stats, root, revision, dataset_id):
    # push to main to indicate latest version
    hf_dataset.push_to_hub(f"lerobot/{dataset_id}", token=True)
@@ -515,9 +517,9 @@ def download_and_upload_aloha(root, revision, dataset_id, fps=50):
        "episode_index": Value(dtype="int64", id=None),
        "frame_index": Value(dtype="int64", id=None),
        "timestamp": Value(dtype="float32", id=None),
-        #'next.reward': Value(dtype='float32', id=None),
+        # "next.reward": Value(dtype="float32", id=None),
        "next.done": Value(dtype="bool", id=None),
-        #'next.success': Value(dtype='bool', id=None),
+        # "next.success": Value(dtype="bool", id=None),
        "index": Value(dtype="int64", id=None),
    }
    features = Features(features)
@@ -531,10 +533,236 @@ def download_and_upload_aloha(root, revision, dataset_id, fps=50):
    push_to_hub(hf_dataset, episode_data_index, info, stats, root, revision, dataset_id)


+def download_and_upload_umi(root, revision, dataset_id, fps=10):
+    """Convert the UMI `cup_in_the_wild` zarr dataset and push it to the hub.
+
+    The raw zarr archive is downloaded under `root / f"{dataset_id}_raw"` when it
+    is not already there. States and per-episode bookkeeping are assembled in
+    memory, while camera frames are first written to a temporary `tmp_umi_images`
+    folder (relative to the current working directory) so that the dataset can
+    reference them by path. The temporary folder is removed when the function
+    exits, whether or not the conversion succeeded.
+
+    Args:
+        root: Base directory; the raw data goes to `<root>/<dataset_id>_raw` and
+            the value is also forwarded to `push_to_hub`.
+        revision: Version string forwarded to `push_to_hub`.
+        dataset_id: Dataset name, used for the raw folder and forwarded to `push_to_hub`.
+        fps: Frame rate used to derive `timestamp` from the frame index.
+    """
+    # fps is equal to 10 source:https://arxiv.org/pdf/2402.10329.pdf#table.caption.16
+    import os
+    import shutil
+    from glob import glob
+
+    import numpy as np
+    import torch
+    import tqdm
+    import zarr
+    from datasets import Dataset, Features, Image, Sequence, Value
+
+    from lerobot.common.datasets._umi_imagecodecs_numcodecs import register_codecs
+
+    # NOTE: This is critical otherwise ValueError: codec not available: 'imagecodecs_jpegxl'
+    # will be raised
+    register_codecs()
+
+    url_cup_in_the_wild = "https://real.stanford.edu/umi/data/zarr_datasets/cup_in_the_wild.zarr.zip"
+    cup_in_the_wild_zarr = Path("umi/cup_in_the_wild/cup_in_the_wild.zarr")
+    # Scratch folder for the extracted camera frames, removed in the `finally` below.
+    tmp_images_dir = "tmp_umi_images"
+
+    root = Path(root)
+    raw_dir = root / f"{dataset_id}_raw"
+    zarr_path = (raw_dir / cup_in_the_wild_zarr).resolve()
+    if not zarr_path.is_dir():
+        raw_dir.mkdir(parents=True, exist_ok=True)
+        download_and_extract_zip(url_cup_in_the_wild, zarr_path)
+    zarr_data = zarr.open(zarr_path, mode="r")
+
+    # We process the image data separately because it is too large to fit in memory
+    end_pose = torch.from_numpy(zarr_data["data/robot0_demo_end_pose"][:])
+    start_pos = torch.from_numpy(zarr_data["data/robot0_demo_start_pose"][:])
+    eff_pos = torch.from_numpy(zarr_data["data/robot0_eef_pos"][:])
+    eff_rot_axis_angle = torch.from_numpy(zarr_data["data/robot0_eef_rot_axis_angle"][:])
+    gripper_width = torch.from_numpy(zarr_data["data/robot0_gripper_width"][:])
+
+    states_pos = torch.cat([eff_pos, eff_rot_axis_angle], dim=1)
+    states = torch.cat([states_pos, gripper_width], dim=1)
+
+    def get_episode_idxs(episode_ends: np.ndarray) -> np.ndarray:
+        # Optimized and simplified version of this function: https://github.com/real-stanford/universal_manipulation_interface/blob/298776ce251f33b6b3185a98d6e7d1f9ad49168b/diffusion_policy/common/replay_buffer.py#L374
+        from numba import jit
+
+        @jit(nopython=True)
+        def _get_episode_idxs(episode_ends):
+            result = np.zeros((episode_ends[-1],), dtype=np.int64)
+            start_idx = 0
+            for episode_number, end_idx in enumerate(episode_ends):
+                result[start_idx:end_idx] = episode_number
+                start_idx = end_idx
+            return result
+
+        return _get_episode_idxs(episode_ends)
+
+    episode_ends = zarr_data["meta/episode_ends"][:]
+    num_episodes: int = episode_ends.shape[0]
+
+    episode_ids = torch.from_numpy(get_episode_idxs(episode_ends))
+
+    # We convert it in torch tensor later because the jit function does not support torch tensors
+    episode_ends = torch.from_numpy(episode_ends)
+
+    ep_dicts = []
+    episode_data_index = {"from": [], "to": []}
+    id_from = 0
+
+    for episode_id in tqdm.tqdm(range(num_episodes)):
+        # Plain Python int, so that the bookkeeping below (list repetition, the
+        # entries of `episode_data_index`) never mixes ints with 0-dim tensors.
+        id_to = int(episode_ends[episode_id])
+        num_frames = id_to - id_from
+
+        assert (
+            episode_ids[id_from:id_to] == episode_id
+        ).all(), f"episode_ids[{id_from}:{id_to}] != {episode_id}"
+
+        ep_dict = {
+            # observation.image will be filled later
+            "observation.state": states[id_from:id_to],
+            "episode_index": torch.tensor([episode_id] * num_frames, dtype=torch.int),
+            "frame_index": torch.arange(0, num_frames, 1),
+            "timestamp": torch.arange(0, num_frames, 1) / fps,
+            "episode_data_index_from": torch.tensor([id_from] * num_frames),
+            "episode_data_index_to": torch.tensor([id_to] * num_frames),
+            "end_pose": end_pose[id_from:id_to],
+            "start_pos": start_pos[id_from:id_to],
+            "gripper_width": gripper_width[id_from:id_to],
+        }
+        ep_dicts.append(ep_dict)
+        episode_data_index["from"].append(id_from)
+        episode_data_index["to"].append(id_to)
+        id_from = id_to
+
+    # `concatenate_episodes` also adds the global "index" column (0..total_frames-1).
+    data_dict = concatenate_episodes(ep_dicts)
+
+    try:
+        print("Saving images to disk in temporary folder...")
+        # datasets.Image() can take a list of paths to images, so we save the images to a temporary folder
+        # to avoid loading them all in memory
+        _umi_save_images_concurrently(zarr_data, tmp_images_dir, max_workers=12)
+        print("Saving images to disk in temporary folder... Done")
+
+        # Sort files by number eg. 1.png, 2.png, 3.png, 9.png, 10.png instead of 1.png, 10.png, 2.png, 3.png, 9.png
+        # to correctly match the images with the data. The key is the file stem, so it works for
+        # both extensions that `_umi_save_image` can produce (.png and .jpeg).
+        images_path = sorted(glob(f"{tmp_images_dir}/*"), key=lambda x: int(Path(x).stem))
+        data_dict["observation.image"] = images_path
+
+        features = {
+            "observation.image": Image(),
+            "observation.state": Sequence(
+                length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
+            ),
+            "episode_index": Value(dtype="int64", id=None),
+            "frame_index": Value(dtype="int64", id=None),
+            "timestamp": Value(dtype="float32", id=None),
+            "index": Value(dtype="int64", id=None),
+            "episode_data_index_from": Value(dtype="int64", id=None),
+            "episode_data_index_to": Value(dtype="int64", id=None),
+            # `start_pos` and `end_pose` come from the `robot0_demo_start_pose` and
+            # `robot0_demo_end_pose` arrays of the zarr archive (presumably the
+            # end-effector pose at the beginning and at the end of the episode).
+            # `gripper_width` indicates the distance between the grippers; it is also
+            # included in `observation.state`, which is the concatenation of the
+            # end-effector position, its axis-angle rotation and the gripper width.
+            "end_pose": Sequence(
+                length=data_dict["end_pose"].shape[1], feature=Value(dtype="float32", id=None)
+            ),
+            "start_pos": Sequence(
+                length=data_dict["start_pos"].shape[1], feature=Value(dtype="float32", id=None)
+            ),
+            "gripper_width": Sequence(
+                length=data_dict["gripper_width"].shape[1], feature=Value(dtype="float32", id=None)
+            ),
+        }
+        features = Features(features)
+        hf_dataset = Dataset.from_dict(data_dict, features=features)
+        hf_dataset.set_transform(hf_transform_to_torch)
+
+        info = {
+            "fps": fps,
+        }
+        stats = compute_stats(hf_dataset)
+        push_to_hub(
+            hf_dataset=hf_dataset,
+            episode_data_index=episode_data_index,
+            info=info,
+            stats=stats,
+            root=root,
+            revision=revision,
+            dataset_id=dataset_id,
+        )
+    finally:
+        # Cleanup, also when saving, stats computation or the upload failed
+        if os.path.exists(tmp_images_dir):
+            print("Removing temporary images folder")
+            shutil.rmtree(tmp_images_dir)
+            print("Cleanup done")
+
+
+def _umi_clear_folder(folder_path: str):
+    """
+    Clears all the content of the specified folder. Creates the folder if it does not exist.
+
+    Deletion is best effort: an entry that cannot be removed is reported on stdout and skipped.
+
+    Args:
+    folder_path (str): Path to the folder to clear.
+
+    Examples:
+    >>> import os
+    >>> os.makedirs('example_folder', exist_ok=True)
+    >>> with open('example_folder/temp_file.txt', 'w') as f:
+    ...     _ = f.write('example')
+    >>> _umi_clear_folder('example_folder')
+    >>> os.listdir('example_folder')
+    []
+    """
+    # Both imports are local so that the helper does not rely on module-level imports.
+    import os
+    import shutil
+
+    if not os.path.exists(folder_path):
+        os.makedirs(folder_path)
+        return
+
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+        try:
+            if os.path.isfile(file_path) or os.path.islink(file_path):
+                os.unlink(file_path)
+            elif os.path.isdir(file_path):
+                shutil.rmtree(file_path)
+        except OSError as e:
+            print(f"Failed to delete {file_path}. Reason: {e}")
+
+
+def _umi_save_image(img_array: np.ndarray, i: int, folder_path: str):
+    """
+    Saves a single image to the specified folder.
+
+    The file is named `<i>.png` when the array dtype is uint8 and `<i>.jpeg` otherwise.
+
+    Args:
+    img_array (ndarray): The numpy array of the image.
+    i (int): Index of the image, used for naming.
+    folder_path (str): Path to the folder where the image will be saved.
+    """
+    import os
+
+    img = PILImage.fromarray(img_array)
+    img_format = "PNG" if img_array.dtype == np.uint8 else "JPEG"
+    # NOTE(review): `quality` is presumably only honoured by the JPEG writer and ignored for
+    # PNG - confirm against the Pillow documentation before relying on it.
+    img.save(os.path.join(folder_path, f"{i}.{img_format.lower()}"), quality=100)
+
+
+def _umi_save_images_concurrently(zarr_data: dict, folder_path: str, max_workers: int = 4):
+    """
+    Saves images from the zarr_data to the specified folder using multithreading.
+
+    The folder is cleared (or created) first, then every frame of
+    `zarr_data["data/camera0_rgb"]` is written as `<index>.<ext>` by `_umi_save_image`.
+
+    Args:
+    zarr_data (dict): A dictionary containing image data in an array format.
+    folder_path (str): Path to the folder where images will be saved.
+    max_workers (int): The maximum number of threads to use for saving images.
+
+    Raises:
+    Exception: Re-raises the error of the first image (in index order) that failed to save,
+    instead of silently leaving a gap in the output folder.
+    """
+    from concurrent.futures import ThreadPoolExecutor
+
+    # Look the array up once instead of once per frame.
+    images = zarr_data["data/camera0_rgb"]
+    num_images = len(images)
+    _umi_clear_folder(folder_path)  # Clear or create folder first
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        futures = [
+            executor.submit(_umi_save_image, images[i], i, folder_path)
+            for i in range(num_images)
+        ]
+        # `result()` surfaces any exception raised in a worker thread; without it a failed
+        # save would go unnoticed because the futures were discarded.
+        for future in futures:
+            future.result()
+
+
 if __name__ == "__main__":
    root = "data"
    revision = "v1.1"
-
    dataset_ids = [
        "pusht",
        "xarm_lift_medium",
@@ -545,6 +773,7 @@ if __name__ == "__main__":
        "aloha_sim_insertion_scripted",
        "aloha_sim_transfer_cube_human",
        "aloha_sim_transfer_cube_scripted",
+        "umi_cup_in_the_wild",
    ]
    for dataset_id in dataset_ids:
        download_and_upload(root, revision, dataset_id)