Add UMI-gripper dataset (#83)

Co-authored-by: Remi <re.cadene@gmail.com>
This commit is contained in:
Adil Zouitine
2024-04-28 18:41:07 +02:00
committed by GitHub
parent a4b6c5e3b1
commit 81e490d46f
11 changed files with 706 additions and 29 deletions

View File

@@ -30,10 +30,31 @@ def download_and_upload(root, revision, dataset_id):
download_and_upload_xarm(root, revision, dataset_id)
elif "aloha" in dataset_id:
download_and_upload_aloha(root, revision, dataset_id)
elif "umi" in dataset_id:
download_and_upload_umi(root, revision, dataset_id)
else:
raise ValueError(dataset_id)
def concatenate_episodes(ep_dicts):
data_dict = {}
keys = ep_dicts[0].keys()
for key in keys:
if torch.is_tensor(ep_dicts[0][key][0]):
data_dict[key] = torch.cat([ep_dict[key] for ep_dict in ep_dicts])
else:
if key not in data_dict:
data_dict[key] = []
for ep_dict in ep_dicts:
for x in ep_dict[key]:
data_dict[key].append(x)
total_frames = data_dict["frame_index"].shape[0]
data_dict["index"] = torch.arange(0, total_frames, 1)
return data_dict
def download_and_extract_zip(url: str, destination_folder: Path) -> bool:
import zipfile
@@ -62,25 +83,6 @@ def download_and_extract_zip(url: str, destination_folder: Path) -> bool:
return False
def concatenate_episodes(ep_dicts):
data_dict = {}
keys = ep_dicts[0].keys()
for key in keys:
if torch.is_tensor(ep_dicts[0][key][0]):
data_dict[key] = torch.cat([ep_dict[key] for ep_dict in ep_dicts])
else:
if key not in data_dict:
data_dict[key] = []
for ep_dict in ep_dicts:
for x in ep_dict[key]:
data_dict[key].append(x)
total_frames = data_dict["frame_index"].shape[0]
data_dict["index"] = torch.arange(0, total_frames, 1)
return data_dict
def push_to_hub(hf_dataset, episode_data_index, info, stats, root, revision, dataset_id):
# push to main to indicate latest version
hf_dataset.push_to_hub(f"lerobot/{dataset_id}", token=True)
@@ -515,9 +517,9 @@ def download_and_upload_aloha(root, revision, dataset_id, fps=50):
"episode_index": Value(dtype="int64", id=None),
"frame_index": Value(dtype="int64", id=None),
"timestamp": Value(dtype="float32", id=None),
#'next.reward': Value(dtype='float32', id=None),
# "next.reward": Value(dtype="float32", id=None),
"next.done": Value(dtype="bool", id=None),
#'next.success': Value(dtype='bool', id=None),
# "next.success": Value(dtype="bool", id=None),
"index": Value(dtype="int64", id=None),
}
features = Features(features)
@@ -531,10 +533,236 @@ def download_and_upload_aloha(root, revision, dataset_id, fps=50):
push_to_hub(hf_dataset, episode_data_index, info, stats, root, revision, dataset_id)
def download_and_upload_umi(root, revision, dataset_id, fps=10):
# fps is equal to 10 source:https://arxiv.org/pdf/2402.10329.pdf#table.caption.16
import os
import re
import shutil
from glob import glob
import numpy as np
import torch
import tqdm
import zarr
from datasets import Dataset, Features, Image, Sequence, Value
from lerobot.common.datasets._umi_imagecodecs_numcodecs import register_codecs
# NOTE: This is critical otherwise ValueError: codec not available: 'imagecodecs_jpegxl'
# will be raised
register_codecs()
url_cup_in_the_wild = "https://real.stanford.edu/umi/data/zarr_datasets/cup_in_the_wild.zarr.zip"
cup_in_the_wild_zarr = Path("umi/cup_in_the_wild/cup_in_the_wild.zarr")
root = Path(root)
raw_dir = root / f"{dataset_id}_raw"
zarr_path = (raw_dir / cup_in_the_wild_zarr).resolve()
if not zarr_path.is_dir():
raw_dir.mkdir(parents=True, exist_ok=True)
download_and_extract_zip(url_cup_in_the_wild, zarr_path)
zarr_data = zarr.open(zarr_path, mode="r")
# We process the image data separately because it is too large to fit in memory
end_pose = torch.from_numpy(zarr_data["data/robot0_demo_end_pose"][:])
start_pos = torch.from_numpy(zarr_data["data/robot0_demo_start_pose"][:])
eff_pos = torch.from_numpy(zarr_data["data/robot0_eef_pos"][:])
eff_rot_axis_angle = torch.from_numpy(zarr_data["data/robot0_eef_rot_axis_angle"][:])
gripper_width = torch.from_numpy(zarr_data["data/robot0_gripper_width"][:])
states_pos = torch.cat([eff_pos, eff_rot_axis_angle], dim=1)
states = torch.cat([states_pos, gripper_width], dim=1)
def get_episode_idxs(episode_ends: np.ndarray) -> np.ndarray:
# Optimized and simplified version of this function: https://github.com/real-stanford/universal_manipulation_interface/blob/298776ce251f33b6b3185a98d6e7d1f9ad49168b/diffusion_policy/common/replay_buffer.py#L374
from numba import jit
@jit(nopython=True)
def _get_episode_idxs(episode_ends):
result = np.zeros((episode_ends[-1],), dtype=np.int64)
start_idx = 0
for episode_number, end_idx in enumerate(episode_ends):
result[start_idx:end_idx] = episode_number
start_idx = end_idx
return result
return _get_episode_idxs(episode_ends)
episode_ends = zarr_data["meta/episode_ends"][:]
num_episodes: int = episode_ends.shape[0]
episode_ids = torch.from_numpy(get_episode_idxs(episode_ends))
# We convert it in torch tensor later because the jit function does not support torch tensors
episode_ends = torch.from_numpy(episode_ends)
ep_dicts = []
episode_data_index = {"from": [], "to": []}
id_from = 0
for episode_id in tqdm.tqdm(range(num_episodes)):
id_to = episode_ends[episode_id]
num_frames = id_to - id_from
assert (
episode_ids[id_from:id_to] == episode_id
).all(), f"episode_ids[{id_from}:{id_to}] != {episode_id}"
state = states[id_from:id_to]
ep_dict = {
# observation.image will be filled later
"observation.state": state,
"episode_index": torch.tensor([episode_id] * num_frames, dtype=torch.int),
"frame_index": torch.arange(0, num_frames, 1),
"timestamp": torch.arange(0, num_frames, 1) / fps,
"episode_data_index_from": torch.tensor([id_from] * num_frames),
"episode_data_index_to": torch.tensor([id_from + num_frames] * num_frames),
"end_pose": end_pose[id_from:id_to],
"start_pos": start_pos[id_from:id_to],
"gripper_width": gripper_width[id_from:id_to],
}
ep_dicts.append(ep_dict)
episode_data_index["from"].append(id_from)
episode_data_index["to"].append(id_from + num_frames)
id_from += num_frames
data_dict = concatenate_episodes(ep_dicts)
total_frames = id_from
data_dict["index"] = torch.arange(0, total_frames, 1)
print("Saving images to disk in temporary folder...")
# datasets.Image() can take a list of paths to images, so we save the images to a temporary folder
# to avoid loading them all in memory
_umi_save_images_concurrently(zarr_data, "tmp_umi_images", max_workers=12)
print("Saving images to disk in temporary folder... Done")
# Sort files by number eg. 1.png, 2.png, 3.png, 9.png, 10.png instead of 1.png, 10.png, 2.png, 3.png, 9.png
# to correctly match the images with the data
images_path = sorted(glob("tmp_umi_images/*"), key=lambda x: int(re.search(r"(\d+)\.png$", x).group(1)))
data_dict["observation.image"] = images_path
features = {
"observation.image": Image(),
"observation.state": Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
),
"episode_index": Value(dtype="int64", id=None),
"frame_index": Value(dtype="int64", id=None),
"timestamp": Value(dtype="float32", id=None),
"index": Value(dtype="int64", id=None),
"episode_data_index_from": Value(dtype="int64", id=None),
"episode_data_index_to": Value(dtype="int64", id=None),
# `start_pos` and `end_pos` respectively represent the positions of the end-effector
# at the beginning and the end of the episode.
# `gripper_width` indicates the distance between the grippers, and this value is included
# in the state vector, which comprises the concatenation of the end-effector position
# and gripper width.
"end_pose": Sequence(length=data_dict["end_pose"].shape[1], feature=Value(dtype="float32", id=None)),
"start_pos": Sequence(
length=data_dict["start_pos"].shape[1], feature=Value(dtype="float32", id=None)
),
"gripper_width": Sequence(
length=data_dict["gripper_width"].shape[1], feature=Value(dtype="float32", id=None)
),
}
features = Features(features)
hf_dataset = Dataset.from_dict(data_dict, features=features)
hf_dataset.set_transform(hf_transform_to_torch)
info = {
"fps": fps,
}
stats = compute_stats(hf_dataset)
push_to_hub(
hf_dataset=hf_dataset,
episode_data_index=episode_data_index,
info=info,
stats=stats,
root=root,
revision=revision,
dataset_id=dataset_id,
)
# Cleanup
if os.path.exists("tmp_umi_images"):
print("Removing temporary images folder")
shutil.rmtree("tmp_umi_images")
print("Cleanup done")
def _umi_clear_folder(folder_path: str):
import os
"""
Clears all the content of the specified folder. Creates the folder if it does not exist.
Args:
folder_path (str): Path to the folder to clear.
Examples:
>>> import os
>>> os.makedirs('example_folder', exist_ok=True)
>>> with open('example_folder/temp_file.txt', 'w') as f:
... f.write('example')
>>> clear_folder('example_folder')
>>> os.listdir('example_folder')
[]
"""
if os.path.exists(folder_path):
for filename in os.listdir(folder_path):
file_path = os.path.join(folder_path, filename)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception as e:
print(f"Failed to delete {file_path}. Reason: {e}")
else:
os.makedirs(folder_path)
def _umi_save_image(img_array: np.array, i: int, folder_path: str):
import os
"""
Saves a single image to the specified folder.
Args:
img_array (ndarray): The numpy array of the image.
i (int): Index of the image, used for naming.
folder_path (str): Path to the folder where the image will be saved.
"""
img = PILImage.fromarray(img_array)
img_format = "PNG" if img_array.dtype == np.uint8 else "JPEG"
img.save(os.path.join(folder_path, f"{i}.{img_format.lower()}"), quality=100)
def _umi_save_images_concurrently(zarr_data: dict, folder_path: str, max_workers: int = 4):
from concurrent.futures import ThreadPoolExecutor
"""
Saves images from the zarr_data to the specified folder using multithreading.
Args:
zarr_data (dict): A dictionary containing image data in an array format.
folder_path (str): Path to the folder where images will be saved.
max_workers (int): The maximum number of threads to use for saving images.
"""
num_images = len(zarr_data["data/camera0_rgb"])
_umi_clear_folder(folder_path) # Clear or create folder first
with ThreadPoolExecutor(max_workers=max_workers) as executor:
[
executor.submit(_umi_save_image, zarr_data["data/camera0_rgb"][i], i, folder_path)
for i in range(num_images)
]
if __name__ == "__main__":
root = "data"
revision = "v1.1"
dataset_ids = [
"pusht",
"xarm_lift_medium",
@@ -545,6 +773,7 @@ if __name__ == "__main__":
"aloha_sim_insertion_scripted",
"aloha_sim_transfer_cube_human",
"aloha_sim_transfer_cube_scripted",
"umi_cup_in_the_wild",
]
for dataset_id in dataset_ids:
download_and_upload(root, revision, dataset_id)