forked from tangger/lerobot
Add UMI-gripper dataset (#83)
Co-authored-by: Remi <re.cadene@gmail.com>
This commit is contained in:
@@ -30,10 +30,31 @@ def download_and_upload(root, revision, dataset_id):
|
||||
download_and_upload_xarm(root, revision, dataset_id)
|
||||
elif "aloha" in dataset_id:
|
||||
download_and_upload_aloha(root, revision, dataset_id)
|
||||
elif "umi" in dataset_id:
|
||||
download_and_upload_umi(root, revision, dataset_id)
|
||||
else:
|
||||
raise ValueError(dataset_id)
|
||||
|
||||
|
||||
def concatenate_episodes(ep_dicts):
    """Merge a list of per-episode dictionaries into one dataset-wide dictionary.

    Every episode dictionary is expected to expose the same keys as the first
    one. For each key:

    * if the first episode stores a ``torch.Tensor`` under that key, the
      per-episode tensors are concatenated along dim 0 with ``torch.cat``;
    * otherwise the per-episode sequences are flattened, in episode order,
      into a single Python list.

    A global ``"index"`` entry (``0 .. total_frames - 1``) is added, where
    ``total_frames`` is the length of the concatenated ``"frame_index"`` tensor.

    Args:
        ep_dicts: Non-empty list of dictionaries, one per episode. Each must
            contain a tensor-valued ``"frame_index"`` entry.

    Returns:
        dict: The merged dictionary, including the extra ``"index"`` key.

    Raises:
        IndexError: If ``ep_dicts`` is empty.
        KeyError: If ``"frame_index"`` is missing.
    """
    from itertools import chain

    first_episode = ep_dicts[0]
    data_dict = {}

    for key in first_episode:
        # Inspect the container rather than its first element: this keeps working
        # when the first episode holds zero frames, and keeps list-of-tensor
        # columns as lists instead of handing Python lists to `torch.cat`.
        if torch.is_tensor(first_episode[key]):
            data_dict[key] = torch.cat([ep_dict[key] for ep_dict in ep_dicts])
        else:
            data_dict[key] = list(chain.from_iterable(ep_dict[key] for ep_dict in ep_dicts))

    total_frames = data_dict["frame_index"].shape[0]
    data_dict["index"] = torch.arange(0, total_frames, 1)
    return data_dict
|
||||
|
||||
|
||||
def download_and_extract_zip(url: str, destination_folder: Path) -> bool:
|
||||
import zipfile
|
||||
|
||||
@@ -62,25 +83,6 @@ def download_and_extract_zip(url: str, destination_folder: Path) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def concatenate_episodes(ep_dicts):
    """Stitch per-episode dictionaries together into one flat dictionary.

    The key set is taken from the first episode. A column whose first element
    (in the first episode) is a tensor is joined with ``torch.cat``; any other
    column is flattened into a plain list, episode after episode. Finally an
    ``"index"`` tensor counting ``0 .. N - 1`` is attached, ``N`` being the
    length of the merged ``"frame_index"`` column.
    """
    reference_episode = ep_dicts[0]
    merged = {}

    for column in reference_episode.keys():
        if torch.is_tensor(reference_episode[column][0]):
            # Tensor column: one concatenation along the leading dimension.
            merged[column] = torch.cat([episode[column] for episode in ep_dicts])
            continue
        # Non-tensor column: flatten element by element, preserving order.
        merged[column] = [item for episode in ep_dicts for item in episode[column]]

    merged["index"] = torch.arange(0, merged["frame_index"].shape[0], 1)
    return merged
|
||||
|
||||
|
||||
def push_to_hub(hf_dataset, episode_data_index, info, stats, root, revision, dataset_id):
|
||||
# push to main to indicate latest version
|
||||
hf_dataset.push_to_hub(f"lerobot/{dataset_id}", token=True)
|
||||
@@ -515,9 +517,9 @@ def download_and_upload_aloha(root, revision, dataset_id, fps=50):
|
||||
"episode_index": Value(dtype="int64", id=None),
|
||||
"frame_index": Value(dtype="int64", id=None),
|
||||
"timestamp": Value(dtype="float32", id=None),
|
||||
#'next.reward': Value(dtype='float32', id=None),
|
||||
# "next.reward": Value(dtype="float32", id=None),
|
||||
"next.done": Value(dtype="bool", id=None),
|
||||
#'next.success': Value(dtype='bool', id=None),
|
||||
# "next.success": Value(dtype="bool", id=None),
|
||||
"index": Value(dtype="int64", id=None),
|
||||
}
|
||||
features = Features(features)
|
||||
@@ -531,10 +533,236 @@ def download_and_upload_aloha(root, revision, dataset_id, fps=50):
|
||||
push_to_hub(hf_dataset, episode_data_index, info, stats, root, revision, dataset_id)
|
||||
|
||||
|
||||
def download_and_upload_umi(root, revision, dataset_id, fps=10):
    """Convert the UMI `cup_in_the_wild` zarr dataset and push it to the hub.

    Steps:
      1. Download and extract the zarr archive under ``root / f"{dataset_id}_raw"``
         unless the extracted directory already exists.
      2. Build per-episode dictionaries from the low-dimensional zarr arrays.
         ``observation.state`` is the concatenation (along dim 1) of
         ``robot0_eef_pos``, ``robot0_eef_rot_axis_angle`` and ``robot0_gripper_width``.
      3. Dump every camera frame to a temporary folder so that ``datasets.Image``
         can reference them by path instead of holding them all in memory.
      4. Build the Hugging Face dataset, compute stats and call ``push_to_hub``.
      5. Always remove the temporary image folder, even if a step above fails.

    Args:
        root: Directory (str or Path) under which raw data is stored.
        revision: Forwarded unchanged to ``push_to_hub``.
        dataset_id: Dataset identifier; used for the raw folder name and forwarded
            to ``push_to_hub``.
        fps: Frames per second used to derive timestamps.

    Raises:
        RuntimeError: If the number of images written to disk does not match the
            number of frames described by ``meta/episode_ends``.
    """
    # fps is equal to 10, source: https://arxiv.org/pdf/2402.10329.pdf#table.caption.16
    import os
    import shutil
    from glob import glob

    import numpy as np
    import torch
    import tqdm
    import zarr
    from datasets import Dataset, Features, Image, Sequence, Value

    from lerobot.common.datasets._umi_imagecodecs_numcodecs import register_codecs

    # NOTE: This is critical otherwise ValueError: codec not available: 'imagecodecs_jpegxl'
    # will be raised
    register_codecs()

    url_cup_in_the_wild = "https://real.stanford.edu/umi/data/zarr_datasets/cup_in_the_wild.zarr.zip"
    cup_in_the_wild_zarr = Path("umi/cup_in_the_wild/cup_in_the_wild.zarr")
    # Single definition of the scratch folder (relative to the current working directory).
    tmp_images_dir = "tmp_umi_images"

    root = Path(root)
    raw_dir = root / f"{dataset_id}_raw"
    zarr_path = (raw_dir / cup_in_the_wild_zarr).resolve()
    if not zarr_path.is_dir():
        raw_dir.mkdir(parents=True, exist_ok=True)
        download_and_extract_zip(url_cup_in_the_wild, zarr_path)
    zarr_data = zarr.open(zarr_path, mode="r")

    # We process the image data separately because it is too large to fit in memory
    end_pose = torch.from_numpy(zarr_data["data/robot0_demo_end_pose"][:])
    start_pos = torch.from_numpy(zarr_data["data/robot0_demo_start_pose"][:])
    eff_pos = torch.from_numpy(zarr_data["data/robot0_eef_pos"][:])
    eff_rot_axis_angle = torch.from_numpy(zarr_data["data/robot0_eef_rot_axis_angle"][:])
    gripper_width = torch.from_numpy(zarr_data["data/robot0_gripper_width"][:])

    states_pos = torch.cat([eff_pos, eff_rot_axis_angle], dim=1)
    states = torch.cat([states_pos, gripper_width], dim=1)

    def get_episode_idxs(episode_ends: np.ndarray) -> np.ndarray:
        # Optimized and simplified version of this function: https://github.com/real-stanford/universal_manipulation_interface/blob/298776ce251f33b6b3185a98d6e7d1f9ad49168b/diffusion_policy/common/replay_buffer.py#L374
        from numba import jit

        @jit(nopython=True)
        def _get_episode_idxs(episode_ends):
            # result[k] is the episode number that frame k belongs to.
            result = np.zeros((episode_ends[-1],), dtype=np.int64)
            start_idx = 0
            for episode_number, end_idx in enumerate(episode_ends):
                result[start_idx:end_idx] = episode_number
                start_idx = end_idx
            return result

        return _get_episode_idxs(episode_ends)

    episode_ends = zarr_data["meta/episode_ends"][:]
    num_episodes: int = episode_ends.shape[0]

    episode_ids = torch.from_numpy(get_episode_idxs(episode_ends))

    # We convert it in torch tensor later because the jit function does not support torch tensors
    episode_ends = torch.from_numpy(episode_ends)

    ep_dicts = []
    episode_data_index = {"from": [], "to": []}
    id_from = 0

    for episode_id in tqdm.tqdm(range(num_episodes)):
        id_to = episode_ends[episode_id]

        num_frames = id_to - id_from

        # Sanity check: the per-frame episode ids must agree with the episode boundaries.
        assert (
            episode_ids[id_from:id_to] == episode_id
        ).all(), f"episode_ids[{id_from}:{id_to}] != {episode_id}"

        state = states[id_from:id_to]
        ep_dict = {
            # observation.image will be filled later
            "observation.state": state,
            "episode_index": torch.tensor([episode_id] * num_frames, dtype=torch.int),
            "frame_index": torch.arange(0, num_frames, 1),
            "timestamp": torch.arange(0, num_frames, 1) / fps,
            "episode_data_index_from": torch.tensor([id_from] * num_frames),
            "episode_data_index_to": torch.tensor([id_from + num_frames] * num_frames),
            "end_pose": end_pose[id_from:id_to],
            "start_pos": start_pos[id_from:id_to],
            "gripper_width": gripper_width[id_from:id_to],
        }
        ep_dicts.append(ep_dict)
        episode_data_index["from"].append(id_from)
        episode_data_index["to"].append(id_from + num_frames)
        id_from += num_frames

    data_dict = concatenate_episodes(ep_dicts)

    total_frames = id_from
    data_dict["index"] = torch.arange(0, total_frames, 1)

    # Everything from here on may leave the temporary image folder behind, so the
    # cleanup lives in a `finally` clause and runs on success and on failure alike.
    try:
        print("Saving images to disk in temporary folder...")
        # datasets.Image() can take a list of paths to images, so we save the images to a temporary folder
        # to avoid loading them all in memory
        _umi_save_images_concurrently(zarr_data, tmp_images_dir, max_workers=12)
        print("Saving images to disk in temporary folder... Done")

        # Sort files by number eg. 1.png, 2.png, 3.png, 9.png, 10.png instead of 1.png, 10.png, 2.png, 3.png, 9.png
        # to correctly match the images with the data.
        # The key is the integer file stem, so it does not depend on the image extension
        # (`_umi_save_image` may write `.png` or `.jpeg`).
        images_path = sorted(glob(f"{tmp_images_dir}/*"), key=lambda path: int(Path(path).stem))
        if len(images_path) != int(total_frames):
            raise RuntimeError(
                f"Expected {int(total_frames)} images in '{tmp_images_dir}' but found {len(images_path)}"
            )
        data_dict["observation.image"] = images_path

        features = {
            "observation.image": Image(),
            "observation.state": Sequence(
                length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
            ),
            "episode_index": Value(dtype="int64", id=None),
            "frame_index": Value(dtype="int64", id=None),
            "timestamp": Value(dtype="float32", id=None),
            "index": Value(dtype="int64", id=None),
            "episode_data_index_from": Value(dtype="int64", id=None),
            "episode_data_index_to": Value(dtype="int64", id=None),
            # `start_pos` and `end_pose` respectively represent the poses of the end-effector
            # at the beginning and the end of the episode.
            # `gripper_width` indicates the distance between the grippers, and this value is included
            # in the state vector, which comprises the concatenation of the end-effector position,
            # its axis-angle rotation and the gripper width.
            "end_pose": Sequence(length=data_dict["end_pose"].shape[1], feature=Value(dtype="float32", id=None)),
            "start_pos": Sequence(
                length=data_dict["start_pos"].shape[1], feature=Value(dtype="float32", id=None)
            ),
            "gripper_width": Sequence(
                length=data_dict["gripper_width"].shape[1], feature=Value(dtype="float32", id=None)
            ),
        }
        features = Features(features)
        hf_dataset = Dataset.from_dict(data_dict, features=features)
        hf_dataset.set_transform(hf_transform_to_torch)

        info = {
            "fps": fps,
        }
        stats = compute_stats(hf_dataset)
        push_to_hub(
            hf_dataset=hf_dataset,
            episode_data_index=episode_data_index,
            info=info,
            stats=stats,
            root=root,
            revision=revision,
            dataset_id=dataset_id,
        )
    finally:
        # Cleanup
        if os.path.exists(tmp_images_dir):
            print("Removing temporary images folder")
            shutil.rmtree(tmp_images_dir)
            print("Cleanup done")
|
||||
|
||||
|
||||
def _umi_clear_folder(folder_path: str):
    """
    Clears all the content of the specified folder. Creates the folder if it does not exist.

    Deletion is best-effort: a failure on one entry is printed and the remaining
    entries are still processed.

    Args:
        folder_path (str): Path to the folder to clear.

    Examples:
        >>> import os
        >>> os.makedirs('example_folder', exist_ok=True)
        >>> with open('example_folder/temp_file.txt', 'w') as f:
        ...     _ = f.write('example')
        >>> _umi_clear_folder('example_folder')
        >>> os.listdir('example_folder')
        []
    """
    import os

    # Imported locally, like `os`, so that removing sub-directories does not depend
    # on a module-level `shutil` import being present.
    import shutil

    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        return

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            print(f"Failed to delete {file_path}. Reason: {e}")
|
||||
|
||||
|
||||
def _umi_save_image(img_array: np.ndarray, i: int, folder_path: str):
    """
    Saves a single image to the specified folder.

    The file is named ``{i}.png`` when ``img_array`` has dtype ``uint8`` and
    ``{i}.jpeg`` otherwise.

    Args:
        img_array (ndarray): The numpy array of the image.
        i (int): Index of the image, used for naming.
        folder_path (str): Path to the folder where the image will be saved.
    """
    import os

    img = PILImage.fromarray(img_array)
    img_format = "PNG" if img_array.dtype == np.uint8 else "JPEG"
    img.save(os.path.join(folder_path, f"{i}.{img_format.lower()}"), quality=100)
|
||||
|
||||
|
||||
def _umi_save_images_concurrently(zarr_data: dict, folder_path: str, max_workers: int = 4):
    """
    Saves images from the zarr_data to the specified folder using multithreading.

    The target folder is cleared (or created) first. Each frame stored under
    ``zarr_data["data/camera0_rgb"]`` is written by ``_umi_save_image`` using its
    position as the file index. The function returns only once every image has
    been written; if any save fails, the exception is re-raised here instead of
    being silently dropped, so callers never continue with a partial image set.

    Args:
        zarr_data (dict): A mapping containing image data in an array format under
            the key ``"data/camera0_rgb"``.
        folder_path (str): Path to the folder where images will be saved.
        max_workers (int): The maximum number of threads to use for saving images.
    """
    from concurrent.futures import ThreadPoolExecutor

    # Look the array up once instead of on every iteration.
    frames = zarr_data["data/camera0_rgb"]
    num_images = len(frames)
    _umi_clear_folder(folder_path)  # Clear or create folder first

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(_umi_save_image, frames[i], i, folder_path) for i in range(num_images)]
        # `result()` re-raises any exception raised inside a worker thread.
        for future in futures:
            future.result()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
root = "data"
|
||||
revision = "v1.1"
|
||||
|
||||
dataset_ids = [
|
||||
"pusht",
|
||||
"xarm_lift_medium",
|
||||
@@ -545,6 +773,7 @@ if __name__ == "__main__":
|
||||
"aloha_sim_insertion_scripted",
|
||||
"aloha_sim_transfer_cube_human",
|
||||
"aloha_sim_transfer_cube_scripted",
|
||||
"umi_cup_in_the_wild",
|
||||
]
|
||||
for dataset_id in dataset_ids:
|
||||
download_and_upload(root, revision, dataset_id)
|
||||
|
||||
Reference in New Issue
Block a user