Refactor push_dataset_to_hub (#118)

This commit is contained in:
Remi
2024-04-30 14:25:41 +02:00
committed by GitHub
parent 2765877f28
commit e4e739f4f8
25 changed files with 1089 additions and 1192 deletions

View File

@@ -4,20 +4,22 @@ useless dependencies when using datasets.
"""
import io
import logging
import shutil
from pathlib import Path
import tqdm
def download_raw(root, dataset_id) -> Path:
def download_raw(raw_dir, dataset_id):
if "pusht" in dataset_id:
return download_pusht(root=root, dataset_id=dataset_id)
download_pusht(raw_dir)
elif "xarm" in dataset_id:
return download_xarm(root=root, dataset_id=dataset_id)
download_xarm(raw_dir)
elif "aloha" in dataset_id:
return download_aloha(root=root, dataset_id=dataset_id)
download_aloha(raw_dir, dataset_id)
elif "umi" in dataset_id:
return download_umi(root=root, dataset_id=dataset_id)
download_umi(raw_dir)
else:
raise ValueError(dataset_id)
@@ -45,50 +47,53 @@ def download_and_extract_zip(url: str, destination_folder: Path) -> bool:
with zipfile.ZipFile(zip_file, "r") as zip_ref:
zip_ref.extractall(destination_folder)
return True
else:
return False
def download_pusht(root: str, dataset_id: str = "pusht", fps: int = 10) -> Path:
def download_pusht(raw_dir: str):
pusht_url = "https://diffusion-policy.cs.columbia.edu/data/training/pusht.zip"
pusht_zarr = Path("pusht/pusht_cchi_v7_replay.zarr")
root = Path(root)
raw_dir: Path = root / f"{dataset_id}_raw"
zarr_path: Path = (raw_dir / pusht_zarr).resolve()
if not zarr_path.is_dir():
raw_dir.mkdir(parents=True, exist_ok=True)
download_and_extract_zip(pusht_url, raw_dir)
return zarr_path
raw_dir = Path(raw_dir)
raw_dir.mkdir(parents=True, exist_ok=True)
download_and_extract_zip(pusht_url, raw_dir)
# file is created inside a useful "pusht" directory, so we move it out and delete the dir
zarr_path = raw_dir / "pusht_cchi_v7_replay.zarr"
shutil.move(raw_dir / "pusht" / "pusht_cchi_v7_replay.zarr", zarr_path)
shutil.rmtree(raw_dir / "pusht")
def download_xarm(root: str, dataset_id: str, fps: int = 15) -> Path:
root = Path(root)
raw_dir: Path = root / "xarm_datasets_raw"
if not raw_dir.exists():
import zipfile
def download_xarm(raw_dir: Path):
"""Download all xarm datasets at once"""
import zipfile
import gdown
import gdown
raw_dir.mkdir(parents=True, exist_ok=True)
# from https://github.com/fyhMer/fowm/blob/main/scripts/download_datasets.py
url = "https://drive.google.com/uc?id=1nhxpykGtPDhmQKm-_B8zBSywVRdgeVya"
zip_path = raw_dir / "data.zip"
gdown.download(url, str(zip_path), quiet=False)
print("Extracting...")
with zipfile.ZipFile(str(zip_path), "r") as zip_f:
for member in zip_f.namelist():
if member.startswith("data/xarm") and member.endswith(".pkl"):
print(member)
zip_f.extract(member=member)
zip_path.unlink()
dataset_path: Path = root / f"{dataset_id}"
return dataset_path
raw_dir = Path(raw_dir)
raw_dir.mkdir(parents=True, exist_ok=True)
# from https://github.com/fyhMer/fowm/blob/main/scripts/download_datasets.py
url = "https://drive.google.com/uc?id=1nhxpykGtPDhmQKm-_B8zBSywVRdgeVya"
zip_path = raw_dir / "data.zip"
gdown.download(url, str(zip_path), quiet=False)
print("Extracting...")
with zipfile.ZipFile(str(zip_path), "r") as zip_f:
for pkl_path in zip_f.namelist():
if pkl_path.startswith("data/xarm") and pkl_path.endswith(".pkl"):
zip_f.extract(member=pkl_path)
# move to corresponding raw directory
extract_dir = pkl_path.replace("/buffer.pkl", "")
raw_pkl_path = raw_dir / "buffer.pkl"
shutil.move(pkl_path, raw_pkl_path)
shutil.rmtree(extract_dir)
zip_path.unlink()
def download_aloha(root: str, dataset_id: str) -> Path:
def download_aloha(raw_dir: Path, dataset_id: str):
# TODO(rcadene): remove gdown and use hugging face download instead
import gdown
logging.warning(
"Aloha download is broken and requires a custom version of gdown which is not limited on number of files"
)
folder_urls = {
"aloha_sim_insertion_human": "https://drive.google.com/drive/folders/1RgyD0JgTX30H4IM5XZn8I3zSV_mr8pyF",
"aloha_sim_insertion_scripted": "https://drive.google.com/drive/folders/1TsojQQSXtHEoGnqgJ3gmpPQR2DPLtS2N",
@@ -129,40 +134,32 @@ def download_aloha(root: str, dataset_id: str) -> Path:
"aloha_sim_transfer_cube_human": ["top"],
"aloha_sim_transfer_cube_scripted": ["top"],
}
root = Path(root)
raw_dir: Path = root / f"{dataset_id}_raw"
if not raw_dir.is_dir():
import gdown
assert dataset_id in folder_urls
assert dataset_id in ep48_urls
assert dataset_id in ep49_urls
assert dataset_id in folder_urls
assert dataset_id in ep48_urls
assert dataset_id in ep49_urls
raw_dir.mkdir(parents=True, exist_ok=True)
raw_dir = Path(raw_dir)
raw_dir.mkdir(parents=True, exist_ok=True)
gdown.download_folder(folder_urls[dataset_id], output=str(raw_dir))
gdown.download_folder(folder_urls[dataset_id], output=str(raw_dir))
# because of the 50 files limit per directory, two files episode 48 and 49 were missing
gdown.download(ep48_urls[dataset_id], output=str(raw_dir / "episode_48.hdf5"), fuzzy=True)
gdown.download(ep49_urls[dataset_id], output=str(raw_dir / "episode_49.hdf5"), fuzzy=True)
return raw_dir
# because of the 50 files limit per directory, two files episode 48 and 49 were missing
gdown.download(ep48_urls[dataset_id], output=str(raw_dir / "episode_48.hdf5"), fuzzy=True)
gdown.download(ep49_urls[dataset_id], output=str(raw_dir / "episode_49.hdf5"), fuzzy=True)
def download_umi(root: str, dataset_id: str) -> Path:
def download_umi(raw_dir: Path):
url_cup_in_the_wild = "https://real.stanford.edu/umi/data/zarr_datasets/cup_in_the_wild.zarr.zip"
cup_in_the_wild_zarr = Path("umi/cup_in_the_wild/cup_in_the_wild.zarr")
zarr_path = raw_dir / "cup_in_the_wild.zarr"
root = Path(root)
raw_dir: Path = root / f"{dataset_id}_raw"
zarr_path: Path = (raw_dir / cup_in_the_wild_zarr).resolve()
if not zarr_path.is_dir():
raw_dir.mkdir(parents=True, exist_ok=True)
download_and_extract_zip(url_cup_in_the_wild, zarr_path)
return zarr_path
raw_dir = Path(raw_dir)
raw_dir.mkdir(parents=True, exist_ok=True)
download_and_extract_zip(url_cup_in_the_wild, zarr_path)
if __name__ == "__main__":
root = "data"
data_dir = Path("data")
dataset_ids = [
"pusht",
"xarm_lift_medium",
@@ -176,4 +173,5 @@ if __name__ == "__main__":
"umi_cup_in_the_wild",
]
for dataset_id in dataset_ids:
download_raw(root=root, dataset_id=dataset_id)
raw_dir = data_dir / f"{dataset_id}_raw"
download_raw(raw_dir, dataset_id)

View File

@@ -0,0 +1,163 @@
"""
Contains utilities to process raw data format of HDF5 files like in: https://github.com/tonyzhaozh/act
"""
import re
import shutil
from pathlib import Path
import h5py
import torch
import tqdm
from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
from lerobot.common.datasets.utils import (
hf_transform_to_torch,
)
# TODO(rcadene): enable for PR video dataset
# from lerobot.common.datasets.video_utils import encode_video_frames
def check_format(raw_dir) -> bool:
cameras = ["top"]
hdf5_files: list[Path] = list(raw_dir.glob("episode_*.hdf5"))
assert len(hdf5_files) != 0
hdf5_files = sorted(hdf5_files, key=lambda x: int(re.search(r"episode_(\d+).hdf5", x.name).group(1)))
# Check if the sequence is consecutive eg episode_0, episode_1, episode_2, etc.
previous_number = None
for file in hdf5_files:
current_number = int(re.search(r"episode_(\d+).hdf5", file.name).group(1))
if previous_number is not None:
assert current_number - previous_number == 1
previous_number = current_number
for file in hdf5_files:
with h5py.File(file, "r") as file:
# Check for the expected datasets within the HDF5 file
required_datasets = ["/action", "/observations/qpos"]
# Add camera-specific image datasets to the required datasets
camera_datasets = [f"/observations/images/{cam}" for cam in cameras]
required_datasets.extend(camera_datasets)
assert all(dataset in file for dataset in required_datasets)
def load_from_raw(raw_dir, out_dir, fps, video, debug):
hdf5_files = list(raw_dir.glob("*.hdf5"))
hdf5_files = sorted(hdf5_files, key=lambda x: int(re.search(r"episode_(\d+)", x.name).group(1)))
ep_dicts = []
episode_data_index = {"from": [], "to": []}
id_from = 0
for ep_path in tqdm.tqdm(hdf5_files):
with h5py.File(ep_path, "r") as ep:
ep_idx = int(re.search(r"episode_(\d+)", ep_path.name).group(1))
num_frames = ep["/action"].shape[0]
# last step of demonstration is considered done
done = torch.zeros(num_frames, dtype=torch.bool)
done[-1] = True
state = torch.from_numpy(ep["/observations/qpos"][:])
action = torch.from_numpy(ep["/action"][:])
ep_dict = {}
cameras = list(ep["/observations/images"].keys())
for cam in cameras:
img_key = f"observation.images.{cam}"
imgs_array = ep[f"/observations/images/{cam}"][:] # b h w c
if video:
# save png images in temporary directory
tmp_imgs_dir = out_dir / "tmp_images"
save_images_concurrently(imgs_array, tmp_imgs_dir)
# encode images to a mp4 video
video_path = out_dir / "videos" / f"{img_key}_episode_{ep_idx:06d}.mp4"
encode_video_frames(tmp_imgs_dir, video_path, fps) # noqa: F821
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
# store the episode idx
ep_dict[img_key] = torch.tensor([ep_idx] * num_frames, dtype=torch.int)
else:
ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array]
ep_dict["observation.state"] = state
ep_dict["action"] = action
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames)
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
ep_dict["next.done"] = done
# TODO(rcadene): compute reward and success
# ep_dict[""next.reward"] = reward
# ep_dict[""next.success"] = success
assert isinstance(ep_idx, int)
ep_dicts.append(ep_dict)
episode_data_index["from"].append(id_from)
episode_data_index["to"].append(id_from + num_frames)
id_from += num_frames
# process first episode only
if debug:
break
data_dict = concatenate_episodes(ep_dicts)
return data_dict, episode_data_index
def to_hf_dataset(data_dict, video) -> Dataset:
features = {}
image_keys = [key for key in data_dict if "observation.images." in key]
for image_key in image_keys:
if video:
features[image_key] = Value(dtype="int64", id="video")
else:
features[image_key] = Image()
features["observation.state"] = Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
)
features["action"] = Sequence(
length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)
)
features["episode_index"] = Value(dtype="int64", id=None)
features["frame_index"] = Value(dtype="int64", id=None)
features["timestamp"] = Value(dtype="float32", id=None)
features["next.done"] = Value(dtype="bool", id=None)
features["index"] = Value(dtype="int64", id=None)
# TODO(rcadene): add reward and success
# features["next.reward"] = Value(dtype="float32", id=None)
# features["next.success"] = Value(dtype="bool", id=None)
hf_dataset = Dataset.from_dict(data_dict, features=Features(features))
hf_dataset.set_transform(hf_transform_to_torch)
return hf_dataset
def from_raw_to_lerobot_format(raw_dir: Path, out_dir: Path, fps=None, video=True, debug=False):
# sanity check
check_format(raw_dir)
if fps is None:
fps = 50
data_dir, episode_data_index = load_from_raw(raw_dir, out_dir, fps, video, debug)
hf_dataset = to_hf_dataset(data_dir, video)
info = {
"fps": fps,
"video": video,
}
return hf_dataset, episode_data_index, info

View File

@@ -1,199 +0,0 @@
import re
from pathlib import Path
import h5py
import torch
import tqdm
from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes
from lerobot.common.datasets.utils import (
hf_transform_to_torch,
)
class AlohaProcessor:
"""
Process HDF5 files formatted like in: https://github.com/tonyzhaozh/act
Attributes:
folder_path (Path): Path to the directory containing HDF5 files.
cameras (list[str]): List of camera identifiers to check in the files.
fps (int): Frames per second used in timestamp calculations.
Methods:
is_valid() -> bool:
Validates if each HDF5 file within the folder contains all required datasets.
preprocess() -> dict:
Processes the files and returns structured data suitable for further analysis.
to_hf_dataset(data_dict: dict) -> Dataset:
Converts processed data into a Hugging Face Dataset object.
"""
def __init__(self, folder_path: Path, cameras: list[str] | None = None, fps: int | None = None):
"""
Initializes the AlohaProcessor with a specified directory path containing HDF5 files,
an optional list of cameras, and a frame rate.
Args:
folder_path (Path): The directory path where HDF5 files are stored.
cameras (list[str] | None): Optional list of cameras to validate within the files. Defaults to ['top'] if None.
fps (int): Frame rate for the datasets, used in time calculations. Default is 50.
Examples:
>>> processor = AlohaProcessor(Path("path_to_hdf5_directory"), ["camera1", "camera2"])
>>> processor.is_valid()
True
"""
self.folder_path = folder_path
if cameras is None:
cameras = ["top"]
self.cameras = cameras
if fps is None:
fps = 50
self._fps = fps
@property
def fps(self) -> int:
return self._fps
def is_valid(self) -> bool:
"""
Validates the HDF5 files in the specified folder to ensure they contain the required datasets
for actions, positions, and images for each specified camera.
Returns:
bool: True if all files are valid HDF5 files with all required datasets, False otherwise.
"""
hdf5_files: list[Path] = list(self.folder_path.glob("episode_*.hdf5"))
if len(hdf5_files) == 0:
return False
try:
hdf5_files = sorted(
hdf5_files, key=lambda x: int(re.search(r"episode_(\d+).hdf5", x.name).group(1))
)
except AttributeError:
# All file names must contain a numerical identifier matching 'episode_(\\d+).hdf5
return False
# Check if the sequence is consecutive eg episode_0, episode_1, episode_2, etc.
# If not, return False
previous_number = None
for file in hdf5_files:
current_number = int(re.search(r"episode_(\d+).hdf5", file.name).group(1))
if previous_number is not None and current_number - previous_number != 1:
return False
previous_number = current_number
for file in hdf5_files:
try:
with h5py.File(file, "r") as file:
# Check for the expected datasets within the HDF5 file
required_datasets = ["/action", "/observations/qpos"]
# Add camera-specific image datasets to the required datasets
camera_datasets = [f"/observations/images/{cam}" for cam in self.cameras]
required_datasets.extend(camera_datasets)
if not all(dataset in file for dataset in required_datasets):
return False
except OSError:
return False
return True
def preprocess(self):
"""
Collects episode data from the HDF5 file and returns it as an AlohaStep named tuple.
Returns:
AlohaStep: Named tuple containing episode data.
Raises:
ValueError: If the file is not valid.
"""
if not self.is_valid():
raise ValueError("The HDF5 file is invalid or does not contain the required datasets.")
hdf5_files = list(self.folder_path.glob("*.hdf5"))
hdf5_files = sorted(hdf5_files, key=lambda x: int(re.search(r"episode_(\d+)", x.name).group(1)))
ep_dicts = []
episode_data_index = {"from": [], "to": []}
id_from = 0
for ep_path in tqdm.tqdm(hdf5_files):
with h5py.File(ep_path, "r") as ep:
ep_id = int(re.search(r"episode_(\d+)", ep_path.name).group(1))
num_frames = ep["/action"].shape[0]
# last step of demonstration is considered done
done = torch.zeros(num_frames, dtype=torch.bool)
done[-1] = True
state = torch.from_numpy(ep["/observations/qpos"][:])
action = torch.from_numpy(ep["/action"][:])
ep_dict = {}
for cam in self.cameras:
image = torch.from_numpy(ep[f"/observations/images/{cam}"][:]) # b h w c
ep_dict[f"observation.images.{cam}"] = [PILImage.fromarray(x.numpy()) for x in image]
ep_dict.update(
{
"observation.state": state,
"action": action,
"episode_index": torch.tensor([ep_id] * num_frames),
"frame_index": torch.arange(0, num_frames, 1),
"timestamp": torch.arange(0, num_frames, 1) / self.fps,
# TODO(rcadene): compute reward and success
# "next.reward": reward,
"next.done": done,
# "next.success": success,
}
)
assert isinstance(ep_id, int)
ep_dicts.append(ep_dict)
episode_data_index["from"].append(id_from)
episode_data_index["to"].append(id_from + num_frames)
id_from += num_frames
data_dict = concatenate_episodes(ep_dicts)
return data_dict, episode_data_index
def to_hf_dataset(self, data_dict) -> Dataset:
"""
Converts a dictionary of data into a Hugging Face Dataset object.
Args:
data_dict (dict): A dictionary containing the data to be converted.
Returns:
Dataset: The converted Hugging Face Dataset object.
"""
image_features = {f"observation.images.{cam}": Image() for cam in self.cameras}
features = {
"observation.state": Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
),
"action": Sequence(length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)),
"episode_index": Value(dtype="int64", id=None),
"frame_index": Value(dtype="int64", id=None),
"timestamp": Value(dtype="float32", id=None),
# "next.reward": Value(dtype="float32", id=None),
"next.done": Value(dtype="bool", id=None),
# "next.success": Value(dtype="bool", id=None),
"index": Value(dtype="int64", id=None),
}
update_features = {**image_features, **features}
features = Features(update_features)
hf_dataset = Dataset.from_dict(data_dict, features=features)
hf_dataset.set_transform(hf_transform_to_torch)
return hf_dataset
def cleanup(self):
pass

View File

@@ -1,180 +0,0 @@
from pathlib import Path
import numpy as np
import torch
import tqdm
import zarr
from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes
from lerobot.common.datasets.utils import (
hf_transform_to_torch,
)
class PushTProcessor:
"""Process zarr files formatted like in: https://github.com/real-stanford/diffusion_policy"""
def __init__(self, folder_path: Path, fps: int | None = None):
self.zarr_path = folder_path
if fps is None:
fps = 10
self._fps = fps
@property
def fps(self) -> int:
return self._fps
def is_valid(self):
try:
zarr_data = zarr.open(self.zarr_path, mode="r")
except Exception:
# TODO (azouitine): Handle the exception properly
return False
required_datasets = {
"data/action",
"data/img",
"data/keypoint",
"data/n_contacts",
"data/state",
"meta/episode_ends",
}
for dataset in required_datasets:
if dataset not in zarr_data:
return False
nb_frames = zarr_data["data/img"].shape[0]
required_datasets.remove("meta/episode_ends")
return all(nb_frames == zarr_data[dataset].shape[0] for dataset in required_datasets)
def preprocess(self):
try:
import pymunk
from gym_pusht.envs.pusht import PushTEnv, pymunk_to_shapely
from lerobot.common.datasets.push_dataset_to_hub._diffusion_policy_replay_buffer import (
ReplayBuffer as DiffusionPolicyReplayBuffer,
)
except ModuleNotFoundError as e:
print("`gym_pusht` is not installed. Please install it with `pip install 'lerobot[gym_pusht]'`")
raise e
# as define in env
success_threshold = 0.95 # 95% coverage,
dataset_dict = DiffusionPolicyReplayBuffer.copy_from_path(
self.zarr_path
) # , keys=['img', 'state', 'action'])
episode_ids = torch.from_numpy(dataset_dict.get_episode_idxs())
num_episodes = dataset_dict.meta["episode_ends"].shape[0]
assert len(
{dataset_dict[key].shape[0] for key in dataset_dict.keys()} # noqa: SIM118
), "Some data type dont have the same number of total frames."
# TODO: verify that goal pose is expected to be fixed
goal_pos_angle = np.array([256, 256, np.pi / 4]) # x, y, theta (in radians)
goal_body = PushTEnv.get_goal_pose_body(goal_pos_angle)
imgs = torch.from_numpy(dataset_dict["img"]) # b h w c
states = torch.from_numpy(dataset_dict["state"])
actions = torch.from_numpy(dataset_dict["action"])
ep_dicts = []
episode_data_index = {"from": [], "to": []}
id_from = 0
for episode_id in tqdm.tqdm(range(num_episodes)):
id_to = dataset_dict.meta["episode_ends"][episode_id]
num_frames = id_to - id_from
assert (episode_ids[id_from:id_to] == episode_id).all()
image = imgs[id_from:id_to]
assert image.min() >= 0.0
assert image.max() <= 255.0
image = image.type(torch.uint8)
state = states[id_from:id_to]
agent_pos = state[:, :2]
block_pos = state[:, 2:4]
block_angle = state[:, 4]
reward = torch.zeros(num_frames)
success = torch.zeros(num_frames, dtype=torch.bool)
done = torch.zeros(num_frames, dtype=torch.bool)
for i in range(num_frames):
space = pymunk.Space()
space.gravity = 0, 0
space.damping = 0
# Add walls.
walls = [
PushTEnv.add_segment(space, (5, 506), (5, 5), 2),
PushTEnv.add_segment(space, (5, 5), (506, 5), 2),
PushTEnv.add_segment(space, (506, 5), (506, 506), 2),
PushTEnv.add_segment(space, (5, 506), (506, 506), 2),
]
space.add(*walls)
block_body = PushTEnv.add_tee(space, block_pos[i].tolist(), block_angle[i].item())
goal_geom = pymunk_to_shapely(goal_body, block_body.shapes)
block_geom = pymunk_to_shapely(block_body, block_body.shapes)
intersection_area = goal_geom.intersection(block_geom).area
goal_area = goal_geom.area
coverage = intersection_area / goal_area
reward[i] = np.clip(coverage / success_threshold, 0, 1)
success[i] = coverage > success_threshold
# last step of demonstration is considered done
done[-1] = True
ep_dict = {
"observation.image": [PILImage.fromarray(x.numpy()) for x in image],
"observation.state": agent_pos,
"action": actions[id_from:id_to],
"episode_index": torch.tensor([episode_id] * num_frames, dtype=torch.int),
"frame_index": torch.arange(0, num_frames, 1),
"timestamp": torch.arange(0, num_frames, 1) / self.fps,
# "next.observation.image": image[1:],
# "next.observation.state": agent_pos[1:],
# TODO(rcadene): verify that reward and done are aligned with image and agent_pos
"next.reward": torch.cat([reward[1:], reward[[-1]]]),
"next.done": torch.cat([done[1:], done[[-1]]]),
"next.success": torch.cat([success[1:], success[[-1]]]),
}
ep_dicts.append(ep_dict)
episode_data_index["from"].append(id_from)
episode_data_index["to"].append(id_from + num_frames)
id_from += num_frames
data_dict = concatenate_episodes(ep_dicts)
return data_dict, episode_data_index
def to_hf_dataset(self, data_dict):
features = {
"observation.image": Image(),
"observation.state": Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
),
"action": Sequence(length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)),
"episode_index": Value(dtype="int64", id=None),
"frame_index": Value(dtype="int64", id=None),
"timestamp": Value(dtype="float32", id=None),
"next.reward": Value(dtype="float32", id=None),
"next.done": Value(dtype="bool", id=None),
"next.success": Value(dtype="bool", id=None),
"index": Value(dtype="int64", id=None),
}
features = Features(features)
hf_dataset = Dataset.from_dict(data_dict, features=features)
hf_dataset.set_transform(hf_transform_to_torch)
return hf_dataset
def cleanup(self):
pass

View File

@@ -0,0 +1,214 @@
"""Process zarr files formatted like in: https://github.com/real-stanford/diffusion_policy"""
import shutil
from pathlib import Path
import numpy as np
import torch
import tqdm
import zarr
from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
from lerobot.common.datasets.utils import (
hf_transform_to_torch,
)
# TODO(rcadene): enable for PR video dataset
# from lerobot.common.datasets.video_utils import encode_video_frames
def check_format(raw_dir):
zarr_path = raw_dir / "pusht_cchi_v7_replay.zarr"
zarr_data = zarr.open(zarr_path, mode="r")
required_datasets = {
"data/action",
"data/img",
"data/keypoint",
"data/n_contacts",
"data/state",
"meta/episode_ends",
}
for dataset in required_datasets:
assert dataset in zarr_data
nb_frames = zarr_data["data/img"].shape[0]
required_datasets.remove("meta/episode_ends")
assert all(nb_frames == zarr_data[dataset].shape[0] for dataset in required_datasets)
def load_from_raw(raw_dir, out_dir, fps, video, debug):
try:
import pymunk
from gym_pusht.envs.pusht import PushTEnv, pymunk_to_shapely
from lerobot.common.datasets.push_dataset_to_hub._diffusion_policy_replay_buffer import (
ReplayBuffer as DiffusionPolicyReplayBuffer,
)
except ModuleNotFoundError as e:
print("`gym_pusht` is not installed. Please install it with `pip install 'lerobot[gym_pusht]'`")
raise e
# as define in gmy-pusht env: https://github.com/huggingface/gym-pusht/blob/e0684ff988d223808c0a9dcfaba9dc4991791370/gym_pusht/envs/pusht.py#L174
success_threshold = 0.95 # 95% coverage,
zarr_path = raw_dir / "pusht_cchi_v7_replay.zarr"
zarr_data = DiffusionPolicyReplayBuffer.copy_from_path(zarr_path)
episode_ids = torch.from_numpy(zarr_data.get_episode_idxs())
num_episodes = zarr_data.meta["episode_ends"].shape[0]
assert len(
{zarr_data[key].shape[0] for key in zarr_data.keys()} # noqa: SIM118
), "Some data type dont have the same number of total frames."
# TODO(rcadene): verify that goal pose is expected to be fixed
goal_pos_angle = np.array([256, 256, np.pi / 4]) # x, y, theta (in radians)
goal_body = PushTEnv.get_goal_pose_body(goal_pos_angle)
imgs = torch.from_numpy(zarr_data["img"]) # b h w c
states = torch.from_numpy(zarr_data["state"])
actions = torch.from_numpy(zarr_data["action"])
ep_dicts = []
episode_data_index = {"from": [], "to": []}
id_from = 0
for ep_idx in tqdm.tqdm(range(num_episodes)):
id_to = zarr_data.meta["episode_ends"][ep_idx]
num_frames = id_to - id_from
# sanity check
assert (episode_ids[id_from:id_to] == ep_idx).all()
# get image
image = imgs[id_from:id_to]
assert image.min() >= 0.0
assert image.max() <= 255.0
image = image.type(torch.uint8)
# get state
state = states[id_from:id_to]
agent_pos = state[:, :2]
block_pos = state[:, 2:4]
block_angle = state[:, 4]
# get reward, success, done
reward = torch.zeros(num_frames)
success = torch.zeros(num_frames, dtype=torch.bool)
done = torch.zeros(num_frames, dtype=torch.bool)
for i in range(num_frames):
space = pymunk.Space()
space.gravity = 0, 0
space.damping = 0
# Add walls.
walls = [
PushTEnv.add_segment(space, (5, 506), (5, 5), 2),
PushTEnv.add_segment(space, (5, 5), (506, 5), 2),
PushTEnv.add_segment(space, (506, 5), (506, 506), 2),
PushTEnv.add_segment(space, (5, 506), (506, 506), 2),
]
space.add(*walls)
block_body = PushTEnv.add_tee(space, block_pos[i].tolist(), block_angle[i].item())
goal_geom = pymunk_to_shapely(goal_body, block_body.shapes)
block_geom = pymunk_to_shapely(block_body, block_body.shapes)
intersection_area = goal_geom.intersection(block_geom).area
goal_area = goal_geom.area
coverage = intersection_area / goal_area
reward[i] = np.clip(coverage / success_threshold, 0, 1)
success[i] = coverage > success_threshold
# last step of demonstration is considered done
done[-1] = True
ep_dict = {}
imgs_array = [x.numpy() for x in image]
if video:
# save png images in temporary directory
tmp_imgs_dir = out_dir / "tmp_images"
save_images_concurrently(imgs_array, tmp_imgs_dir)
# encode images to a mp4 video
video_path = out_dir / "videos" / f"observation.image_episode_{ep_idx:06d}.mp4"
encode_video_frames(tmp_imgs_dir, video_path, fps) # noqa: F821
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
# store the episode index
ep_dict["observation.image"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int)
else:
ep_dict["observation.image"] = [PILImage.fromarray(x) for x in imgs_array]
ep_dict["observation.state"] = agent_pos
ep_dict["action"] = actions[id_from:id_to]
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int)
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
# ep_dict["next.observation.image"] = image[1:],
# ep_dict["next.observation.state"] = agent_pos[1:],
# TODO(rcadene)] = verify that reward and done are aligned with image and agent_pos
ep_dict["next.reward"] = torch.cat([reward[1:], reward[[-1]]])
ep_dict["next.done"] = torch.cat([done[1:], done[[-1]]])
ep_dict["next.success"] = torch.cat([success[1:], success[[-1]]])
ep_dicts.append(ep_dict)
episode_data_index["from"].append(id_from)
episode_data_index["to"].append(id_from + num_frames)
id_from += num_frames
# process first episode only
if debug:
break
data_dict = concatenate_episodes(ep_dicts)
return data_dict, episode_data_index
def to_hf_dataset(data_dict, video):
features = {}
if video:
features["observation.image"] = Value(dtype="int64", id="video")
else:
features["observation.image"] = Image()
features["observation.state"] = Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
)
features["action"] = Sequence(
length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)
)
features["episode_index"] = Value(dtype="int64", id=None)
features["frame_index"] = Value(dtype="int64", id=None)
features["timestamp"] = Value(dtype="float32", id=None)
features["next.reward"] = Value(dtype="float32", id=None)
features["next.done"] = Value(dtype="bool", id=None)
features["next.success"] = Value(dtype="bool", id=None)
features["index"] = Value(dtype="int64", id=None)
hf_dataset = Dataset.from_dict(data_dict, features=Features(features))
hf_dataset.set_transform(hf_transform_to_torch)
return hf_dataset
def from_raw_to_lerobot_format(raw_dir: Path, out_dir: Path, fps=None, video=True, debug=False):
# sanity check
check_format(raw_dir)
if fps is None:
fps = 10
data_dict, episode_data_index = load_from_raw(raw_dir, out_dir, fps, video, debug)
hf_dataset = to_hf_dataset(data_dict, video)
info = {
"fps": fps,
"video": video,
}
return hf_dataset, episode_data_index, info

View File

@@ -1,280 +0,0 @@
import os
import re
import shutil
from glob import glob
import numpy as np
import torch
import tqdm
import zarr
from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.push_dataset_to_hub._umi_imagecodecs_numcodecs import register_codecs
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes
from lerobot.common.datasets.utils import (
hf_transform_to_torch,
)
class UmiProcessor:
"""
Process UMI (Universal Manipulation Interface) data stored in Zarr format like in: https://github.com/real-stanford/universal_manipulation_interface
Attributes:
folder_path (str): The path to the folder containing Zarr datasets.
fps (int): Frames per second, used to calculate timestamps for frames.
"""
def __init__(self, folder_path: str, fps: int | None = None):
self.zarr_path = folder_path
if fps is None:
# https://arxiv.org/pdf/2402.10329#table.caption.16
fps = 10 # For umi cup in the wild
self._fps = fps
register_codecs()
@property
def fps(self) -> int:
return self._fps
def is_valid(self) -> bool:
"""
Validates the Zarr folder to ensure it contains all required datasets with consistent frame counts.
Returns:
bool: True if all required datasets are present and have consistent frame counts, False otherwise.
"""
# Check if the Zarr folder is valid
try:
zarr_data = zarr.open(self.zarr_path, mode="r")
except Exception:
# TODO (azouitine): Handle the exception properly
return False
required_datasets = {
"data/robot0_demo_end_pose",
"data/robot0_demo_start_pose",
"data/robot0_eef_pos",
"data/robot0_eef_rot_axis_angle",
"data/robot0_gripper_width",
"meta/episode_ends",
"data/camera0_rgb",
}
for dataset in required_datasets:
if dataset not in zarr_data:
return False
nb_frames = zarr_data["data/camera0_rgb"].shape[0]
required_datasets.remove("meta/episode_ends")
return all(nb_frames == zarr_data[dataset].shape[0] for dataset in required_datasets)
def preprocess(self):
"""
Collects and processes all episodes from the Zarr dataset into structured data dictionaries.
Returns:
Tuple[Dict, Dict]: A tuple containing the structured episode data and episode index mappings.
"""
zarr_data = zarr.open(self.zarr_path, mode="r")
# We process the image data separately because it is too large to fit in memory
end_pose = torch.from_numpy(zarr_data["data/robot0_demo_end_pose"][:])
start_pos = torch.from_numpy(zarr_data["data/robot0_demo_start_pose"][:])
eff_pos = torch.from_numpy(zarr_data["data/robot0_eef_pos"][:])
eff_rot_axis_angle = torch.from_numpy(zarr_data["data/robot0_eef_rot_axis_angle"][:])
gripper_width = torch.from_numpy(zarr_data["data/robot0_gripper_width"][:])
states_pos = torch.cat([eff_pos, eff_rot_axis_angle], dim=1)
states = torch.cat([states_pos, gripper_width], dim=1)
episode_ends = zarr_data["meta/episode_ends"][:]
num_episodes: int = episode_ends.shape[0]
episode_ids = torch.from_numpy(self.get_episode_idxs(episode_ends))
# We convert it in torch tensor later because the jit function does not support torch tensors
episode_ends = torch.from_numpy(episode_ends)
ep_dicts = []
episode_data_index = {"from": [], "to": []}
id_from = 0
for episode_id in tqdm.tqdm(range(num_episodes)):
id_to = episode_ends[episode_id]
num_frames = id_to - id_from
assert (
episode_ids[id_from:id_to] == episode_id
).all(), f"episode_ids[{id_from}:{id_to}] != {episode_id}"
state = states[id_from:id_to]
ep_dict = {
# observation.image will be filled later
"observation.state": state,
"episode_index": torch.tensor([episode_id] * num_frames, dtype=torch.int),
"frame_index": torch.arange(0, num_frames, 1),
"timestamp": torch.arange(0, num_frames, 1) / self.fps,
"episode_data_index_from": torch.tensor([id_from] * num_frames),
"episode_data_index_to": torch.tensor([id_from + num_frames] * num_frames),
"end_pose": end_pose[id_from:id_to],
"start_pos": start_pos[id_from:id_to],
"gripper_width": gripper_width[id_from:id_to],
}
ep_dicts.append(ep_dict)
episode_data_index["from"].append(id_from)
episode_data_index["to"].append(id_from + num_frames)
id_from += num_frames
data_dict = concatenate_episodes(ep_dicts)
total_frames = id_from
data_dict["index"] = torch.arange(0, total_frames, 1)
print("Saving images to disk in temporary folder...")
# datasets.Image() can take a list of paths to images, so we save the images to a temporary folder
# to avoid loading them all in memory
_save_images_concurrently(
data=zarr_data, image_key="data/camera0_rgb", folder_path="tmp_umi_images", max_workers=12
)
print("Saving images to disk in temporary folder... Done")
# Sort files by number eg. 1.png, 2.png, 3.png, 9.png, 10.png instead of 1.png, 10.png, 2.png, 3.png, 9.png
# to correctly match the images with the data
images_path = sorted(
glob("tmp_umi_images/*"), key=lambda x: int(re.search(r"(\d+)\.png$", x).group(1))
)
data_dict["observation.image"] = images_path
print("Images saved to disk, do not forget to delete the folder tmp_umi_images/")
# Cleanup
return data_dict, episode_data_index
def to_hf_dataset(self, data_dict):
"""
Converts the processed data dictionary into a Hugging Face dataset with defined features.
Args:
data_dict (Dict): The data dictionary containing tensors and episode information.
Returns:
Dataset: A Hugging Face dataset constructed from the provided data dictionary.
"""
features = {
"observation.image": Image(),
"observation.state": Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
),
"episode_index": Value(dtype="int64", id=None),
"frame_index": Value(dtype="int64", id=None),
"timestamp": Value(dtype="float32", id=None),
"index": Value(dtype="int64", id=None),
"episode_data_index_from": Value(dtype="int64", id=None),
"episode_data_index_to": Value(dtype="int64", id=None),
# `start_pos` and `end_pos` respectively represent the positions of the end-effector
# at the beginning and the end of the episode.
# `gripper_width` indicates the distance between the grippers, and this value is included
# in the state vector, which comprises the concatenation of the end-effector position
# and gripper width.
"end_pose": Sequence(
length=data_dict["end_pose"].shape[1], feature=Value(dtype="float32", id=None)
),
"start_pos": Sequence(
length=data_dict["start_pos"].shape[1], feature=Value(dtype="float32", id=None)
),
"gripper_width": Sequence(
length=data_dict["gripper_width"].shape[1], feature=Value(dtype="float32", id=None)
),
}
features = Features(features)
hf_dataset = Dataset.from_dict(data_dict, features=features)
hf_dataset.set_transform(hf_transform_to_torch)
return hf_dataset
def cleanup(self):
# Cleanup
if os.path.exists("tmp_umi_images"):
print("Removing temporary images folder")
shutil.rmtree("tmp_umi_images")
print("Cleanup done")
@classmethod
def get_episode_idxs(cls, episode_ends: np.ndarray) -> np.ndarray:
# Optimized and simplified version of this function: https://github.com/real-stanford/universal_manipulation_interface/blob/298776ce251f33b6b3185a98d6e7d1f9ad49168b/diffusion_policy/common/replay_buffer.py#L374
from numba import jit
@jit(nopython=True)
def _get_episode_idxs(episode_ends):
result = np.zeros((episode_ends[-1],), dtype=np.int64)
start_idx = 0
for episode_number, end_idx in enumerate(episode_ends):
result[start_idx:end_idx] = episode_number
start_idx = end_idx
return result
return _get_episode_idxs(episode_ends)
def _clear_folder(folder_path: str):
"""
Clears all the content of the specified folder. Creates the folder if it does not exist.
Args:
folder_path (str): Path to the folder to clear.
Examples:
>>> import os
>>> os.makedirs('example_folder', exist_ok=True)
>>> with open('example_folder/temp_file.txt', 'w') as f:
... f.write('example')
>>> clear_folder('example_folder')
>>> os.listdir('example_folder')
[]
"""
if os.path.exists(folder_path):
for filename in os.listdir(folder_path):
file_path = os.path.join(folder_path, filename)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception as e:
print(f"Failed to delete {file_path}. Reason: {e}")
else:
os.makedirs(folder_path)
def _save_image(img_array: np.array, i: int, folder_path: str):
"""
Saves a single image to the specified folder.
Args:
img_array (ndarray): The numpy array of the image.
i (int): Index of the image, used for naming.
folder_path (str): Path to the folder where the image will be saved.
"""
img = PILImage.fromarray(img_array)
img_format = "PNG" if img_array.dtype == np.uint8 else "JPEG"
img.save(os.path.join(folder_path, f"{i}.{img_format.lower()}"), quality=100)
def _save_images_concurrently(data: dict, image_key: str, folder_path: str, max_workers: int = 4):
from concurrent.futures import ThreadPoolExecutor
"""
Saves images from the zarr_data to the specified folder using multithreading.
Args:
zarr_data (dict): A dictionary containing image data in an array format.
folder_path (str): Path to the folder where images will be saved.
max_workers (int): The maximum number of threads to use for saving images.
"""
num_images = len(data["data/camera0_rgb"])
_clear_folder(folder_path) # Clear or create folder first
with ThreadPoolExecutor(max_workers=max_workers) as executor:
[executor.submit(_save_image, data[image_key][i], i, folder_path) for i in range(num_images)]

View File

@@ -0,0 +1,207 @@
"""Process UMI (Universal Manipulation Interface) data stored in Zarr format like in: https://github.com/real-stanford/universal_manipulation_interface"""
import logging
import shutil
from pathlib import Path
import numpy as np
import torch
import tqdm
import zarr
from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.push_dataset_to_hub._umi_imagecodecs_numcodecs import register_codecs
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
from lerobot.common.datasets.utils import (
hf_transform_to_torch,
)
# TODO(rcadene): enable for PR video dataset
# from lerobot.common.datasets.video_utils import encode_video_frames
def check_format(raw_dir) -> bool:
zarr_path = raw_dir / "cup_in_the_wild.zarr"
zarr_data = zarr.open(zarr_path, mode="r")
required_datasets = {
"data/robot0_demo_end_pose",
"data/robot0_demo_start_pose",
"data/robot0_eef_pos",
"data/robot0_eef_rot_axis_angle",
"data/robot0_gripper_width",
"meta/episode_ends",
"data/camera0_rgb",
}
for dataset in required_datasets:
if dataset not in zarr_data:
return False
# mandatory to access zarr_data
register_codecs()
nb_frames = zarr_data["data/camera0_rgb"].shape[0]
required_datasets.remove("meta/episode_ends")
assert all(nb_frames == zarr_data[dataset].shape[0] for dataset in required_datasets)
def get_episode_idxs(episode_ends: np.ndarray) -> np.ndarray:
# Optimized and simplified version of this function: https://github.com/real-stanford/universal_manipulation_interface/blob/298776ce251f33b6b3185a98d6e7d1f9ad49168b/diffusion_policy/common/replay_buffer.py#L374
from numba import jit
@jit(nopython=True)
def _get_episode_idxs(episode_ends):
result = np.zeros((episode_ends[-1],), dtype=np.int64)
start_idx = 0
for episode_number, end_idx in enumerate(episode_ends):
result[start_idx:end_idx] = episode_number
start_idx = end_idx
return result
return _get_episode_idxs(episode_ends)
def load_from_raw(raw_dir, out_dir, fps, video, debug):
zarr_path = raw_dir / "cup_in_the_wild.zarr"
zarr_data = zarr.open(zarr_path, mode="r")
# We process the image data separately because it is too large to fit in memory
end_pose = torch.from_numpy(zarr_data["data/robot0_demo_end_pose"][:])
start_pos = torch.from_numpy(zarr_data["data/robot0_demo_start_pose"][:])
eff_pos = torch.from_numpy(zarr_data["data/robot0_eef_pos"][:])
eff_rot_axis_angle = torch.from_numpy(zarr_data["data/robot0_eef_rot_axis_angle"][:])
gripper_width = torch.from_numpy(zarr_data["data/robot0_gripper_width"][:])
states_pos = torch.cat([eff_pos, eff_rot_axis_angle], dim=1)
states = torch.cat([states_pos, gripper_width], dim=1)
episode_ends = zarr_data["meta/episode_ends"][:]
num_episodes = episode_ends.shape[0]
episode_ids = torch.from_numpy(get_episode_idxs(episode_ends))
# We convert it in torch tensor later because the jit function does not support torch tensors
episode_ends = torch.from_numpy(episode_ends)
ep_dicts = []
episode_data_index = {"from": [], "to": []}
id_from = 0
for ep_idx in tqdm.tqdm(range(num_episodes)):
id_to = episode_ends[ep_idx]
num_frames = id_to - id_from
# sanity heck
assert (episode_ids[id_from:id_to] == ep_idx).all()
# TODO(rcadene): save temporary images of the episode?
state = states[id_from:id_to]
ep_dict = {}
# load 57MB of images in RAM (400x224x224x3 uint8)
imgs_array = zarr_data["data/camera0_rgb"][id_from:id_to]
if video:
# save png images in temporary directory
tmp_imgs_dir = out_dir / "tmp_images"
save_images_concurrently(imgs_array, tmp_imgs_dir)
# encode images to a mp4 video
video_path = out_dir / "videos" / f"observation.image_episode_{ep_idx:06d}.mp4"
encode_video_frames(tmp_imgs_dir, video_path, fps) # noqa: F821
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
# store the episode index
ep_dict["observation.image"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int)
else:
ep_dict["observation.image"] = [PILImage.fromarray(x) for x in imgs_array]
ep_dict["observation.state"] = state
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int)
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
ep_dict["episode_data_index_from"] = torch.tensor([id_from] * num_frames)
ep_dict["episode_data_index_to"] = torch.tensor([id_from + num_frames] * num_frames)
ep_dict["end_pose"] = end_pose[id_from:id_to]
ep_dict["start_pos"] = start_pos[id_from:id_to]
ep_dict["gripper_width"] = gripper_width[id_from:id_to]
ep_dicts.append(ep_dict)
episode_data_index["from"].append(id_from)
episode_data_index["to"].append(id_from + num_frames)
id_from += num_frames
# process first episode only
if debug:
break
data_dict = concatenate_episodes(ep_dicts)
total_frames = id_from
data_dict["index"] = torch.arange(0, total_frames, 1)
return data_dict, episode_data_index
def to_hf_dataset(data_dict, video):
features = {}
if video:
features["observation.image"] = Value(dtype="int64", id="video")
else:
features["observation.image"] = Image()
features["observation.state"] = Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
)
features["episode_index"] = Value(dtype="int64", id=None)
features["frame_index"] = Value(dtype="int64", id=None)
features["timestamp"] = Value(dtype="float32", id=None)
features["index"] = Value(dtype="int64", id=None)
features["episode_data_index_from"] = Value(dtype="int64", id=None)
features["episode_data_index_to"] = Value(dtype="int64", id=None)
# `start_pos` and `end_pos` respectively represent the positions of the end-effector
# at the beginning and the end of the episode.
# `gripper_width` indicates the distance between the grippers, and this value is included
# in the state vector, which comprises the concatenation of the end-effector position
# and gripper width.
features["end_pose"] = Sequence(
length=data_dict["end_pose"].shape[1], feature=Value(dtype="float32", id=None)
)
features["start_pos"] = Sequence(
length=data_dict["start_pos"].shape[1], feature=Value(dtype="float32", id=None)
)
features["gripper_width"] = Sequence(
length=data_dict["gripper_width"].shape[1], feature=Value(dtype="float32", id=None)
)
hf_dataset = Dataset.from_dict(data_dict, features=Features(features))
hf_dataset.set_transform(hf_transform_to_torch)
return hf_dataset
def from_raw_to_lerobot_format(raw_dir: Path, out_dir: Path, fps=None, video=True, debug=False):
# sanity check
check_format(raw_dir)
if fps is None:
# For umi cup in the wild: https://arxiv.org/pdf/2402.10329#table.caption.16
fps = 10
if not video:
logging.warning(
"Generating UMI dataset without `video=True` creates ~150GB on disk and requires ~80GB in RAM."
)
data_dict, episode_data_index = load_from_raw(raw_dir, out_dir, fps, video, debug)
hf_dataset = to_hf_dataset(data_dict, video)
info = {
"fps": fps,
"video": video,
}
return hf_dataset, episode_data_index, info

View File

@@ -1,3 +1,8 @@
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import numpy
import PIL
import torch
@@ -18,3 +23,16 @@ def concatenate_episodes(ep_dicts):
total_frames = data_dict["frame_index"].shape[0]
data_dict["index"] = torch.arange(0, total_frames, 1)
return data_dict
def save_images_concurrently(imgs_array: numpy.array, out_dir: Path, max_workers: int = 4):
out_dir = Path(out_dir)
out_dir.mkdir(parents=True, exist_ok=True)
def save_image(img_array, i, out_dir):
img = PIL.Image.fromarray(img_array)
img.save(str(out_dir / f"frame_{i:06d}.png"), quality=100)
num_images = len(imgs_array)
with ThreadPoolExecutor(max_workers=max_workers) as executor:
[executor.submit(save_image, imgs_array[i], i, out_dir) for i in range(num_images)]

View File

@@ -0,0 +1,163 @@
"""Process pickle files formatted like in: https://github.com/fyhMer/fowm"""
import pickle
import shutil
from pathlib import Path
import einops
import torch
import tqdm
from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
from lerobot.common.datasets.utils import (
hf_transform_to_torch,
)
# TODO(rcadene): enable for PR video dataset
# from lerobot.common.datasets.video_utils import encode_video_frames
def check_format(raw_dir):
keys = {"actions", "rewards", "dones"}
nested_keys = {"observations": {"rgb", "state"}, "next_observations": {"rgb", "state"}}
xarm_files = list(raw_dir.glob("*.pkl"))
assert len(xarm_files) > 0
with open(xarm_files[0], "rb") as f:
dataset_dict = pickle.load(f)
assert isinstance(dataset_dict, dict)
assert all(k in dataset_dict for k in keys)
# Check for consistent lengths in nested keys
expected_len = len(dataset_dict["actions"])
assert all(len(dataset_dict[key]) == expected_len for key in keys if key in dataset_dict)
for key, subkeys in nested_keys.items():
nested_dict = dataset_dict.get(key, {})
assert all(len(nested_dict[subkey]) == expected_len for subkey in subkeys if subkey in nested_dict)
def load_from_raw(raw_dir, out_dir, fps, video, debug):
pkl_path = raw_dir / "buffer.pkl"
with open(pkl_path, "rb") as f:
pkl_data = pickle.load(f)
ep_dicts = []
episode_data_index = {"from": [], "to": []}
id_from = 0
id_to = 0
ep_idx = 0
total_frames = pkl_data["actions"].shape[0]
for i in tqdm.tqdm(range(total_frames)):
id_to += 1
if not pkl_data["dones"][i]:
continue
num_frames = id_to - id_from
image = torch.tensor(pkl_data["observations"]["rgb"][id_from:id_to])
image = einops.rearrange(image, "b c h w -> b h w c")
state = torch.tensor(pkl_data["observations"]["state"][id_from:id_to])
action = torch.tensor(pkl_data["actions"][id_from:id_to])
# TODO(rcadene): we have a missing last frame which is the observation when the env is done
# it is critical to have this frame for tdmpc to predict a "done observation/state"
# next_image = torch.tensor(pkl_data["next_observations"]["rgb"][id_from:id_to])
# next_state = torch.tensor(pkl_data["next_observations"]["state"][id_from:id_to])
next_reward = torch.tensor(pkl_data["rewards"][id_from:id_to])
next_done = torch.tensor(pkl_data["dones"][id_from:id_to])
ep_dict = {}
imgs_array = [x.numpy() for x in image]
if video:
# save png images in temporary directory
tmp_imgs_dir = out_dir / "tmp_images"
save_images_concurrently(imgs_array, tmp_imgs_dir)
# encode images to a mp4 video
video_path = out_dir / "videos" / f"observation.image_episode_{ep_idx:06d}.mp4"
encode_video_frames(tmp_imgs_dir, video_path, fps) # noqa: F821
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
# store the episode index
ep_dict["observation.image"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int)
else:
ep_dict["observation.image"] = [PILImage.fromarray(x) for x in imgs_array]
ep_dict["observation.state"] = state
ep_dict["action"] = action
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int)
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
# ep_dict["next.observation.image"] = next_image
# ep_dict["next.observation.state"] = next_state
ep_dict["next.reward"] = next_reward
ep_dict["next.done"] = next_done
ep_dicts.append(ep_dict)
episode_data_index["from"].append(id_from)
episode_data_index["to"].append(id_from + num_frames)
id_from = id_to
ep_idx += 1
# process first episode only
if debug:
break
data_dict = concatenate_episodes(ep_dicts)
return data_dict, episode_data_index
def to_hf_dataset(data_dict, video):
features = {}
if video:
features["observation.image"] = Value(dtype="int64", id="video")
else:
features["observation.image"] = Image()
features["observation.state"] = Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
)
features["action"] = Sequence(
length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)
)
features["episode_index"] = Value(dtype="int64", id=None)
features["frame_index"] = Value(dtype="int64", id=None)
features["timestamp"] = Value(dtype="float32", id=None)
features["next.reward"] = Value(dtype="float32", id=None)
features["next.done"] = Value(dtype="bool", id=None)
features["index"] = Value(dtype="int64", id=None)
# TODO(rcadene): add success
# features["next.success"] = Value(dtype='bool', id=None)
hf_dataset = Dataset.from_dict(data_dict, features=Features(features))
hf_dataset.set_transform(hf_transform_to_torch)
return hf_dataset
def from_raw_to_lerobot_format(raw_dir: Path, out_dir: Path, fps=None, video=True, debug=False):
# sanity check
check_format(raw_dir)
if fps is None:
fps = 15
data_dict, episode_data_index = load_from_raw(raw_dir, out_dir, fps, video, debug)
hf_dataset = to_hf_dataset(data_dict, video)
info = {
"fps": fps,
"video": video,
}
return hf_dataset, episode_data_index, info

View File

@@ -1,145 +0,0 @@
import pickle
from pathlib import Path
import einops
import torch
import tqdm
from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes
from lerobot.common.datasets.utils import (
hf_transform_to_torch,
)
class XarmProcessor:
"""Process pickle files formatted like in: https://github.com/fyhMer/fowm"""
def __init__(self, folder_path: str, fps: int | None = None):
self.folder_path = Path(folder_path)
self.keys = {"actions", "rewards", "dones"}
self.nested_keys = {"observations": {"rgb", "state"}, "next_observations": {"rgb", "state"}}
if fps is None:
fps = 15
self._fps = fps
@property
def fps(self) -> int:
return self._fps
def is_valid(self) -> bool:
# get all .pkl files
xarm_files = list(self.folder_path.glob("*.pkl"))
if len(xarm_files) != 1:
return False
try:
with open(xarm_files[0], "rb") as f:
dataset_dict = pickle.load(f)
except Exception:
return False
if not isinstance(dataset_dict, dict):
return False
if not all(k in dataset_dict for k in self.keys):
return False
# Check for consistent lengths in nested keys
try:
expected_len = len(dataset_dict["actions"])
if any(len(dataset_dict[key]) != expected_len for key in self.keys if key in dataset_dict):
return False
for key, subkeys in self.nested_keys.items():
nested_dict = dataset_dict.get(key, {})
if any(
len(nested_dict[subkey]) != expected_len for subkey in subkeys if subkey in nested_dict
):
return False
except KeyError: # If any expected key or subkey is missing
return False
return True # All checks passed
def preprocess(self):
if not self.is_valid():
raise ValueError("The Xarm file is invalid or does not contain the required datasets.")
xarm_files = list(self.folder_path.glob("*.pkl"))
with open(xarm_files[0], "rb") as f:
dataset_dict = pickle.load(f)
ep_dicts = []
episode_data_index = {"from": [], "to": []}
id_from = 0
id_to = 0
episode_id = 0
total_frames = dataset_dict["actions"].shape[0]
for i in tqdm.tqdm(range(total_frames)):
id_to += 1
if not dataset_dict["dones"][i]:
continue
num_frames = id_to - id_from
image = torch.tensor(dataset_dict["observations"]["rgb"][id_from:id_to])
image = einops.rearrange(image, "b c h w -> b h w c")
state = torch.tensor(dataset_dict["observations"]["state"][id_from:id_to])
action = torch.tensor(dataset_dict["actions"][id_from:id_to])
# TODO(rcadene): we have a missing last frame which is the observation when the env is done
# it is critical to have this frame for tdmpc to predict a "done observation/state"
# next_image = torch.tensor(dataset_dict["next_observations"]["rgb"][id_from:id_to])
# next_state = torch.tensor(dataset_dict["next_observations"]["state"][id_from:id_to])
next_reward = torch.tensor(dataset_dict["rewards"][id_from:id_to])
next_done = torch.tensor(dataset_dict["dones"][id_from:id_to])
ep_dict = {
"observation.image": [PILImage.fromarray(x.numpy()) for x in image],
"observation.state": state,
"action": action,
"episode_index": torch.tensor([episode_id] * num_frames, dtype=torch.int),
"frame_index": torch.arange(0, num_frames, 1),
"timestamp": torch.arange(0, num_frames, 1) / self.fps,
# "next.observation.image": next_image,
# "next.observation.state": next_state,
"next.reward": next_reward,
"next.done": next_done,
}
ep_dicts.append(ep_dict)
episode_data_index["from"].append(id_from)
episode_data_index["to"].append(id_from + num_frames)
id_from = id_to
episode_id += 1
data_dict = concatenate_episodes(ep_dicts)
return data_dict, episode_data_index
def to_hf_dataset(self, data_dict):
features = {
"observation.image": Image(),
"observation.state": Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
),
"action": Sequence(length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)),
"episode_index": Value(dtype="int64", id=None),
"frame_index": Value(dtype="int64", id=None),
"timestamp": Value(dtype="float32", id=None),
"next.reward": Value(dtype="float32", id=None),
"next.done": Value(dtype="bool", id=None),
#'next.success': Value(dtype='bool', id=None),
"index": Value(dtype="int64", id=None),
}
features = Features(features)
hf_dataset = Dataset.from_dict(data_dict, features=features)
hf_dataset.set_transform(hf_transform_to_torch)
return hf_dataset
def cleanup(self):
pass