Convert datasets to av1 encoding (#302)

This commit is contained in:
Simon Alibert
2024-07-22 20:08:59 +02:00
committed by GitHub
parent 461d5472d3
commit 0b21210d72
571 changed files with 988 additions and 1311 deletions

View File

@@ -35,9 +35,8 @@ from lerobot.common.datasets.utils import (
)
from lerobot.common.datasets.video_utils import VideoFrame, load_from_videos
# For maintainers, see lerobot/common/datasets/push_dataset_to_hub/codebase_version.md
CODEBASE_VERSION = "v1.5"
# For maintainers, see lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md
CODEBASE_VERSION = "v1.6"
DATA_DIR = Path(os.environ["DATA_DIR"]) if "DATA_DIR" in os.environ else None

View File

@@ -10,7 +10,8 @@ For instance, [`lerobot/pusht`](https://huggingface.co/datasets/lerobot/pusht) h
- [v1.2](https://huggingface.co/datasets/lerobot/pusht/tree/v1.2)
- [v1.3](https://huggingface.co/datasets/lerobot/pusht/tree/v1.3)
- [v1.4](https://huggingface.co/datasets/lerobot/pusht/tree/v1.4)
- [v1.5](https://huggingface.co/datasets/lerobot/pusht/tree/v1.5) <-- last version
- [v1.5](https://huggingface.co/datasets/lerobot/pusht/tree/v1.5)
- [v1.6](https://huggingface.co/datasets/lerobot/pusht/tree/v1.6) <-- last version
- [main](https://huggingface.co/datasets/lerobot/pusht/tree/main) <-- points to the last version
Starting with v1.6, every dataset pushed to the hub or saved locally also has this version number in its
@@ -45,13 +46,11 @@ for repo_id in available_datasets:
dataset_info = api.list_repo_refs(repo_id, repo_type="dataset")
branches = [b.name for b in dataset_info.branches]
if CODEBASE_VERSION in branches:
# First check if the newer version already exists.
print(f"Found existing branch for {repo_id}. Please contact a member of the core LeRobot team.")
print("Exiting early")
break
print(f"{repo_id} already @{CODEBASE_VERSION}, skipping.")
continue
else:
# Now create a branch named after the new version by branching out from "main"
# which is expected to be the preceding version
api.create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION, revision="main")
print(f"{repo_id} successfully updated")
print(f"{repo_id} successfully updated @{CODEBASE_VERSION}")
```

View File

@@ -19,8 +19,8 @@ This file contains download scripts for raw datasets.
Example of usage:
```
python lerobot/common/datasets/push_dataset_to_hub/_download_raw.py \
--raw-dir data/cadene/pusht_raw \
--repo-id cadene/pusht_raw
--raw-dir data/lerobot-raw/pusht_raw \
--repo-id lerobot-raw/pusht_raw
```
"""
@@ -31,63 +31,65 @@ from pathlib import Path
from huggingface_hub import snapshot_download
AVAILABLE_RAW_REPO_IDS = [
"lerobot-raw/aloha_mobile_cabinet_raw",
"lerobot-raw/aloha_mobile_chair_raw",
"lerobot-raw/aloha_mobile_elevator_raw",
"lerobot-raw/aloha_mobile_shrimp_raw",
"lerobot-raw/aloha_mobile_wash_pan_raw",
"lerobot-raw/aloha_mobile_wipe_wine_raw",
"lerobot-raw/aloha_sim_insertion_human_raw",
"lerobot-raw/aloha_sim_insertion_scripted_raw",
"lerobot-raw/aloha_sim_transfer_cube_human_raw",
"lerobot-raw/aloha_sim_transfer_cube_scripted_raw",
"lerobot-raw/aloha_static_battery_raw",
"lerobot-raw/aloha_static_candy_raw",
"lerobot-raw/aloha_static_coffee_new_raw",
"lerobot-raw/aloha_static_coffee_raw",
"lerobot-raw/aloha_static_cups_open_raw",
"lerobot-raw/aloha_static_fork_pick_up_raw",
"lerobot-raw/aloha_static_pingpong_test_raw",
"lerobot-raw/aloha_static_pro_pencil_raw",
"lerobot-raw/aloha_static_screw_driver_raw",
"lerobot-raw/aloha_static_tape_raw",
"lerobot-raw/aloha_static_thread_velcro_raw",
"lerobot-raw/aloha_static_towel_raw",
"lerobot-raw/aloha_static_vinh_cup_left_raw",
"lerobot-raw/aloha_static_vinh_cup_raw",
"lerobot-raw/aloha_static_ziploc_slide_raw",
"lerobot-raw/pusht_raw",
"lerobot-raw/umi_cup_in_the_wild_raw",
"lerobot-raw/unitreeh1_fold_clothes_raw",
"lerobot-raw/unitreeh1_rearrange_objects_raw",
"lerobot-raw/unitreeh1_two_robot_greeting_raw",
"lerobot-raw/unitreeh1_warehouse_raw",
"lerobot-raw/xarm_lift_medium_raw",
"lerobot-raw/xarm_lift_medium_replay_raw",
"lerobot-raw/xarm_push_medium_raw",
"lerobot-raw/xarm_push_medium_replay_raw",
]
from lerobot.common.datasets.push_dataset_to_hub.utils import check_repo_id
# Registry of the raw datasets known to this script.
# Keys are raw repo ids of the form "<user_or_org>/<dataset>_raw"; values are the name of the raw
# format of that dataset ("aloha_hdf5", "pusht_zarr", "umi_zarr" or "xarm_pkl").
# Iterating over this mapping yields the repo ids, which is what `download_all_raw_datasets` relies on.
# {raw_repo_id: raw_format}
AVAILABLE_RAW_REPO_IDS = {
"lerobot-raw/aloha_mobile_cabinet_raw": "aloha_hdf5",
"lerobot-raw/aloha_mobile_chair_raw": "aloha_hdf5",
"lerobot-raw/aloha_mobile_elevator_raw": "aloha_hdf5",
"lerobot-raw/aloha_mobile_shrimp_raw": "aloha_hdf5",
"lerobot-raw/aloha_mobile_wash_pan_raw": "aloha_hdf5",
"lerobot-raw/aloha_mobile_wipe_wine_raw": "aloha_hdf5",
"lerobot-raw/aloha_sim_insertion_human_raw": "aloha_hdf5",
"lerobot-raw/aloha_sim_insertion_scripted_raw": "aloha_hdf5",
"lerobot-raw/aloha_sim_transfer_cube_human_raw": "aloha_hdf5",
"lerobot-raw/aloha_sim_transfer_cube_scripted_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_battery_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_candy_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_coffee_new_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_coffee_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_cups_open_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_fork_pick_up_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_pingpong_test_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_pro_pencil_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_screw_driver_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_tape_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_thread_velcro_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_towel_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_vinh_cup_left_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_vinh_cup_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_ziploc_slide_raw": "aloha_hdf5",
"lerobot-raw/pusht_raw": "pusht_zarr",
"lerobot-raw/umi_cup_in_the_wild_raw": "umi_zarr",
"lerobot-raw/unitreeh1_fold_clothes_raw": "aloha_hdf5",
"lerobot-raw/unitreeh1_rearrange_objects_raw": "aloha_hdf5",
"lerobot-raw/unitreeh1_two_robot_greeting_raw": "aloha_hdf5",
"lerobot-raw/unitreeh1_warehouse_raw": "aloha_hdf5",
"lerobot-raw/xarm_lift_medium_raw": "xarm_pkl",
"lerobot-raw/xarm_lift_medium_replay_raw": "xarm_pkl",
"lerobot-raw/xarm_push_medium_raw": "xarm_pkl",
"lerobot-raw/xarm_push_medium_replay_raw": "xarm_pkl",
}
def download_raw(raw_dir: Path, repo_id: str):
# Check repo_id is well formated
if len(repo_id.split("/")) != 2:
raise ValueError(
f"`repo_id` is expected to contain a community or user id `/` the name of the dataset (e.g. 'lerobot/pusht'), but contains '{repo_id}'."
)
check_repo_id(repo_id)
user_id, dataset_id = repo_id.split("/")
if not dataset_id.endswith("_raw"):
warnings.warn(
f"`dataset_id` ({dataset_id}) doesn't end with '_raw' (e.g. 'lerobot/pusht_raw'). Following this naming convention by renaming your repository is advised, but not mandatory.",
f"""`dataset_id` ({dataset_id}) doesn't end with '_raw' (e.g. 'lerobot/pusht_raw'). Following this
naming convention by renaming your repository is advised, but not mandatory.""",
stacklevel=1,
)
# Send warning if raw_dir isn't well formatted
if raw_dir.parts[-2] != user_id or raw_dir.parts[-1] != dataset_id:
warnings.warn(
f"`raw_dir` ({raw_dir}) doesn't contain a community or user id `/` the name of the dataset that match the `repo_id` (e.g. 'data/lerobot/pusht_raw'). Following this naming convention is advised, but not mandatory.",
f"""`raw_dir` ({raw_dir}) doesn't contain a community or user id `/` the name of the dataset that
match the `repo_id` (e.g. 'data/lerobot/pusht_raw'). Following this naming convention is advised,
but not mandatory.""",
stacklevel=1,
)
raw_dir.mkdir(parents=True, exist_ok=True)
@@ -97,8 +99,9 @@ def download_raw(raw_dir: Path, repo_id: str):
logging.info(f"Finish downloading from huggingface.co/{user_id} for {dataset_id}")
def download_all_raw_datasets():
data_dir = Path("data")
def download_all_raw_datasets(data_dir: Path | None = None):
if data_dir is None:
data_dir = Path("data")
for repo_id in AVAILABLE_RAW_REPO_IDS:
raw_dir = data_dir / repo_id
download_raw(raw_dir, repo_id)
@@ -106,7 +109,8 @@ def download_all_raw_datasets():
def main():
parser = argparse.ArgumentParser(
description=f"A script to download raw datasets from Hugging Face hub to a local directory. Here is a non exhaustive list of available repositories to use in `--repo-id`: {AVAILABLE_RAW_REPO_IDS}",
description=f"""A script to download raw datasets from Hugging Face hub to a local directory. Here is a
non exhaustive list of available repositories to use in `--repo-id`: {AVAILABLE_RAW_REPO_IDS}""",
)
parser.add_argument(
@@ -119,7 +123,8 @@ def main():
"--repo-id",
type=str,
required=True,
help="Repositery identifier on Hugging Face: a community or a user name `/` the name of the dataset (e.g. `lerobot/pusht_raw`, `cadene/aloha_sim_insertion_human_raw`).",
help="""Repositery identifier on Hugging Face: a community or a user name `/` the name of
the dataset (e.g. `lerobot/pusht_raw`, `cadene/aloha_sim_insertion_human_raw`).""",
)
args = parser.parse_args()
download_raw(**vars(args))

View File

@@ -0,0 +1,184 @@
#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Use this script to batch encode lerobot datasets from their raw format to LeRobotDataset and push their updated
version to the hub. Under the hood, this script reuses 'push_dataset_to_hub.py'. It assumes that you already
downloaded raw datasets, which you can do with the related '_download_raw.py' script.
For instance, for codebase_version = 'v1.6', the following command was run, assuming raw datasets from
lerobot-raw were downloaded in 'raw/datasets/directory':
```bash
python lerobot/common/datasets/push_dataset_to_hub/_encode_datasets.py \
--raw-dir raw/datasets/directory \
--raw-repo-ids lerobot-raw \
--local-dir push/datasets/directory \
--tests-data-dir tests/data \
--push-repo lerobot \
--vcodec libsvtav1 \
--pix-fmt yuv420p \
--g 2 \
--crf 30
```
"""
import argparse
from pathlib import Path
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
from lerobot.common.datasets.push_dataset_to_hub._download_raw import AVAILABLE_RAW_REPO_IDS
from lerobot.common.datasets.push_dataset_to_hub.utils import check_repo_id
from lerobot.scripts.push_dataset_to_hub import push_dataset_to_hub
def get_push_repo_id_from_raw(raw_repo_id: str, push_repo: str) -> str:
    """Build the repo id to push to from a raw repo id.

    The second `/`-separated component of `raw_repo_id` is taken as the dataset name, a single
    trailing "_raw" is stripped from it, and the result is prefixed with `push_repo`
    (e.g. "lerobot-raw/pusht_raw" with push_repo "lerobot" gives "lerobot/pusht").
    """
    name = raw_repo_id.split("/")[1].removesuffix("_raw")
    return "{}/{}".format(push_repo, name)
def encode_datasets(
raw_dir: Path,
raw_repo_ids: list[str],
push_repo: str,
vcodec: str,
pix_fmt: str,
g: int,
crf: int,
local_dir: Path | None = None,
tests_data_dir: Path | None = None,
raw_format: str | None = None,
dry_run: bool = False,
) -> None:
if len(raw_repo_ids) == 1 and raw_repo_ids[0].lower() == "lerobot-raw":
raw_repo_ids_format = AVAILABLE_RAW_REPO_IDS
else:
if raw_format is None:
raise ValueError(raw_format)
raw_repo_ids_format = {id_: raw_format for id_ in raw_repo_ids}
for raw_repo_id, repo_raw_format in raw_repo_ids_format.items():
check_repo_id(raw_repo_id)
dataset_repo_id_push = get_push_repo_id_from_raw(raw_repo_id, push_repo)
dataset_raw_dir = raw_dir / raw_repo_id
dataset_dir = local_dir / dataset_repo_id_push if local_dir is not None else None
encoding = {
"vcodec": vcodec,
"pix_fmt": pix_fmt,
"g": g,
"crf": crf,
}
if not (dataset_raw_dir).is_dir():
raise NotADirectoryError(dataset_raw_dir)
if not dry_run:
push_dataset_to_hub(
dataset_raw_dir,
raw_format=repo_raw_format,
repo_id=dataset_repo_id_push,
local_dir=dataset_dir,
resume=True,
encoding=encoding,
tests_data_dir=tests_data_dir,
)
else:
print(
f"DRY RUN: {dataset_raw_dir} --> {dataset_dir} --> {dataset_repo_id_push}@{CODEBASE_VERSION}"
)
def main():
    """Parse the command line and forward every option to `encode_datasets`."""
    # (flag, add_argument keyword options), registered in this order.
    arg_specs = [
        (
            "--raw-dir",
            {
                "type": Path,
                "default": Path("data"),
                "help": "Directory where raw datasets are located.",
            },
        ),
        (
            "--raw-repo-ids",
            {
                "type": str,
                "nargs": "*",
                "default": ["lerobot-raw"],
                "help": """Raw dataset repo ids. if 'lerobot-raw', the keys from `AVAILABLE_RAW_REPO_IDS` will be
used and raw datasets will be fetched from the 'lerobot-raw/' repo and pushed with their
associated format. It is assumed that each dataset is located at `raw_dir / raw_repo_id` """,
            },
        ),
        (
            "--raw-format",
            {
                "type": str,
                "default": None,
                "help": """Raw format to use for the raw repo-ids. Must be specified if --raw-repo-ids is not
'lerobot-raw'""",
            },
        ),
        (
            "--local-dir",
            {
                "type": Path,
                "default": None,
                "help": """When provided, writes the dataset converted to LeRobotDataset format in this directory
(e.g. `data/lerobot/aloha_mobile_chair`).""",
            },
        ),
        (
            "--push-repo",
            {
                "type": str,
                "default": "lerobot",
                "help": "Repo to upload datasets to",
            },
        ),
        (
            "--vcodec",
            {
                "type": str,
                "default": "libsvtav1",
                "help": "Codec to use for encoding videos",
            },
        ),
        (
            "--pix-fmt",
            {
                "type": str,
                "default": "yuv420p",
                "help": "Pixel formats (chroma subsampling) to be used for encoding",
            },
        ),
        (
            "--g",
            {
                "type": int,
                "default": 2,
                "help": "Group of pictures sizes to be used for encoding.",
            },
        ),
        (
            "--crf",
            {
                "type": int,
                "default": 30,
                "help": "Constant rate factors to be used for encoding.",
            },
        ),
        (
            "--tests-data-dir",
            {
                "type": Path,
                "default": None,
                "help": (
                    "When provided, save tests artifacts into the given directory "
                    "(e.g. `--tests-data-dir tests/data` will save to tests/data/{--repo-id})."
                ),
            },
        ),
        (
            "--dry-run",
            {
                "type": int,
                "default": 0,
                "help": "If not set to 0, this script won't download or upload anything.",
            },
        ),
    ]

    cli = argparse.ArgumentParser()
    for flag, options in arg_specs:
        cli.add_argument(flag, **options)

    # argparse turns "--raw-dir" into "raw_dir" etc., matching the keyword names of `encode_datasets`.
    namespace = cli.parse_args()
    encode_datasets(**vars(namespace))
if __name__ == "__main__":
main()

View File

@@ -29,7 +29,11 @@ from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
from lerobot.common.datasets.push_dataset_to_hub.utils import (
concatenate_episodes,
get_default_encoding,
save_images_concurrently,
)
from lerobot.common.datasets.utils import (
calculate_episode_data_index,
hf_transform_to_torch,
@@ -72,7 +76,14 @@ def check_format(raw_dir) -> bool:
assert c < h and c < w, f"Expect (h,w,c) image format but ({h=},{w=},{c=}) provided."
def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None):
def load_from_raw(
raw_dir: Path,
videos_dir: Path,
fps: int,
video: bool,
episodes: list[int] | None = None,
encoding: dict | None = None,
):
# only frames from simulation are uncompressed
compressed_images = "sim" not in raw_dir.name
@@ -123,7 +134,7 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod
# encode images to a mp4 video
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
video_path = videos_dir / fname
encode_video_frames(tmp_imgs_dir, video_path, fps)
encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {}))
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
@@ -200,6 +211,7 @@ def from_raw_to_lerobot_format(
fps: int | None = None,
video: bool = True,
episodes: list[int] | None = None,
encoding: dict | None = None,
):
# sanity check
check_format(raw_dir)
@@ -207,7 +219,7 @@ def from_raw_to_lerobot_format(
if fps is None:
fps = 50
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes)
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding)
hf_dataset = to_hf_dataset(data_dict, video)
episode_data_index = calculate_episode_data_index(hf_dataset)
info = {
@@ -215,4 +227,7 @@ def from_raw_to_lerobot_format(
"fps": fps,
"video": video,
}
if video:
info["encoding"] = get_default_encoding()
return hf_dataset, episode_data_index, info

View File

@@ -81,8 +81,9 @@ def from_raw_to_lerobot_format(
fps: int | None = None,
video: bool = True,
episodes: list[int] | None = None,
encoding: dict | None = None,
):
if video or episodes is not None:
if video or episodes or encoding is not None:
# TODO(aliberts): support this
raise NotImplementedError

View File

@@ -18,6 +18,7 @@ Contains utilities to process raw data format from dora-record
"""
import re
import warnings
from pathlib import Path
import pandas as pd
@@ -199,6 +200,7 @@ def from_raw_to_lerobot_format(
fps: int | None = None,
video: bool = True,
episodes: list[int] | None = None,
encoding: dict | None = None,
):
# sanity check
check_format(raw_dir)
@@ -211,6 +213,12 @@ def from_raw_to_lerobot_format(
if not video:
raise NotImplementedError()
if encoding is not None:
warnings.warn(
"Video encoding is currently done outside of LeRobot for the dora_parquet format.",
stacklevel=1,
)
data_df = load_from_raw(raw_dir, videos_dir, fps, episodes)
hf_dataset = to_hf_dataset(data_df, video)
episode_data_index = calculate_episode_data_index(hf_dataset)
@@ -219,4 +227,7 @@ def from_raw_to_lerobot_format(
"fps": fps,
"video": video,
}
if video:
info["encoding"] = "unknown"
return hf_dataset, episode_data_index, info

View File

@@ -26,7 +26,11 @@ from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
from lerobot.common.datasets.push_dataset_to_hub.utils import (
concatenate_episodes,
get_default_encoding,
save_images_concurrently,
)
from lerobot.common.datasets.utils import (
calculate_episode_data_index,
hf_transform_to_torch,
@@ -62,6 +66,7 @@ def load_from_raw(
video: bool,
episodes: list[int] | None = None,
keypoints_instead_of_image: bool = False,
encoding: dict | None = None,
):
try:
import pymunk
@@ -172,7 +177,7 @@ def load_from_raw(
# encode images to a mp4 video
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
video_path = videos_dir / fname
encode_video_frames(tmp_imgs_dir, video_path, fps)
encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {}))
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
@@ -244,6 +249,7 @@ def from_raw_to_lerobot_format(
fps: int | None = None,
video: bool = True,
episodes: list[int] | None = None,
encoding: dict | None = None,
):
# Manually change this to True to use keypoints of the T instead of an image observation (but don't merge
# with True). Also make sure to use video = 0 in the `push_dataset_to_hub.py` script.
@@ -255,7 +261,7 @@ def from_raw_to_lerobot_format(
if fps is None:
fps = 10
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, keypoints_instead_of_image)
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, keypoints_instead_of_image, encoding)
hf_dataset = to_hf_dataset(data_dict, video, keypoints_instead_of_image)
episode_data_index = calculate_episode_data_index(hf_dataset)
info = {
@@ -263,4 +269,7 @@ def from_raw_to_lerobot_format(
"fps": fps,
"video": video if not keypoints_instead_of_image else 0,
}
if video:
info["encoding"] = get_default_encoding()
return hf_dataset, episode_data_index, info

View File

@@ -27,7 +27,11 @@ from PIL import Image as PILImage
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
from lerobot.common.datasets.push_dataset_to_hub._umi_imagecodecs_numcodecs import register_codecs
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
from lerobot.common.datasets.push_dataset_to_hub.utils import (
concatenate_episodes,
get_default_encoding,
save_images_concurrently,
)
from lerobot.common.datasets.utils import (
calculate_episode_data_index,
hf_transform_to_torch,
@@ -60,7 +64,14 @@ def check_format(raw_dir) -> bool:
assert all(nb_frames == zarr_data[dataset].shape[0] for dataset in required_datasets)
def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None):
def load_from_raw(
raw_dir: Path,
videos_dir: Path,
fps: int,
video: bool,
episodes: list[int] | None = None,
encoding: dict | None = None,
):
zarr_path = raw_dir / "cup_in_the_wild.zarr"
zarr_data = zarr.open(zarr_path, mode="r")
@@ -88,49 +99,61 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod
to_ids.append(to_idx)
from_idx = to_idx
ep_dicts_dir = videos_dir / "ep_dicts"
ep_dicts_dir.mkdir(exist_ok=True, parents=True)
ep_dicts = []
ep_ids = episodes if episodes else range(num_episodes)
for ep_idx, selected_ep_idx in tqdm.tqdm(enumerate(ep_ids)):
from_idx = from_ids[selected_ep_idx]
to_idx = to_ids[selected_ep_idx]
num_frames = to_idx - from_idx
ep_dict_path = ep_dicts_dir / f"{ep_idx}"
if not ep_dict_path.is_file():
from_idx = from_ids[selected_ep_idx]
to_idx = to_ids[selected_ep_idx]
num_frames = to_idx - from_idx
# TODO(rcadene): save temporary images of the episode?
# TODO(rcadene): save temporary images of the episode?
state = states[from_idx:to_idx]
state = states[from_idx:to_idx]
ep_dict = {}
ep_dict = {}
# load 57MB of images in RAM (400x224x224x3 uint8)
imgs_array = zarr_data["data/camera0_rgb"][from_idx:to_idx]
img_key = "observation.image"
if video:
# save png images in temporary directory
tmp_imgs_dir = videos_dir / "tmp_images"
save_images_concurrently(imgs_array, tmp_imgs_dir)
# load 57MB of images in RAM (400x224x224x3 uint8)
imgs_array = zarr_data["data/camera0_rgb"][from_idx:to_idx]
img_key = "observation.image"
if video:
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
video_path = videos_dir / fname
if not video_path.is_file():
# save png images in temporary directory
tmp_imgs_dir = videos_dir / "tmp_images"
save_images_concurrently(imgs_array, tmp_imgs_dir)
# encode images to a mp4 video
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
video_path = videos_dir / fname
encode_video_frames(tmp_imgs_dir, video_path, fps)
# encode images to a mp4 video
encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {}))
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
# store the reference to the video frame
ep_dict[img_key] = [{"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames)]
# store the reference to the video frame
ep_dict[img_key] = [
{"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames)
]
else:
ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array]
ep_dict["observation.state"] = state
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64)
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
ep_dict["episode_data_index_from"] = torch.tensor([from_idx] * num_frames)
ep_dict["episode_data_index_to"] = torch.tensor([from_idx + num_frames] * num_frames)
ep_dict["end_pose"] = end_pose[from_idx:to_idx]
ep_dict["start_pos"] = start_pos[from_idx:to_idx]
ep_dict["gripper_width"] = gripper_width[from_idx:to_idx]
torch.save(ep_dict, ep_dict_path)
else:
ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array]
ep_dict = torch.load(ep_dict_path)
ep_dict["observation.state"] = state
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64)
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
ep_dict["episode_data_index_from"] = torch.tensor([from_idx] * num_frames)
ep_dict["episode_data_index_to"] = torch.tensor([from_idx + num_frames] * num_frames)
ep_dict["end_pose"] = end_pose[from_idx:to_idx]
ep_dict["start_pos"] = start_pos[from_idx:to_idx]
ep_dict["gripper_width"] = gripper_width[from_idx:to_idx]
ep_dicts.append(ep_dict)
data_dict = concatenate_episodes(ep_dicts)
@@ -183,6 +206,7 @@ def from_raw_to_lerobot_format(
fps: int | None = None,
video: bool = True,
episodes: list[int] | None = None,
encoding: dict | None = None,
):
# sanity check
check_format(raw_dir)
@@ -196,7 +220,7 @@ def from_raw_to_lerobot_format(
"Generating UMI dataset without `video=True` creates ~150GB on disk and requires ~80GB in RAM."
)
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes)
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding)
hf_dataset = to_hf_dataset(data_dict, video)
episode_data_index = calculate_episode_data_index(hf_dataset)
info = {
@@ -204,4 +228,7 @@ def from_raw_to_lerobot_format(
"fps": fps,
"video": video,
}
if video:
info["encoding"] = get_default_encoding()
return hf_dataset, episode_data_index, info

View File

@@ -13,6 +13,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
@@ -20,6 +21,8 @@ import numpy
import PIL
import torch
from lerobot.common.datasets.video_utils import encode_video_frames
def concatenate_episodes(ep_dicts):
data_dict = {}
@@ -51,3 +54,21 @@ def save_images_concurrently(imgs_array: numpy.array, out_dir: Path, max_workers
num_images = len(imgs_array)
with ThreadPoolExecutor(max_workers=max_workers) as executor:
[executor.submit(save_image, imgs_array[i], i, out_dir) for i in range(num_images)]
def get_default_encoding() -> dict:
    """Returns the default ffmpeg encoding parameters used by `encode_video_frames`.

    The defaults are read from the signature of `encode_video_frames`, restricted to the
    `vcodec`, `pix_fmt`, `g` and `crf` parameters that declare a default value.
    """
    encoding_keys = ["vcodec", "pix_fmt", "g", "crf"]
    parameters = inspect.signature(encode_video_frames).parameters
    defaults = {}
    for name, param in parameters.items():
        if param.default is inspect.Parameter.empty:
            continue
        if name in encoding_keys:
            defaults[name] = param.default
    return defaults
def check_repo_id(repo_id: str) -> None:
    """Validate that `repo_id` has the form '<community or user id>/<dataset name>'.

    Args:
        repo_id: Repository identifier to check (e.g. 'lerobot/pusht').

    Raises:
        ValueError: If `repo_id` does not consist of exactly two non-empty parts separated by
            a single `/`.
    """
    parts = repo_id.split("/")
    # Require both components to be non-empty so that ids such as '/pusht' or 'lerobot/' are
    # rejected as well, not only ids with a wrong number of `/`.
    if len(parts) != 2 or not all(parts):
        raise ValueError(
            "`repo_id` is expected to contain a community or user id `/` the name of the dataset\n"
            f"(e.g. 'lerobot/pusht'), but contains '{repo_id}'."
        )

View File

@@ -26,7 +26,11 @@ from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
from lerobot.common.datasets.push_dataset_to_hub.utils import (
concatenate_episodes,
get_default_encoding,
save_images_concurrently,
)
from lerobot.common.datasets.utils import (
calculate_episode_data_index,
hf_transform_to_torch,
@@ -56,7 +60,14 @@ def check_format(raw_dir):
assert all(len(nested_dict[subkey]) == expected_len for subkey in subkeys if subkey in nested_dict)
def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None):
def load_from_raw(
raw_dir: Path,
videos_dir: Path,
fps: int,
video: bool,
episodes: list[int] | None = None,
encoding: dict | None = None,
):
pkl_path = raw_dir / "buffer.pkl"
with open(pkl_path, "rb") as f:
@@ -105,7 +116,7 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod
# encode images to a mp4 video
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
video_path = videos_dir / fname
encode_video_frames(tmp_imgs_dir, video_path, fps)
encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {}))
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
@@ -167,6 +178,7 @@ def from_raw_to_lerobot_format(
fps: int | None = None,
video: bool = True,
episodes: list[int] | None = None,
encoding: dict | None = None,
):
# sanity check
check_format(raw_dir)
@@ -174,7 +186,7 @@ def from_raw_to_lerobot_format(
if fps is None:
fps = 15
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes)
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding)
hf_dataset = to_hf_dataset(data_dict, video)
episode_data_index = calculate_episode_data_index(hf_dataset)
info = {
@@ -182,4 +194,7 @@ def from_raw_to_lerobot_format(
"fps": fps,
"video": video,
}
if video:
info["encoding"] = get_default_encoding()
return hf_dataset, episode_data_index, info

View File

@@ -166,10 +166,10 @@ def encode_video_frames(
imgs_dir: Path,
video_path: Path,
fps: int,
video_codec: str = "libsvtav1",
pixel_format: str = "yuv420p",
group_of_pictures_size: int | None = 2,
constant_rate_factor: int | None = 30,
vcodec: str = "libsvtav1",
pix_fmt: str = "yuv420p",
g: int | None = 2,
crf: int | None = 30,
fast_decode: int = 0,
log_level: str | None = "error",
overwrite: bool = False,
@@ -183,20 +183,20 @@ def encode_video_frames(
("-f", "image2"),
("-r", str(fps)),
("-i", str(imgs_dir / "frame_%06d.png")),
("-vcodec", video_codec),
("-pix_fmt", pixel_format),
("-vcodec", vcodec),
("-pix_fmt", pix_fmt),
]
)
if group_of_pictures_size is not None:
ffmpeg_args["-g"] = str(group_of_pictures_size)
if g is not None:
ffmpeg_args["-g"] = str(g)
if constant_rate_factor is not None:
ffmpeg_args["-crf"] = str(constant_rate_factor)
if crf is not None:
ffmpeg_args["-crf"] = str(crf)
if fast_decode:
key = "-svtav1-params" if video_codec == "libsvtav1" else "-tune"
value = f"fast-decode={fast_decode}" if video_codec == "libsvtav1" else "fastdecode"
key = "-svtav1-params" if vcodec == "libsvtav1" else "-tune"
value = f"fast-decode={fast_decode}" if vcodec == "libsvtav1" else "fastdecode"
ffmpeg_args[key] = value
if log_level is not None: