Convert datasets to av1 encoding (#302)
This commit is contained in:
@@ -35,9 +35,8 @@ from lerobot.common.datasets.utils import (
|
||||
)
|
||||
from lerobot.common.datasets.video_utils import VideoFrame, load_from_videos
|
||||
|
||||
# For maintainers, see lerobot/common/datasets/push_dataset_to_hub/codebase_version.md
|
||||
CODEBASE_VERSION = "v1.5"
|
||||
|
||||
# For maintainers, see lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md
|
||||
CODEBASE_VERSION = "v1.6"
|
||||
DATA_DIR = Path(os.environ["DATA_DIR"]) if "DATA_DIR" in os.environ else None
|
||||
|
||||
|
||||
|
||||
@@ -10,7 +10,8 @@ For instance, [`lerobot/pusht`](https://huggingface.co/datasets/lerobot/pusht) h
|
||||
- [v1.2](https://huggingface.co/datasets/lerobot/pusht/tree/v1.2)
|
||||
- [v1.3](https://huggingface.co/datasets/lerobot/pusht/tree/v1.3)
|
||||
- [v1.4](https://huggingface.co/datasets/lerobot/pusht/tree/v1.4)
|
||||
- [v1.5](https://huggingface.co/datasets/lerobot/pusht/tree/v1.5) <-- last version
|
||||
- [v1.5](https://huggingface.co/datasets/lerobot/pusht/tree/v1.5)
|
||||
- [v1.6](https://huggingface.co/datasets/lerobot/pusht/tree/v1.6) <-- last version
|
||||
- [main](https://huggingface.co/datasets/lerobot/pusht/tree/main) <-- points to the last version
|
||||
|
||||
Starting with v1.6, every dataset pushed to the hub or saved locally also has this version number in its
|
||||
@@ -45,13 +46,11 @@ for repo_id in available_datasets:
|
||||
dataset_info = api.list_repo_refs(repo_id, repo_type="dataset")
|
||||
branches = [b.name for b in dataset_info.branches]
|
||||
if CODEBASE_VERSION in branches:
|
||||
# First check if the newer version already exists.
|
||||
print(f"Found existing branch for {repo_id}. Please contact a member of the core LeRobot team.")
|
||||
print("Exiting early")
|
||||
break
|
||||
print(f"{repo_id} already @{CODEBASE_VERSION}, skipping.")
|
||||
continue
|
||||
else:
|
||||
# Now create a branch named after the new version by branching out from "main"
|
||||
# which is expected to be the preceding version
|
||||
api.create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION, revision="main")
|
||||
print(f"{repo_id} successfully updated")
|
||||
print(f"{repo_id} successfully updated @{CODEBASE_VERSION}")
|
||||
```
|
||||
|
||||
@@ -19,8 +19,8 @@ This file contains download scripts for raw datasets.
|
||||
Example of usage:
|
||||
```
|
||||
python lerobot/common/datasets/push_dataset_to_hub/_download_raw.py \
|
||||
--raw-dir data/cadene/pusht_raw \
|
||||
--repo-id cadene/pusht_raw
|
||||
--raw-dir data/lerobot-raw/pusht_raw \
|
||||
--repo-id lerobot-raw/pusht_raw
|
||||
```
|
||||
"""
|
||||
|
||||
@@ -31,63 +31,65 @@ from pathlib import Path
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
AVAILABLE_RAW_REPO_IDS = [
|
||||
"lerobot-raw/aloha_mobile_cabinet_raw",
|
||||
"lerobot-raw/aloha_mobile_chair_raw",
|
||||
"lerobot-raw/aloha_mobile_elevator_raw",
|
||||
"lerobot-raw/aloha_mobile_shrimp_raw",
|
||||
"lerobot-raw/aloha_mobile_wash_pan_raw",
|
||||
"lerobot-raw/aloha_mobile_wipe_wine_raw",
|
||||
"lerobot-raw/aloha_sim_insertion_human_raw",
|
||||
"lerobot-raw/aloha_sim_insertion_scripted_raw",
|
||||
"lerobot-raw/aloha_sim_transfer_cube_human_raw",
|
||||
"lerobot-raw/aloha_sim_transfer_cube_scripted_raw",
|
||||
"lerobot-raw/aloha_static_battery_raw",
|
||||
"lerobot-raw/aloha_static_candy_raw",
|
||||
"lerobot-raw/aloha_static_coffee_new_raw",
|
||||
"lerobot-raw/aloha_static_coffee_raw",
|
||||
"lerobot-raw/aloha_static_cups_open_raw",
|
||||
"lerobot-raw/aloha_static_fork_pick_up_raw",
|
||||
"lerobot-raw/aloha_static_pingpong_test_raw",
|
||||
"lerobot-raw/aloha_static_pro_pencil_raw",
|
||||
"lerobot-raw/aloha_static_screw_driver_raw",
|
||||
"lerobot-raw/aloha_static_tape_raw",
|
||||
"lerobot-raw/aloha_static_thread_velcro_raw",
|
||||
"lerobot-raw/aloha_static_towel_raw",
|
||||
"lerobot-raw/aloha_static_vinh_cup_left_raw",
|
||||
"lerobot-raw/aloha_static_vinh_cup_raw",
|
||||
"lerobot-raw/aloha_static_ziploc_slide_raw",
|
||||
"lerobot-raw/pusht_raw",
|
||||
"lerobot-raw/umi_cup_in_the_wild_raw",
|
||||
"lerobot-raw/unitreeh1_fold_clothes_raw",
|
||||
"lerobot-raw/unitreeh1_rearrange_objects_raw",
|
||||
"lerobot-raw/unitreeh1_two_robot_greeting_raw",
|
||||
"lerobot-raw/unitreeh1_warehouse_raw",
|
||||
"lerobot-raw/xarm_lift_medium_raw",
|
||||
"lerobot-raw/xarm_lift_medium_replay_raw",
|
||||
"lerobot-raw/xarm_push_medium_raw",
|
||||
"lerobot-raw/xarm_push_medium_replay_raw",
|
||||
]
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import check_repo_id
|
||||
|
||||
# {raw_repo_id: raw_format}
|
||||
AVAILABLE_RAW_REPO_IDS = {
|
||||
"lerobot-raw/aloha_mobile_cabinet_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_mobile_chair_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_mobile_elevator_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_mobile_shrimp_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_mobile_wash_pan_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_mobile_wipe_wine_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_sim_insertion_human_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_sim_insertion_scripted_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_sim_transfer_cube_human_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_sim_transfer_cube_scripted_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_battery_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_candy_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_coffee_new_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_coffee_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_cups_open_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_fork_pick_up_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_pingpong_test_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_pro_pencil_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_screw_driver_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_tape_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_thread_velcro_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_towel_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_vinh_cup_left_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_vinh_cup_raw": "aloha_hdf5",
|
||||
"lerobot-raw/aloha_static_ziploc_slide_raw": "aloha_hdf5",
|
||||
"lerobot-raw/pusht_raw": "pusht_zarr",
|
||||
"lerobot-raw/umi_cup_in_the_wild_raw": "umi_zarr",
|
||||
"lerobot-raw/unitreeh1_fold_clothes_raw": "aloha_hdf5",
|
||||
"lerobot-raw/unitreeh1_rearrange_objects_raw": "aloha_hdf5",
|
||||
"lerobot-raw/unitreeh1_two_robot_greeting_raw": "aloha_hdf5",
|
||||
"lerobot-raw/unitreeh1_warehouse_raw": "aloha_hdf5",
|
||||
"lerobot-raw/xarm_lift_medium_raw": "xarm_pkl",
|
||||
"lerobot-raw/xarm_lift_medium_replay_raw": "xarm_pkl",
|
||||
"lerobot-raw/xarm_push_medium_raw": "xarm_pkl",
|
||||
"lerobot-raw/xarm_push_medium_replay_raw": "xarm_pkl",
|
||||
}
|
||||
|
||||
|
||||
def download_raw(raw_dir: Path, repo_id: str):
|
||||
# Check repo_id is well formated
|
||||
if len(repo_id.split("/")) != 2:
|
||||
raise ValueError(
|
||||
f"`repo_id` is expected to contain a community or user id `/` the name of the dataset (e.g. 'lerobot/pusht'), but contains '{repo_id}'."
|
||||
)
|
||||
check_repo_id(repo_id)
|
||||
user_id, dataset_id = repo_id.split("/")
|
||||
|
||||
if not dataset_id.endswith("_raw"):
|
||||
warnings.warn(
|
||||
f"`dataset_id` ({dataset_id}) doesn't end with '_raw' (e.g. 'lerobot/pusht_raw'). Following this naming convention by renaming your repository is advised, but not mandatory.",
|
||||
f"""`dataset_id` ({dataset_id}) doesn't end with '_raw' (e.g. 'lerobot/pusht_raw'). Following this
|
||||
naming convention by renaming your repository is advised, but not mandatory.""",
|
||||
stacklevel=1,
|
||||
)
|
||||
|
||||
# Send warning if raw_dir isn't well formatted
|
||||
if raw_dir.parts[-2] != user_id or raw_dir.parts[-1] != dataset_id:
|
||||
warnings.warn(
|
||||
f"`raw_dir` ({raw_dir}) doesn't contain a community or user id `/` the name of the dataset that match the `repo_id` (e.g. 'data/lerobot/pusht_raw'). Following this naming convention is advised, but not mandatory.",
|
||||
f"""`raw_dir` ({raw_dir}) doesn't contain a community or user id `/` the name of the dataset that
|
||||
match the `repo_id` (e.g. 'data/lerobot/pusht_raw'). Following this naming convention is advised,
|
||||
but not mandatory.""",
|
||||
stacklevel=1,
|
||||
)
|
||||
raw_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -97,8 +99,9 @@ def download_raw(raw_dir: Path, repo_id: str):
|
||||
logging.info(f"Finish downloading from huggingface.co/{user_id} for {dataset_id}")
|
||||
|
||||
|
||||
def download_all_raw_datasets():
|
||||
data_dir = Path("data")
|
||||
def download_all_raw_datasets(data_dir: Path | None = None):
|
||||
if data_dir is None:
|
||||
data_dir = Path("data")
|
||||
for repo_id in AVAILABLE_RAW_REPO_IDS:
|
||||
raw_dir = data_dir / repo_id
|
||||
download_raw(raw_dir, repo_id)
|
||||
@@ -106,7 +109,8 @@ def download_all_raw_datasets():
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description=f"A script to download raw datasets from Hugging Face hub to a local directory. Here is a non exhaustive list of available repositories to use in `--repo-id`: {AVAILABLE_RAW_REPO_IDS}",
|
||||
description=f"""A script to download raw datasets from Hugging Face hub to a local directory. Here is a
|
||||
non exhaustive list of available repositories to use in `--repo-id`: {AVAILABLE_RAW_REPO_IDS}""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@@ -119,7 +123,8 @@ def main():
|
||||
"--repo-id",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Repositery identifier on Hugging Face: a community or a user name `/` the name of the dataset (e.g. `lerobot/pusht_raw`, `cadene/aloha_sim_insertion_human_raw`).",
|
||||
help="""Repositery identifier on Hugging Face: a community or a user name `/` the name of
|
||||
the dataset (e.g. `lerobot/pusht_raw`, `cadene/aloha_sim_insertion_human_raw`).""",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
download_raw(**vars(args))
|
||||
|
||||
184
lerobot/common/datasets/push_dataset_to_hub/_encode_datasets.py
Normal file
184
lerobot/common/datasets/push_dataset_to_hub/_encode_datasets.py
Normal file
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Use this script to batch encode lerobot datasets from their raw format to LeRobotDataset and push their updated
|
||||
version to the hub. Under the hood, this script reuses 'push_dataset_to_hub.py'. It assumes that you already
|
||||
downloaded raw datasets, which you can do with the related '_download_raw.py' script.
|
||||
|
||||
For instance, for codebase_version = 'v1.6', the following command was run, assuming raw datasets from
|
||||
lerobot-raw were downloaded in 'raw/datasets/directory':
|
||||
```bash
|
||||
python lerobot/common/datasets/push_dataset_to_hub/_encode_datasets.py \
|
||||
--raw-dir raw/datasets/directory \
|
||||
--raw-repo-ids lerobot-raw \
|
||||
--local-dir push/datasets/directory \
|
||||
--tests-data-dir tests/data \
|
||||
--push-repo lerobot \
|
||||
--vcodec libsvtav1 \
|
||||
--pix-fmt yuv420p \
|
||||
--g 2 \
|
||||
--crf 30
|
||||
```
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub._download_raw import AVAILABLE_RAW_REPO_IDS
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import check_repo_id
|
||||
from lerobot.scripts.push_dataset_to_hub import push_dataset_to_hub
|
||||
|
||||
|
||||
def get_push_repo_id_from_raw(raw_repo_id: str, push_repo: str) -> str:
    """Build the repo id to push to from a raw repo id.

    The second `/`-separated component of `raw_repo_id` is taken as the dataset
    name, a trailing '_raw' is dropped from it if present, and the result is
    prefixed with `push_repo`.

    Args:
        raw_repo_id: Raw repo id, e.g. 'lerobot-raw/pusht_raw'.
        push_repo: User or organization the converted dataset is pushed to.

    Returns:
        The repo id '<push_repo>/<dataset name without the _raw suffix>'.
    """
    raw_suffix = "_raw"
    dataset_name = raw_repo_id.split("/")[1]
    if dataset_name.endswith(raw_suffix):
        # Strip the suffix only once, and only at the very end of the name.
        dataset_name = dataset_name[: -len(raw_suffix)]
    return "{}/{}".format(push_repo, dataset_name)
|
||||
|
||||
|
||||
def encode_datasets(
|
||||
raw_dir: Path,
|
||||
raw_repo_ids: list[str],
|
||||
push_repo: str,
|
||||
vcodec: str,
|
||||
pix_fmt: str,
|
||||
g: int,
|
||||
crf: int,
|
||||
local_dir: Path | None = None,
|
||||
tests_data_dir: Path | None = None,
|
||||
raw_format: str | None = None,
|
||||
dry_run: bool = False,
|
||||
) -> None:
|
||||
if len(raw_repo_ids) == 1 and raw_repo_ids[0].lower() == "lerobot-raw":
|
||||
raw_repo_ids_format = AVAILABLE_RAW_REPO_IDS
|
||||
else:
|
||||
if raw_format is None:
|
||||
raise ValueError(raw_format)
|
||||
raw_repo_ids_format = {id_: raw_format for id_ in raw_repo_ids}
|
||||
|
||||
for raw_repo_id, repo_raw_format in raw_repo_ids_format.items():
|
||||
check_repo_id(raw_repo_id)
|
||||
dataset_repo_id_push = get_push_repo_id_from_raw(raw_repo_id, push_repo)
|
||||
dataset_raw_dir = raw_dir / raw_repo_id
|
||||
dataset_dir = local_dir / dataset_repo_id_push if local_dir is not None else None
|
||||
encoding = {
|
||||
"vcodec": vcodec,
|
||||
"pix_fmt": pix_fmt,
|
||||
"g": g,
|
||||
"crf": crf,
|
||||
}
|
||||
|
||||
if not (dataset_raw_dir).is_dir():
|
||||
raise NotADirectoryError(dataset_raw_dir)
|
||||
|
||||
if not dry_run:
|
||||
push_dataset_to_hub(
|
||||
dataset_raw_dir,
|
||||
raw_format=repo_raw_format,
|
||||
repo_id=dataset_repo_id_push,
|
||||
local_dir=dataset_dir,
|
||||
resume=True,
|
||||
encoding=encoding,
|
||||
tests_data_dir=tests_data_dir,
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"DRY RUN: {dataset_raw_dir} --> {dataset_dir} --> {dataset_repo_id_push}@{CODEBASE_VERSION}"
|
||||
)
|
||||
|
||||
|
||||
def main():
    """Parse the command-line options and forward them to `encode_datasets`."""
    raw_repo_ids_help = """Raw dataset repo ids. if 'lerobot-raw', the keys from `AVAILABLE_RAW_REPO_IDS` will be
            used and raw datasets will be fetched from the 'lerobot-raw/' repo and pushed with their
            associated format. It is assumed that each dataset is located at `raw_dir / raw_repo_id` """
    raw_format_help = """Raw format to use for the raw repo-ids. Must be specified if --raw-repo-ids is not
            'lerobot-raw'"""
    local_dir_help = """When provided, writes the dataset converted to LeRobotDataset format in this directory
            (e.g. `data/lerobot/aloha_mobile_chair`)."""
    tests_data_dir_help = (
        "When provided, save tests artifacts into the given directory "
        "(e.g. `--tests-data-dir tests/data` will save to tests/data/{--repo-id})."
    )

    # One (flag, add_argument keyword arguments) entry per option, in registration order.
    cli_options = [
        ("--raw-dir", {"type": Path, "default": Path("data"), "help": "Directory where raw datasets are located."}),
        ("--raw-repo-ids", {"type": str, "nargs": "*", "default": ["lerobot-raw"], "help": raw_repo_ids_help}),
        ("--raw-format", {"type": str, "default": None, "help": raw_format_help}),
        ("--local-dir", {"type": Path, "default": None, "help": local_dir_help}),
        ("--push-repo", {"type": str, "default": "lerobot", "help": "Repo to upload datasets to"}),
        ("--vcodec", {"type": str, "default": "libsvtav1", "help": "Codec to use for encoding videos"}),
        (
            "--pix-fmt",
            {
                "type": str,
                "default": "yuv420p",
                "help": "Pixel formats (chroma subsampling) to be used for encoding",
            },
        ),
        ("--g", {"type": int, "default": 2, "help": "Group of pictures sizes to be used for encoding."}),
        ("--crf", {"type": int, "default": 30, "help": "Constant rate factors to be used for encoding."}),
        ("--tests-data-dir", {"type": Path, "default": None, "help": tests_data_dir_help}),
        (
            "--dry-run",
            {
                "type": int,
                "default": 0,
                "help": "If not set to 0, this script won't download or upload anything.",
            },
        ),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in cli_options:
        parser.add_argument(flag, **options)

    # The parsed option names match the keyword arguments of `encode_datasets`.
    cli_args = parser.parse_args()
    encode_datasets(**vars(cli_args))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -29,7 +29,11 @@ from datasets import Dataset, Features, Image, Sequence, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import (
|
||||
concatenate_episodes,
|
||||
get_default_encoding,
|
||||
save_images_concurrently,
|
||||
)
|
||||
from lerobot.common.datasets.utils import (
|
||||
calculate_episode_data_index,
|
||||
hf_transform_to_torch,
|
||||
@@ -72,7 +76,14 @@ def check_format(raw_dir) -> bool:
|
||||
assert c < h and c < w, f"Expect (h,w,c) image format but ({h=},{w=},{c=}) provided."
|
||||
|
||||
|
||||
def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None):
|
||||
def load_from_raw(
|
||||
raw_dir: Path,
|
||||
videos_dir: Path,
|
||||
fps: int,
|
||||
video: bool,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
# only frames from simulation are uncompressed
|
||||
compressed_images = "sim" not in raw_dir.name
|
||||
|
||||
@@ -123,7 +134,7 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod
|
||||
# encode images to a mp4 video
|
||||
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
|
||||
video_path = videos_dir / fname
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps)
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {}))
|
||||
|
||||
# clean temporary images directory
|
||||
shutil.rmtree(tmp_imgs_dir)
|
||||
@@ -200,6 +211,7 @@ def from_raw_to_lerobot_format(
|
||||
fps: int | None = None,
|
||||
video: bool = True,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
# sanity check
|
||||
check_format(raw_dir)
|
||||
@@ -207,7 +219,7 @@ def from_raw_to_lerobot_format(
|
||||
if fps is None:
|
||||
fps = 50
|
||||
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes)
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding)
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
@@ -215,4 +227,7 @@ def from_raw_to_lerobot_format(
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
if video:
|
||||
info["encoding"] = get_default_encoding()
|
||||
|
||||
return hf_dataset, episode_data_index, info
|
||||
|
||||
@@ -81,8 +81,9 @@ def from_raw_to_lerobot_format(
|
||||
fps: int | None = None,
|
||||
video: bool = True,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
if video or episodes is not None:
|
||||
if video or episodes or encoding is not None:
|
||||
# TODO(aliberts): support this
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ Contains utilities to process raw data format from dora-record
|
||||
"""
|
||||
|
||||
import re
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
@@ -199,6 +200,7 @@ def from_raw_to_lerobot_format(
|
||||
fps: int | None = None,
|
||||
video: bool = True,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
# sanity check
|
||||
check_format(raw_dir)
|
||||
@@ -211,6 +213,12 @@ def from_raw_to_lerobot_format(
|
||||
if not video:
|
||||
raise NotImplementedError()
|
||||
|
||||
if encoding is not None:
|
||||
warnings.warn(
|
||||
"Video encoding is currently done outside of LeRobot for the dora_parquet format.",
|
||||
stacklevel=1,
|
||||
)
|
||||
|
||||
data_df = load_from_raw(raw_dir, videos_dir, fps, episodes)
|
||||
hf_dataset = to_hf_dataset(data_df, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
@@ -219,4 +227,7 @@ def from_raw_to_lerobot_format(
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
if video:
|
||||
info["encoding"] = "unknown"
|
||||
|
||||
return hf_dataset, episode_data_index, info
|
||||
|
||||
@@ -26,7 +26,11 @@ from datasets import Dataset, Features, Image, Sequence, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import (
|
||||
concatenate_episodes,
|
||||
get_default_encoding,
|
||||
save_images_concurrently,
|
||||
)
|
||||
from lerobot.common.datasets.utils import (
|
||||
calculate_episode_data_index,
|
||||
hf_transform_to_torch,
|
||||
@@ -62,6 +66,7 @@ def load_from_raw(
|
||||
video: bool,
|
||||
episodes: list[int] | None = None,
|
||||
keypoints_instead_of_image: bool = False,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
try:
|
||||
import pymunk
|
||||
@@ -172,7 +177,7 @@ def load_from_raw(
|
||||
# encode images to a mp4 video
|
||||
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
|
||||
video_path = videos_dir / fname
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps)
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {}))
|
||||
|
||||
# clean temporary images directory
|
||||
shutil.rmtree(tmp_imgs_dir)
|
||||
@@ -244,6 +249,7 @@ def from_raw_to_lerobot_format(
|
||||
fps: int | None = None,
|
||||
video: bool = True,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
# Manually change this to True to use keypoints of the T instead of an image observation (but don't merge
|
||||
# with True). Also make sure to use video = 0 in the `push_dataset_to_hub.py` script.
|
||||
@@ -255,7 +261,7 @@ def from_raw_to_lerobot_format(
|
||||
if fps is None:
|
||||
fps = 10
|
||||
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, keypoints_instead_of_image)
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, keypoints_instead_of_image, encoding)
|
||||
hf_dataset = to_hf_dataset(data_dict, video, keypoints_instead_of_image)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
@@ -263,4 +269,7 @@ def from_raw_to_lerobot_format(
|
||||
"fps": fps,
|
||||
"video": video if not keypoints_instead_of_image else 0,
|
||||
}
|
||||
if video:
|
||||
info["encoding"] = get_default_encoding()
|
||||
|
||||
return hf_dataset, episode_data_index, info
|
||||
|
||||
@@ -27,7 +27,11 @@ from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub._umi_imagecodecs_numcodecs import register_codecs
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import (
|
||||
concatenate_episodes,
|
||||
get_default_encoding,
|
||||
save_images_concurrently,
|
||||
)
|
||||
from lerobot.common.datasets.utils import (
|
||||
calculate_episode_data_index,
|
||||
hf_transform_to_torch,
|
||||
@@ -60,7 +64,14 @@ def check_format(raw_dir) -> bool:
|
||||
assert all(nb_frames == zarr_data[dataset].shape[0] for dataset in required_datasets)
|
||||
|
||||
|
||||
def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None):
|
||||
def load_from_raw(
|
||||
raw_dir: Path,
|
||||
videos_dir: Path,
|
||||
fps: int,
|
||||
video: bool,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
zarr_path = raw_dir / "cup_in_the_wild.zarr"
|
||||
zarr_data = zarr.open(zarr_path, mode="r")
|
||||
|
||||
@@ -88,49 +99,61 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod
|
||||
to_ids.append(to_idx)
|
||||
from_idx = to_idx
|
||||
|
||||
ep_dicts_dir = videos_dir / "ep_dicts"
|
||||
ep_dicts_dir.mkdir(exist_ok=True, parents=True)
|
||||
ep_dicts = []
|
||||
|
||||
ep_ids = episodes if episodes else range(num_episodes)
|
||||
for ep_idx, selected_ep_idx in tqdm.tqdm(enumerate(ep_ids)):
|
||||
from_idx = from_ids[selected_ep_idx]
|
||||
to_idx = to_ids[selected_ep_idx]
|
||||
num_frames = to_idx - from_idx
|
||||
ep_dict_path = ep_dicts_dir / f"{ep_idx}"
|
||||
if not ep_dict_path.is_file():
|
||||
from_idx = from_ids[selected_ep_idx]
|
||||
to_idx = to_ids[selected_ep_idx]
|
||||
num_frames = to_idx - from_idx
|
||||
|
||||
# TODO(rcadene): save temporary images of the episode?
|
||||
# TODO(rcadene): save temporary images of the episode?
|
||||
|
||||
state = states[from_idx:to_idx]
|
||||
state = states[from_idx:to_idx]
|
||||
|
||||
ep_dict = {}
|
||||
ep_dict = {}
|
||||
|
||||
# load 57MB of images in RAM (400x224x224x3 uint8)
|
||||
imgs_array = zarr_data["data/camera0_rgb"][from_idx:to_idx]
|
||||
img_key = "observation.image"
|
||||
if video:
|
||||
# save png images in temporary directory
|
||||
tmp_imgs_dir = videos_dir / "tmp_images"
|
||||
save_images_concurrently(imgs_array, tmp_imgs_dir)
|
||||
# load 57MB of images in RAM (400x224x224x3 uint8)
|
||||
imgs_array = zarr_data["data/camera0_rgb"][from_idx:to_idx]
|
||||
img_key = "observation.image"
|
||||
if video:
|
||||
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
|
||||
video_path = videos_dir / fname
|
||||
if not video_path.is_file():
|
||||
# save png images in temporary directory
|
||||
tmp_imgs_dir = videos_dir / "tmp_images"
|
||||
save_images_concurrently(imgs_array, tmp_imgs_dir)
|
||||
|
||||
# encode images to a mp4 video
|
||||
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
|
||||
video_path = videos_dir / fname
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps)
|
||||
# encode images to a mp4 video
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {}))
|
||||
|
||||
# clean temporary images directory
|
||||
shutil.rmtree(tmp_imgs_dir)
|
||||
# clean temporary images directory
|
||||
shutil.rmtree(tmp_imgs_dir)
|
||||
|
||||
# store the reference to the video frame
|
||||
ep_dict[img_key] = [{"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames)]
|
||||
# store the reference to the video frame
|
||||
ep_dict[img_key] = [
|
||||
{"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames)
|
||||
]
|
||||
else:
|
||||
ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array]
|
||||
|
||||
ep_dict["observation.state"] = state
|
||||
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64)
|
||||
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
|
||||
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
|
||||
ep_dict["episode_data_index_from"] = torch.tensor([from_idx] * num_frames)
|
||||
ep_dict["episode_data_index_to"] = torch.tensor([from_idx + num_frames] * num_frames)
|
||||
ep_dict["end_pose"] = end_pose[from_idx:to_idx]
|
||||
ep_dict["start_pos"] = start_pos[from_idx:to_idx]
|
||||
ep_dict["gripper_width"] = gripper_width[from_idx:to_idx]
|
||||
torch.save(ep_dict, ep_dict_path)
|
||||
else:
|
||||
ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array]
|
||||
ep_dict = torch.load(ep_dict_path)
|
||||
|
||||
ep_dict["observation.state"] = state
|
||||
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64)
|
||||
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
|
||||
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
|
||||
ep_dict["episode_data_index_from"] = torch.tensor([from_idx] * num_frames)
|
||||
ep_dict["episode_data_index_to"] = torch.tensor([from_idx + num_frames] * num_frames)
|
||||
ep_dict["end_pose"] = end_pose[from_idx:to_idx]
|
||||
ep_dict["start_pos"] = start_pos[from_idx:to_idx]
|
||||
ep_dict["gripper_width"] = gripper_width[from_idx:to_idx]
|
||||
ep_dicts.append(ep_dict)
|
||||
|
||||
data_dict = concatenate_episodes(ep_dicts)
|
||||
@@ -183,6 +206,7 @@ def from_raw_to_lerobot_format(
|
||||
fps: int | None = None,
|
||||
video: bool = True,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
# sanity check
|
||||
check_format(raw_dir)
|
||||
@@ -196,7 +220,7 @@ def from_raw_to_lerobot_format(
|
||||
"Generating UMI dataset without `video=True` creates ~150GB on disk and requires ~80GB in RAM."
|
||||
)
|
||||
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes)
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding)
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
@@ -204,4 +228,7 @@ def from_raw_to_lerobot_format(
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
if video:
|
||||
info["encoding"] = get_default_encoding()
|
||||
|
||||
return hf_dataset, episode_data_index, info
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import inspect
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
|
||||
@@ -20,6 +21,8 @@ import numpy
|
||||
import PIL
|
||||
import torch
|
||||
|
||||
from lerobot.common.datasets.video_utils import encode_video_frames
|
||||
|
||||
|
||||
def concatenate_episodes(ep_dicts):
|
||||
data_dict = {}
|
||||
@@ -51,3 +54,21 @@ def save_images_concurrently(imgs_array: numpy.array, out_dir: Path, max_workers
|
||||
num_images = len(imgs_array)
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
[executor.submit(save_image, imgs_array[i], i, out_dir) for i in range(num_images)]
|
||||
|
||||
|
||||
def get_default_encoding() -> dict:
    """Return the default values of the encoding parameters of `encode_video_frames`.

    Only the `vcodec`, `pix_fmt`, `g` and `crf` parameters are considered, and
    only those that declare a default value are included in the result.
    """
    encoding_keys = ["vcodec", "pix_fmt", "g", "crf"]
    parameters = inspect.signature(encode_video_frames).parameters

    defaults = {}
    for name, parameter in parameters.items():
        if parameter.default is inspect.Parameter.empty:
            # Parameter without a default value: nothing to report.
            continue
        if name in encoding_keys:
            defaults[name] = parameter.default
    return defaults
|
||||
|
||||
|
||||
def check_repo_id(repo_id: str) -> None:
    """Validate that `repo_id` has the form '<user_or_community>/<dataset_name>'.

    Args:
        repo_id: Repository identifier to validate, e.g. 'lerobot/pusht'.

    Raises:
        ValueError: If `repo_id` is not made of exactly two non-empty parts
            separated by a single `/`.
    """
    parts = repo_id.split("/")
    # `all(parts)` also rejects ids with an empty component such as '/pusht' or 'lerobot/'.
    if len(parts) != 2 or not all(parts):
        raise ValueError(
            "`repo_id` is expected to contain a community or user id `/` the name of the dataset "
            f"(e.g. 'lerobot/pusht'), but contains '{repo_id}'."
        )
|
||||
|
||||
@@ -26,7 +26,11 @@ from datasets import Dataset, Features, Image, Sequence, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import (
|
||||
concatenate_episodes,
|
||||
get_default_encoding,
|
||||
save_images_concurrently,
|
||||
)
|
||||
from lerobot.common.datasets.utils import (
|
||||
calculate_episode_data_index,
|
||||
hf_transform_to_torch,
|
||||
@@ -56,7 +60,14 @@ def check_format(raw_dir):
|
||||
assert all(len(nested_dict[subkey]) == expected_len for subkey in subkeys if subkey in nested_dict)
|
||||
|
||||
|
||||
def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None):
|
||||
def load_from_raw(
|
||||
raw_dir: Path,
|
||||
videos_dir: Path,
|
||||
fps: int,
|
||||
video: bool,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
pkl_path = raw_dir / "buffer.pkl"
|
||||
|
||||
with open(pkl_path, "rb") as f:
|
||||
@@ -105,7 +116,7 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod
|
||||
# encode images to a mp4 video
|
||||
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
|
||||
video_path = videos_dir / fname
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps)
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {}))
|
||||
|
||||
# clean temporary images directory
|
||||
shutil.rmtree(tmp_imgs_dir)
|
||||
@@ -167,6 +178,7 @@ def from_raw_to_lerobot_format(
|
||||
fps: int | None = None,
|
||||
video: bool = True,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
):
|
||||
# sanity check
|
||||
check_format(raw_dir)
|
||||
@@ -174,7 +186,7 @@ def from_raw_to_lerobot_format(
|
||||
if fps is None:
|
||||
fps = 15
|
||||
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes)
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding)
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
@@ -182,4 +194,7 @@ def from_raw_to_lerobot_format(
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
if video:
|
||||
info["encoding"] = get_default_encoding()
|
||||
|
||||
return hf_dataset, episode_data_index, info
|
||||
|
||||
@@ -166,10 +166,10 @@ def encode_video_frames(
|
||||
imgs_dir: Path,
|
||||
video_path: Path,
|
||||
fps: int,
|
||||
video_codec: str = "libsvtav1",
|
||||
pixel_format: str = "yuv420p",
|
||||
group_of_pictures_size: int | None = 2,
|
||||
constant_rate_factor: int | None = 30,
|
||||
vcodec: str = "libsvtav1",
|
||||
pix_fmt: str = "yuv420p",
|
||||
g: int | None = 2,
|
||||
crf: int | None = 30,
|
||||
fast_decode: int = 0,
|
||||
log_level: str | None = "error",
|
||||
overwrite: bool = False,
|
||||
@@ -183,20 +183,20 @@ def encode_video_frames(
|
||||
("-f", "image2"),
|
||||
("-r", str(fps)),
|
||||
("-i", str(imgs_dir / "frame_%06d.png")),
|
||||
("-vcodec", video_codec),
|
||||
("-pix_fmt", pixel_format),
|
||||
("-vcodec", vcodec),
|
||||
("-pix_fmt", pix_fmt),
|
||||
]
|
||||
)
|
||||
|
||||
if group_of_pictures_size is not None:
|
||||
ffmpeg_args["-g"] = str(group_of_pictures_size)
|
||||
if g is not None:
|
||||
ffmpeg_args["-g"] = str(g)
|
||||
|
||||
if constant_rate_factor is not None:
|
||||
ffmpeg_args["-crf"] = str(constant_rate_factor)
|
||||
if crf is not None:
|
||||
ffmpeg_args["-crf"] = str(crf)
|
||||
|
||||
if fast_decode:
|
||||
key = "-svtav1-params" if video_codec == "libsvtav1" else "-tune"
|
||||
value = f"fast-decode={fast_decode}" if video_codec == "libsvtav1" else "fastdecode"
|
||||
key = "-svtav1-params" if vcodec == "libsvtav1" else "-tune"
|
||||
value = f"fast-decode={fast_decode}" if vcodec == "libsvtav1" else "fastdecode"
|
||||
ffmpeg_args[key] = value
|
||||
|
||||
if log_level is not None:
|
||||
|
||||
Reference in New Issue
Block a user