forked from tangger/lerobot
Fix datasets missing versions (#318)
This commit is contained in:
@@ -0,0 +1,57 @@
|
||||
## Using / Updating `CODEBASE_VERSION` (for maintainers)
|
||||
|
||||
Since our datasets pushed to the hub are decoupled from the evolution of this repo, to ensure compatibility of
|
||||
the datasets with our code, we use a `CODEBASE_VERSION` (defined in
|
||||
lerobot/common/datasets/lerobot_dataset.py) variable.
|
||||
|
||||
For instance, [`lerobot/pusht`](https://huggingface.co/datasets/lerobot/pusht) has many versions to maintain backward compatibility between LeRobot codebase versions:
|
||||
- [v1.0](https://huggingface.co/datasets/lerobot/pusht/tree/v1.0)
|
||||
- [v1.1](https://huggingface.co/datasets/lerobot/pusht/tree/v1.1)
|
||||
- [v1.2](https://huggingface.co/datasets/lerobot/pusht/tree/v1.2)
|
||||
- [v1.3](https://huggingface.co/datasets/lerobot/pusht/tree/v1.3)
|
||||
- [v1.4](https://huggingface.co/datasets/lerobot/pusht/tree/v1.4)
|
||||
- [v1.5](https://huggingface.co/datasets/lerobot/pusht/tree/v1.5) <-- last version
|
||||
- [main](https://huggingface.co/datasets/lerobot/pusht/tree/main) <-- points to the last version
|
||||
|
||||
Starting with v1.6, every dataset pushed to the hub or saved locally also has this version number in its
|
||||
`info.json` metadata.
|
||||
|
||||
### Uploading a new dataset
|
||||
If you are pushing a new dataset, you don't need to worry about any of the instructions below, nor to be
|
||||
compatible with previous codebase versions. The `push_dataset_to_hub.py` script will automatically tag your
|
||||
dataset with the current `CODEBASE_VERSION`.
|
||||
|
||||
### Updating an existing dataset
|
||||
If you want to update an existing dataset, you need to change the `CODEBASE_VERSION` in `lerobot_dataset.py`
|
||||
before running `push_dataset_to_hub.py`. This is especially useful if you introduce a breaking change
|
||||
intentionally or not (i.e. something not backward compatible such as modifying the reward functions used,
|
||||
deleting some frames at the end of an episode, etc.). That way, people running a previous version of the
|
||||
codebase won't be affected by your change and backward compatibility is maintained.
|
||||
|
||||
However, you will need to update the version of ALL the other datasets so that they have the new
|
||||
`CODEBASE_VERSION` as a branch in their Hugging Face dataset repository. Don't worry, there is an easy way
|
||||
that doesn't require running `push_dataset_to_hub.py`. You can just "branch-out" from the `main` branch on the HF
|
||||
dataset repo by running this script, which corresponds to a `git checkout -b` (so no copy or upload needed):
|
||||
|
||||
```python
|
||||
from huggingface_hub import HfApi

from lerobot import available_datasets
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION

# For every available dataset repo, create a branch named after `CODEBASE_VERSION`
# from "main". Stops at the first repo that already has such a branch.
api = HfApi()

for repo_id in available_datasets:
    refs = api.list_repo_refs(repo_id, repo_type="dataset")
    branch_names = {ref.name for ref in refs.branches}

    # First check if the newer version already exists.
    if CODEBASE_VERSION in branch_names:
        print(f"Found existing branch for {repo_id}. Please contact a member of the core LeRobot team.")
        print("Exiting early")
        break

    # Now create a branch named after the new version by branching out from "main"
    # which is expected to be the preceding version
    api.create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION, revision="main")
    print(f"{repo_id} successfully updated")
|
||||
```
|
||||
@@ -32,46 +32,41 @@ from pathlib import Path
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
AVAILABLE_RAW_REPO_IDS = [
|
||||
"cadene/pusht_image_raw",
|
||||
"cadene/xarm_lift_medium_image_raw",
|
||||
"cadene/xarm_lift_medium_replay_image_raw",
|
||||
"cadene/xarm_push_medium_image_raw",
|
||||
"cadene/xarm_push_medium_replay_image_raw",
|
||||
"cadene/aloha_sim_insertion_human_image_raw",
|
||||
"cadene/aloha_sim_insertion_scripted_image_raw",
|
||||
"cadene/aloha_sim_transfer_cube_human_image_raw",
|
||||
"cadene/aloha_sim_transfer_cube_scripted_image_raw",
|
||||
"cadene/pusht_raw",
|
||||
"cadene/xarm_lift_medium_raw",
|
||||
"cadene/xarm_lift_medium_replay_raw",
|
||||
"cadene/xarm_push_medium_raw",
|
||||
"cadene/xarm_push_medium_replay_raw",
|
||||
"cadene/aloha_sim_insertion_human_raw",
|
||||
"cadene/aloha_sim_insertion_scripted_raw",
|
||||
"cadene/aloha_sim_transfer_cube_human_raw",
|
||||
"cadene/aloha_sim_transfer_cube_scripted_raw",
|
||||
"cadene/aloha_mobile_cabinet_raw",
|
||||
"cadene/aloha_mobile_chair_raw",
|
||||
"cadene/aloha_mobile_elevator_raw",
|
||||
"cadene/aloha_mobile_shrimp_raw",
|
||||
"cadene/aloha_mobile_wash_pan_raw",
|
||||
"cadene/aloha_mobile_wipe_wine_raw",
|
||||
"cadene/aloha_static_battery_raw",
|
||||
"cadene/aloha_static_candy_raw",
|
||||
"cadene/aloha_static_coffee_raw",
|
||||
"cadene/aloha_static_coffee_new_raw",
|
||||
"cadene/aloha_static_cups_open_raw",
|
||||
"cadene/aloha_static_fork_pick_up_raw",
|
||||
"cadene/aloha_static_pingpong_test_raw",
|
||||
"cadene/aloha_static_pro_pencil_raw",
|
||||
"cadene/aloha_static_screw_driver_raw",
|
||||
"cadene/aloha_static_tape_raw",
|
||||
"cadene/aloha_static_thread_velcro_raw",
|
||||
"cadene/aloha_static_towel_raw",
|
||||
"cadene/aloha_static_vinh_cup_raw",
|
||||
"cadene/aloha_static_vinh_cup_left_raw",
|
||||
"cadene/aloha_static_ziploc_slide_raw",
|
||||
"cadene/umi_cup_in_the_wild_raw",
|
||||
"lerobot-raw/aloha_mobile_cabinet_raw",
|
||||
"lerobot-raw/aloha_mobile_chair_raw",
|
||||
"lerobot-raw/aloha_mobile_elevator_raw",
|
||||
"lerobot-raw/aloha_mobile_shrimp_raw",
|
||||
"lerobot-raw/aloha_mobile_wash_pan_raw",
|
||||
"lerobot-raw/aloha_mobile_wipe_wine_raw",
|
||||
"lerobot-raw/aloha_sim_insertion_human_raw",
|
||||
"lerobot-raw/aloha_sim_insertion_scripted_raw",
|
||||
"lerobot-raw/aloha_sim_transfer_cube_human_raw",
|
||||
"lerobot-raw/aloha_sim_transfer_cube_scripted_raw",
|
||||
"lerobot-raw/aloha_static_battery_raw",
|
||||
"lerobot-raw/aloha_static_candy_raw",
|
||||
"lerobot-raw/aloha_static_coffee_new_raw",
|
||||
"lerobot-raw/aloha_static_coffee_raw",
|
||||
"lerobot-raw/aloha_static_cups_open_raw",
|
||||
"lerobot-raw/aloha_static_fork_pick_up_raw",
|
||||
"lerobot-raw/aloha_static_pingpong_test_raw",
|
||||
"lerobot-raw/aloha_static_pro_pencil_raw",
|
||||
"lerobot-raw/aloha_static_screw_driver_raw",
|
||||
"lerobot-raw/aloha_static_tape_raw",
|
||||
"lerobot-raw/aloha_static_thread_velcro_raw",
|
||||
"lerobot-raw/aloha_static_towel_raw",
|
||||
"lerobot-raw/aloha_static_vinh_cup_left_raw",
|
||||
"lerobot-raw/aloha_static_vinh_cup_raw",
|
||||
"lerobot-raw/aloha_static_ziploc_slide_raw",
|
||||
"lerobot-raw/pusht_raw",
|
||||
"lerobot-raw/umi_cup_in_the_wild_raw",
|
||||
"lerobot-raw/unitreeh1_fold_clothes_raw",
|
||||
"lerobot-raw/unitreeh1_rearrange_objects_raw",
|
||||
"lerobot-raw/unitreeh1_two_robot_greeting_raw",
|
||||
"lerobot-raw/unitreeh1_warehouse_raw",
|
||||
"lerobot-raw/xarm_lift_medium_raw",
|
||||
"lerobot-raw/xarm_lift_medium_replay_raw",
|
||||
"lerobot-raw/xarm_push_medium_raw",
|
||||
"lerobot-raw/xarm_push_medium_replay_raw",
|
||||
]
|
||||
|
||||
|
||||
@@ -89,7 +84,6 @@ def download_raw(raw_dir: Path, repo_id: str):
|
||||
stacklevel=1,
|
||||
)
|
||||
|
||||
raw_dir = Path(raw_dir)
|
||||
# Send warning if raw_dir isn't well formatted
|
||||
if raw_dir.parts[-2] != user_id or raw_dir.parts[-1] != dataset_id:
|
||||
warnings.warn(
|
||||
@@ -99,7 +93,7 @@ def download_raw(raw_dir: Path, repo_id: str):
|
||||
raw_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logging.info(f"Start downloading from huggingface.co/{user_id} for {dataset_id}")
|
||||
snapshot_download(f"{repo_id}", repo_type="dataset", local_dir=raw_dir)
|
||||
snapshot_download(repo_id, repo_type="dataset", local_dir=raw_dir)
|
||||
logging.info(f"Finish downloading from huggingface.co/{user_id} for {dataset_id}")
|
||||
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ import tqdm
|
||||
from datasets import Dataset, Features, Image, Sequence, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
|
||||
from lerobot.common.datasets.utils import (
|
||||
calculate_episode_data_index,
|
||||
@@ -210,6 +211,7 @@ def from_raw_to_lerobot_format(
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
"codebase_version": CODEBASE_VERSION,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import torch
|
||||
from datasets import Dataset, Features, Image, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes
|
||||
from lerobot.common.datasets.utils import calculate_episode_data_index, hf_transform_to_torch
|
||||
from lerobot.common.datasets.video_utils import VideoFrame
|
||||
@@ -95,6 +96,7 @@ def from_raw_to_lerobot_format(
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
"codebase_version": CODEBASE_VERSION,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ import pandas as pd
|
||||
import torch
|
||||
from datasets import Dataset, Features, Image, Sequence, Value
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.utils import (
|
||||
calculate_episode_data_index,
|
||||
hf_transform_to_torch,
|
||||
@@ -214,6 +215,7 @@ def from_raw_to_lerobot_format(
|
||||
hf_dataset = to_hf_dataset(data_df, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
"codebase_version": CODEBASE_VERSION,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ import zarr
|
||||
from datasets import Dataset, Features, Image, Sequence, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
|
||||
from lerobot.common.datasets.utils import (
|
||||
calculate_episode_data_index,
|
||||
@@ -258,6 +259,7 @@ def from_raw_to_lerobot_format(
|
||||
hf_dataset = to_hf_dataset(data_dict, video, keypoints_instead_of_image)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
"codebase_version": CODEBASE_VERSION,
|
||||
"fps": fps,
|
||||
"video": video if not keypoints_instead_of_image else 0,
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ import zarr
|
||||
from datasets import Dataset, Features, Image, Sequence, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub._umi_imagecodecs_numcodecs import register_codecs
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
|
||||
from lerobot.common.datasets.utils import (
|
||||
@@ -199,6 +200,7 @@ def from_raw_to_lerobot_format(
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
"codebase_version": CODEBASE_VERSION,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ import tqdm
|
||||
from datasets import Dataset, Features, Image, Sequence, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
|
||||
from lerobot.common.datasets.utils import (
|
||||
calculate_episode_data_index,
|
||||
@@ -177,6 +178,7 @@ def from_raw_to_lerobot_format(
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
"codebase_version": CODEBASE_VERSION,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user