forked from tangger/lerobot
Fix datasets missing versions (#318)
This commit is contained in:
@@ -475,6 +475,7 @@ def record_dataset(
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
"codebase_version": CODEBASE_VERSION,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
|
||||
@@ -40,60 +40,6 @@ python lerobot/scripts/push_dataset_to_hub.py \
|
||||
--raw-format umi_zarr \
|
||||
--repo-id lerobot/umi_cup_in_the_wild
|
||||
```
|
||||
|
||||
**WARNING: Updating an existing dataset**
|
||||
|
||||
If you want to update an existing dataset, you need to change the `CODEBASE_VERSION` from `lerobot_dataset.py`
|
||||
before running `push_dataset_to_hub.py`. This is especially useful if you introduce a breaking change
|
||||
intentionally or not (i.e. something not backward compatible such as modifying the reward functions used,
|
||||
deleting some frames at the end of an episode, etc.). That way, people running a previous version of the
|
||||
codebase won't be affected by your change and backward compatibility is maintained.
|
||||
|
||||
For instance, Pusht has many versions to maintain backward compatibility between LeRobot codebase versions:
|
||||
- [v1.0](https://huggingface.co/datasets/lerobot/pusht/tree/v1.0)
|
||||
- [v1.1](https://huggingface.co/datasets/lerobot/pusht/tree/v1.1)
|
||||
- [v1.2](https://huggingface.co/datasets/lerobot/pusht/tree/v1.2)
|
||||
- [v1.3](https://huggingface.co/datasets/lerobot/pusht/tree/v1.3)
|
||||
- [v1.4](https://huggingface.co/datasets/lerobot/pusht/tree/v1.4)
|
||||
- [v1.5](https://huggingface.co/datasets/lerobot/pusht/tree/v1.5) <-- last version
|
||||
- [main](https://huggingface.co/datasets/lerobot/pusht/tree/main) <-- points to the last version
|
||||
|
||||
However, you will need to update the version of ALL the other datasets so that they have the new
|
||||
`CODEBASE_VERSION` as a branch in their hugging face dataset repository. Don't worry, there is an easy way
|
||||
that doesn't require to run `push_dataset_to_hub.py`. You can just "branch-out" from the `main` branch on HF
|
||||
dataset repo by running this script which corresponds to a `git checkout -b` (so no copy or upload needed):
|
||||
|
||||
```python
|
||||
import os
|
||||
|
||||
from huggingface_hub import create_branch, hf_hub_download
|
||||
from huggingface_hub.utils._errors import RepositoryNotFoundError
|
||||
|
||||
from lerobot import available_datasets
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
|
||||
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" # makes it easier to see the print-out below
|
||||
|
||||
NEW_CODEBASE_VERSION = "v1.5" # REPLACE THIS WITH YOUR DESIRED VERSION
|
||||
|
||||
for repo_id in available_datasets:
|
||||
# First check if the newer version already exists.
|
||||
try:
|
||||
hf_hub_download(
|
||||
repo_id=repo_id, repo_type="dataset", filename=".gitattributes", revision=NEW_CODEBASE_VERSION
|
||||
)
|
||||
print(f"Found existing branch for {repo_id}. Please contact a member of the core LeRobot team.")
|
||||
print("Exiting early")
|
||||
break
|
||||
except RepositoryNotFoundError:
|
||||
# Now create a branch.
|
||||
create_branch(repo_id, repo_type="dataset", branch=NEW_CODEBASE_VERSION, revision=CODEBASE_VERSION)
|
||||
print(f"{repo_id} successfully updated")
|
||||
|
||||
```
|
||||
|
||||
On the other hand, if you are pushing a new dataset, you don't need to worry about any of the instructions
|
||||
above, nor to be compatible with previous codebase versions.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -104,7 +50,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import torch
|
||||
from huggingface_hub import HfApi, create_branch
|
||||
from huggingface_hub import HfApi
|
||||
from safetensors.torch import save_file
|
||||
|
||||
from lerobot.common.datasets.compute_stats import compute_stats
|
||||
@@ -270,7 +216,8 @@ def push_dataset_to_hub(
|
||||
push_meta_data_to_hub(repo_id, meta_data_dir, revision="main")
|
||||
if video:
|
||||
push_videos_to_hub(repo_id, videos_dir, revision="main")
|
||||
create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)
|
||||
api = HfApi()
|
||||
api.create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)
|
||||
|
||||
if tests_data_dir:
|
||||
# get the first episode
|
||||
|
||||
Reference in New Issue
Block a user