optimize shard

This commit is contained in:
Remi Cadene
2025-02-22 10:13:09 +00:00
parent eda0b996cd
commit 689c5efc72
2 changed files with 15 additions and 6 deletions

View File

@@ -205,7 +205,16 @@ def create_lerobot_dataset(
builder = tfds.builder(dataset_name, data_dir=data_dir, version=version)
features = generate_features_from_raw(dataset_name, builder, use_videos)
raw_dataset = builder.as_dataset(split="train")
if num_shards is not None:
if num_shards != builder.info.splits["train"].num_shards:
raise ValueError()
if shard_index >= builder.info.splits["train"].num_shards:
raise ValueError()
raw_dataset = builder.as_dataset(split=f"train[{shard_index}shard]")
else:
raw_dataset = builder.as_dataset(split="train")
if fps is None:
if dataset_name in OXE_DATASET_CONFIGS:
@@ -234,8 +243,6 @@ def create_lerobot_dataset(
dataset_name,
lerobot_dataset,
raw_dataset,
num_shards=num_shards,
shard_index=shard_index,
)
if push_to_hub: