Add missing finalize calls in example (#2175)
- Add missing calls to `dataset.finalize()` in the example recording scripts
- Add a section in the dataset docs on calling `dataset.finalize()`
This commit is contained in:
@@ -279,3 +279,36 @@ python -m lerobot.datasets.v30.convert_dataset_v21_to_v30 --repo-id=<HF_USER/DAT
|
|||||||
- Aggregates parquet files: `episode-0000.parquet`, `episode-0001.parquet`, … → **`file-0000.parquet`**, …
|
- Aggregates parquet files: `episode-0000.parquet`, `episode-0001.parquet`, … → **`file-0000.parquet`**, …
|
||||||
- Aggregates mp4 files: `episode-0000.mp4`, `episode-0001.mp4`, … → **`file-0000.mp4`**, …
|
- Aggregates mp4 files: `episode-0000.mp4`, `episode-0001.mp4`, … → **`file-0000.mp4`**, …
|
||||||
- Updates `meta/episodes/*` (chunked Parquet) with per‑episode lengths, tasks, and byte/frame offsets.
|
- Updates `meta/episodes/*` (chunked Parquet) with per‑episode lengths, tasks, and byte/frame offsets.
|
||||||
|
|
||||||
|
## Common Issues
|
||||||
|
|
||||||
|
### Always call `finalize()` before pushing
|
||||||
|
|
||||||
|
When creating or recording datasets, you **must** call `dataset.finalize()` to properly close the parquet writers. See [PR #1903](https://github.com/huggingface/lerobot/pull/1903) for more details.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||||
|
|
||||||
|
# Create dataset and record episodes
|
||||||
|
dataset = LeRobotDataset.create(...)
|
||||||
|
|
||||||
|
for episode in range(num_episodes):
|
||||||
|
    # Record frames
|
||||||
|
    for frame in episode_data:
|
||||||
|
        dataset.add_frame(frame)
|
||||||
|
    dataset.save_episode()
|
||||||
|
|
||||||
|
# Call finalize() when done recording and before push_to_hub()
|
||||||
|
dataset.finalize() # Closes parquet writers, writes metadata footers
|
||||||
|
dataset.push_to_hub()
|
||||||
|
```
|
||||||
|
|
||||||
|
**Why is this necessary?**
|
||||||
|
|
||||||
|
Dataset v3.0 uses incremental parquet writing with buffered metadata for efficiency. The `finalize()` method:
|
||||||
|
|
||||||
|
- Flushes any buffered episode metadata to disk
|
||||||
|
- Closes the parquet writers so that the footer metadata is written; otherwise, the parquet files will be corrupt
|
||||||
|
- Ensures the dataset is valid for loading
|
||||||
|
|
||||||
|
Without calling `finalize()`, your parquet files will be incomplete and the dataset won't load properly.
|
||||||
|
|||||||
@@ -133,4 +133,6 @@ while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
|
|||||||
log_say("Stop recording")
|
log_say("Stop recording")
|
||||||
robot.disconnect()
|
robot.disconnect()
|
||||||
listener.stop()
|
listener.stop()
|
||||||
|
|
||||||
|
dataset.finalize()
|
||||||
dataset.push_to_hub()
|
dataset.push_to_hub()
|
||||||
|
|||||||
@@ -130,4 +130,6 @@ robot.disconnect()
|
|||||||
leader_arm.disconnect()
|
leader_arm.disconnect()
|
||||||
keyboard.disconnect()
|
keyboard.disconnect()
|
||||||
listener.stop()
|
listener.stop()
|
||||||
|
|
||||||
|
dataset.finalize()
|
||||||
dataset.push_to_hub()
|
dataset.push_to_hub()
|
||||||
|
|||||||
@@ -194,4 +194,6 @@ for episode_idx in range(NUM_EPISODES):
|
|||||||
log_say("Stop recording")
|
log_say("Stop recording")
|
||||||
robot.disconnect()
|
robot.disconnect()
|
||||||
listener.stop()
|
listener.stop()
|
||||||
|
|
||||||
|
dataset.finalize()
|
||||||
dataset.push_to_hub()
|
dataset.push_to_hub()
|
||||||
|
|||||||
@@ -200,4 +200,6 @@ log_say("Stop recording")
|
|||||||
robot.disconnect()
|
robot.disconnect()
|
||||||
phone.disconnect()
|
phone.disconnect()
|
||||||
listener.stop()
|
listener.stop()
|
||||||
|
|
||||||
|
dataset.finalize()
|
||||||
dataset.push_to_hub()
|
dataset.push_to_hub()
|
||||||
|
|||||||
@@ -362,6 +362,8 @@ def port_droid(
|
|||||||
lerobot_dataset.save_episode()
|
lerobot_dataset.save_episode()
|
||||||
logging.info("Save_episode")
|
logging.info("Save_episode")
|
||||||
|
|
||||||
|
lerobot_dataset.finalize()
|
||||||
|
|
||||||
if push_to_hub:
|
if push_to_hub:
|
||||||
lerobot_dataset.push_to_hub(
|
lerobot_dataset.push_to_hub(
|
||||||
# Add openx tag, since it belongs to the openx collection of datasets
|
# Add openx tag, since it belongs to the openx collection of datasets
|
||||||
|
|||||||
@@ -195,4 +195,6 @@ for episode_idx in range(NUM_EPISODES):
|
|||||||
log_say("Stop recording")
|
log_say("Stop recording")
|
||||||
robot.disconnect()
|
robot.disconnect()
|
||||||
listener.stop()
|
listener.stop()
|
||||||
|
|
||||||
|
dataset.finalize()
|
||||||
dataset.push_to_hub()
|
dataset.push_to_hub()
|
||||||
|
|||||||
@@ -199,4 +199,6 @@ log_say("Stop recording")
|
|||||||
leader.disconnect()
|
leader.disconnect()
|
||||||
follower.disconnect()
|
follower.disconnect()
|
||||||
listener.stop()
|
listener.stop()
|
||||||
|
|
||||||
|
dataset.finalize()
|
||||||
dataset.push_to_hub()
|
dataset.push_to_hub()
|
||||||
|
|||||||
Reference in New Issue
Block a user