Add example for training on custom openpi datasets (#140)

2024-12-23 16:09:03 -08:00
parent 385780ecc3 be131bfc6b
commit 5eed870a64
9 changed files with 76 additions and 45 deletions
--- a/README.md
+++ b/README.md
@@ -63,6 +63,8 @@ uv run scripts/train.py pi0_aloha_sim --exp-name=my_experiment --overwrite
 The `pi0_aloha_sim` config is optimized for training on a single H100 GPU. By default, JAX pre-allocates 75% of available GPU memory. We set `XLA_PYTHON_CLIENT_MEM_FRACTION=0.9` to allow JAX to use up to 90% of GPU memory, which enables training with larger batch sizes while maintaining stability.
 The training script automatically utilizes all available GPUs on a single node. Currently, distributed training across multiple nodes is not supported.
 An example for how to train on your own Aloha dataset is provided in the [ALOHA Real README](examples/aloha_real/README.md).
 ## Running examples
--- a/examples/aloha_real/README.md
+++ b/examples/aloha_real/README.md
@@ -54,20 +54,6 @@ While we strongly recommend fine-tuning the model to your own data to adapt it t
 ## Training on your own Aloha dataset
-OpenPI suppports training on data collected in the default aloha hdf5 format. To do so you must first convert the data to the huggingface format. We include `scripts/aloha_hd5.py` to help you do this. Once the dataset is converted, add a new `TrainConfig` to `src/openpi/training/configs.py` and replace repo id with the id assigned to your dataset during conversion.
+OpenPI suppports training on data collected in the default aloha hdf5 format using the `scripts/aloha_hd5.py` conversion script. Once the dataset is converted, add a new `TrainConfig` to `src/openpi/training/configs.py` (see the `aloha_static_cups_open` example config) and replace repo id with the id assigned to your dataset during conversion. Before training on a new dataset, you must first compute the norm stats using `scripts/compute_norm_stats.py`.
-```python
+NOTE: When finetuning the pi0 base model on Aloha data it is recommended that you set `adapt_to_pi=True`. This maps the state and action spaces from the original aloha data to the state and action spaces of the aloha data used  to train the base model.
 TrainConfig(
    name=<your-config-name>,
    data=LeRobotAlohaDataConfig(
        repo_id=<your-repo-id>,
        delta_action_mask=[True] * 6 + [False] + [True] * 6 + [False],
    ),
 ),
 ```
 Run the training script:
 ```bash
 uv run scripts/train.py <your-config-name>
 ```
--- a/scripts/serve_policy.py
+++ b/scripts/serve_policy.py
@@ -16,6 +16,7 @@ from openpi.policies import libero_policy
 from openpi.policies import policy as _policy
 from openpi.policies import policy_config as _policy_config
 from openpi.serving import websocket_policy_server
 from openpi.shared import delta_actions
 from openpi.training import config as _config
@@ -146,7 +147,7 @@ def create_default_policy(
    logging.info("Creating policy...")
    match env:
        case EnvMode.ALOHA:
-            delta_action_mask = _policy_config.make_bool_mask(6, -1, 6, -1)
+            delta_action_mask = delta_actions.make_bool_mask(6, -1, 6, -1)
            config = make_policy_config(
                input_layers=[
                    aloha_policy.ActInputsRepack(),
--- a/src/openpi/policies/policy_config.py
+++ b/src/openpi/policies/policy_config.py
@@ -99,25 +99,3 @@ def create_trained_policy(
        ],
        sample_kwargs=sample_kwargs,
    )
 def make_bool_mask(*dims: int) -> tuple[bool, ...]:
    """Make a boolean mask for the given dimensions.
    Example:
        make_bool_mask(2, -2, 2) == (True, True, False, False, True, True)
        make_bool_mask(2, 0, 2) == (True, True, True, True)
    Args:
        dims: The dimensions to make the mask for.
    Returns:
        A tuple of booleans.
    """
    result = []
    for dim in dims:
        if dim > 0:
            result.extend([True] * (dim))
        else:
            result.extend([False] * (-dim))
    return tuple(result)
--- a/src/openpi/policies/policy_config_test.py
+++ b/src/openpi/policies/policy_config_test.py
@@ -2,11 +2,6 @@ from openpi.policies import policy_config as _policy_config
 from openpi.training import config as _config
 def test_make_bool_mask():
    assert _policy_config.make_bool_mask(2, -2, 2) == (True, True, False, False, True, True)
    assert _policy_config.make_bool_mask(2, 0, 2) == (True, True, True, True)
 def test_create_trained_policy():
    policy = _policy_config.create_trained_policy(
        _config.get_config("debug"),
--- a/src/openpi/shared/delta_actions.py
+++ b/src/openpi/shared/delta_actions.py
@@ -0,0 +1,20 @@
 def make_bool_mask(*dims: int) -> tuple[bool, ...]:
    """Make a boolean mask for the given dimensions.
    Example:
        make_bool_mask(2, -2, 2) == (True, True, False, False, True, True)
        make_bool_mask(2, 0, 2) == (True, True, True, True)
    Args:
        dims: The dimensions to make the mask for.
    Returns:
        A tuple of booleans.
    """
    result = []
    for dim in dims:
        if dim > 0:
            result.extend([True] * (dim))
        else:
            result.extend([False] * (-dim))
    return tuple(result)
--- a/src/openpi/shared/delta_actions_test.py
+++ b/src/openpi/shared/delta_actions_test.py
@@ -0,0 +1,6 @@
 from openpi.shared import delta_actions
 def test_make_bool_mask():
    assert delta_actions.make_bool_mask(2, -2, 2) == (True, True, False, False, True, True)
    assert delta_actions.make_bool_mask(2, 0, 2) == (True, True, True, True)
--- a/src/openpi/training/config.py
+++ b/src/openpi/training/config.py
@@ -14,6 +14,7 @@ import openpi.models.pi0 as pi0
 import openpi.models.pi0_small as pi0_small
 import openpi.models.tokenizer as _tokenizer
 import openpi.policies.aloha_policy as aloha_policy
 from openpi.shared import delta_actions
 import openpi.shared.download as download
 import openpi.shared.normalize as _normalize
 import openpi.training.optimizer as _optimizer
@@ -45,6 +46,9 @@ class DataConfig:
    # Indicates where the cached dataset should be stored.
    dataset_root: str | None = dataclasses.field(default_factory=default_dataset_root)
    # If true, will disable syncing the dataset from the huggingface hub. Allows training on local-only datasets.
    local_files_only: bool = False
 class DataConfigFactory(Protocol):
    def create(self, metadata_dir: pathlib.Path, model: _model.Model) -> DataConfig:
@@ -70,6 +74,8 @@ class LeRobotAlohaDataConfig(DataConfigFactory):
    adapt_to_pi: bool = False
    # Repack transforms. Default is used if not provided.
    repack_transforms: _transforms.Group | None = None
    # If true, will disable syncing the dataset from the huggingface hub.
    local_files_only: bool = False
    def create(self, metadata_dir: pathlib.Path, model: _model.Model) -> DataConfig:
        norm_stats_path = metadata_dir / self.repo_id / "norm_stats.json"
@@ -115,6 +121,7 @@ class LeRobotAlohaDataConfig(DataConfigFactory):
                    ),
                ]
            ),
            local_files_only=self.local_files_only,
        )
@@ -237,6 +244,39 @@ _CONFIGS = [
        weight_loader=weight_loaders.GoogleViTWeightLoader(),
    ),
    #
    # Example configs.
    #
    TrainConfig(
        name="aloha_static_cups_open",
        data=LeRobotAlohaDataConfig(
            repo_id="lerobot/aloha_static_cups_open",
            delta_action_mask=delta_actions.make_bool_mask(6, -1, 6, -1),
            adapt_to_pi=True,
            repack_transforms=_transforms.Group(
                inputs=[
                    _transforms.RepackTransform(
                        {
                            "images": {
                                "cam_high": "observation.images.cam_high",
                                "cam_left_wrist": "observation.images.cam_left_wrist",
                                "cam_right_wrist": "observation.images.cam_right_wrist",
                            },
                            "state": "observation.state",
                            "actions": "action",
                        }
                    )
                ]
            ),
            # Set this to true if you are using a dataset that is not on the huggingface hub.
            local_files_only=False,
        ),
        weight_loader=weight_loaders.CheckpointWeightLoader("s3://openpi-assets/checkpoints/pi0_base/params"),
        num_train_steps=30_000,
        batch_size=64,
        lr_schedule=_optimizer.CosineDecaySchedule(
            warmup_steps=1_000, peak_lr=2.5e-5, decay_steps=30_000, decay_lr=2.5e-6
        ),
    ),
    # Debugging configs.
    #
    TrainConfig(
--- a/src/openpi/training/data_loader.py
+++ b/src/openpi/training/data_loader.py
@@ -88,11 +88,14 @@ def create_dataset(data_config: _config.DataConfig, model: _model.Model) -> Data
    if repo_id == "fake":
        return FakeDataset(model, num_samples=1024)
-    dataset_meta = lerobot_dataset.LeRobotDatasetMetadata(repo_id)
+    dataset_meta = lerobot_dataset.LeRobotDatasetMetadata(
        repo_id, root=data_config.dataset_root, local_files_only=data_config.local_files_only
    )
    return lerobot_dataset.LeRobotDataset(
        data_config.repo_id,
        delta_timestamps={"action": [t / dataset_meta.fps for t in range(model.action_horizon)]},
        root=data_config.dataset_root,
        local_files_only=data_config.local_files_only,
    )