Feat: Improve hub integration (#1382)

* feat(policies): Initial setup to push policies to hub with tags and model card

* feat: add the dataset used for training

* Add model template summary

* fix: update link to model_card template

* fix: remove print

* fix: change import name

* fix: add model summary in template

* fix: minor text

* fix: address comments from Lucain

* fix: address feedback from Steven

* fix: restructure push to hub

* fix: remove unneeded changes

* fix: import

* fix: import 2

* Add MANIFEST.in

* fix: address PR feedback

* Fix tests

* tests: Add smolvla end-to-end test

* Fix: smolvla test

* fix test name

* fix policy tests

* Add push_to_hub=false to policy tests

* Make push to hub cleaner

* fix(ci): add push_to_hub=false in tests

---------

Co-authored-by: Steven Palma <steven.palma@huggingface.co>
Authored by Pepijn on 2025-06-26 14:36:16 +02:00, committed by GitHub
parent a989c79558
commit 0b2285d1ec
13 changed files with 206 additions and 101 deletions

MANIFEST.in (new file)

@@ -0,0 +1,2 @@
include lerobot/templates/lerobot_modelcard_template.md
include lerobot/common/datasets/card_template.md
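The model card template is loaded at runtime from the installed package (see the `importlib.resources` lookup in the policy code further below), so it must be listed in `MANIFEST.in` to be included in source distributions. A minimal sketch of that lookup, assuming lerobot is installed together with its template files:

```python
# Minimal sketch: resolve the packaged model card template the same way the
# policy code below does (files() requires Python 3.9+ or importlib_resources).
from importlib.resources import files

template_text = files("lerobot.templates").joinpath("lerobot_modelcard_template.md").read_text()
print(template_text.splitlines()[0])  # first line of the Jinja/Markdown template
```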


@@ -40,6 +40,8 @@ test-end-to-end:
${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval
${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train
${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval
${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-train
${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-eval
test-act-ete-train:
python lerobot/scripts/train.py \
@@ -48,6 +50,7 @@ test-act-ete-train:
--policy.n_action_steps=20 \
--policy.chunk_size=20 \
--policy.device=$(DEVICE) \
--policy.push_to_hub=false \
--env.type=aloha \
--env.episode_length=5 \
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
@@ -85,6 +88,7 @@ test-diffusion-ete-train:
--policy.diffusion_step_embed_dim=32 \
--policy.num_inference_steps=10 \
--policy.device=$(DEVICE) \
--policy.push_to_hub=false \
--env.type=pusht \
--env.episode_length=5 \
--dataset.repo_id=lerobot/pusht \
@@ -114,6 +118,7 @@ test-tdmpc-ete-train:
python lerobot/scripts/train.py \
--policy.type=tdmpc \
--policy.device=$(DEVICE) \
--policy.push_to_hub=false \
--env.type=xarm \
--env.task=XarmLift-v0 \
--env.episode_length=5 \
@@ -140,3 +145,36 @@ test-tdmpc-ete-eval:
--env.task=XarmLift-v0 \
--eval.n_episodes=1 \
--eval.batch_size=1
test-smolvla-ete-train:
python lerobot/scripts/train.py \
--policy.type=smolvla \
--policy.n_action_steps=20 \
--policy.chunk_size=20 \
--policy.device=$(DEVICE) \
--policy.push_to_hub=false \
--env.type=aloha \
--env.episode_length=5 \
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
--dataset.image_transforms.enable=true \
--dataset.episodes="[0]" \
--batch_size=2 \
--steps=4 \
--eval_freq=2 \
--eval.n_episodes=1 \
--eval.batch_size=1 \
--save_freq=2 \
--save_checkpoint=true \
--log_freq=1 \
--wandb.enable=false \
--output_dir=tests/outputs/smolvla/
test-smolvla-ete-eval:
python lerobot/scripts/eval.py \
--policy.path=tests/outputs/smolvla/checkpoints/000004/pretrained_model \
--policy.device=$(DEVICE) \
--env.type=aloha \
--env.episode_length=5 \
--eval.n_episodes=1 \
--eval.batch_size=1


@@ -255,7 +255,8 @@ python lerobot/scripts/train.py \
--output_dir=outputs/train/act_so101_test \
--job_name=act_so101_test \
--policy.device=cuda \
--wandb.enable=true
--wandb.enable=true \
--policy.repo_id=${HF_USER}/my_policy
```
Let's explain the command:
@@ -273,6 +274,10 @@ python lerobot/scripts/train.py \
--resume=true
```
If you do not want to push your model to the hub after training, use `--policy.push_to_hub=false`.
Additionally, you can provide extra `tags`, specify a `license`, or make the model repo `private`, for example: `--policy.private=true --policy.tags=\[ppo,rl\] --policy.license=mit`.

#### Train using Colab

If your local computer doesn't have a powerful GPU, you can use Google Colab to train your model by following the [ACT training notebook](./notebooks#training-act).


@@ -14,12 +14,14 @@
import abc
import logging
import os
from importlib.resources import files
from pathlib import Path
from typing import Type, TypeVar
from tempfile import TemporaryDirectory
from typing import List, Type, TypeVar
import packaging
import safetensors
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi, ModelCard, ModelCardData, hf_hub_download
from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
from huggingface_hub.errors import HfHubHTTPError
from safetensors.torch import load_model as load_model_as_safetensor
@@ -28,20 +30,10 @@ from torch import Tensor, nn
from lerobot.common.utils.hub import HubMixin
from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.train import TrainPipelineConfig
T = TypeVar("T", bound="PreTrainedPolicy")
DEFAULT_POLICY_CARD = """
---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
{{ card_data }}
---
This policy has been pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot):
- Docs: {{ docs_url | default("[More Information Needed]", true) }}
"""
class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
"""
@@ -150,16 +142,6 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
safetensors.torch.load_model(model, model_file, strict=strict, device=map_location)
return model
# def generate_model_card(self, *args, **kwargs) -> ModelCard:
# card = ModelCard.from_template(
# card_data=self._hub_mixin_info.model_card_data,
# template_str=self._hub_mixin_info.model_card_template,
# repo_url=self._hub_mixin_info.repo_url,
# docs_url=self._hub_mixin_info.docs_url,
# **kwargs,
# )
# return card
@abc.abstractmethod
def get_optim_params(self) -> dict:
"""
@@ -197,3 +179,56 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
with caching.
"""
raise NotImplementedError
    def push_model_to_hub(
        self,
        cfg: TrainPipelineConfig,
    ):
        api = HfApi()
        repo_id = api.create_repo(
            repo_id=self.config.repo_id, private=self.config.private, exist_ok=True
        ).repo_id

        # Push the files to the repo in a single commit
        with TemporaryDirectory(ignore_cleanup_errors=True) as tmp:
            saved_path = Path(tmp) / repo_id
            self.save_pretrained(saved_path)  # Calls _save_pretrained and stores model tensors
            card = self.generate_model_card(
                cfg.dataset.repo_id, self.config.type, self.config.license, self.config.tags
            )
            card.save(str(saved_path / "README.md"))
            cfg.save_pretrained(saved_path)  # Calls _save_pretrained and stores train config

            commit_info = api.upload_folder(
                repo_id=repo_id,
                repo_type="model",
                folder_path=saved_path,
                commit_message="Upload policy weights, train config and readme",
                allow_patterns=["*.safetensors", "*.json", "*.yaml", "*.md"],
                ignore_patterns=["*.tmp", "*.log"],
            )

        logging.info(f"Model pushed to {commit_info.repo_url.url}")

    def generate_model_card(
        self, dataset_repo_id: str, model_type: str, license: str | None, tags: List[str] | None
    ) -> ModelCard:
        base_model = "lerobot/smolvla_base" if model_type == "smolvla" else None  # Set a base model

        card_data = ModelCardData(
            license=license or "apache-2.0",
            library_name="lerobot",
            pipeline_tag="robotics",
            tags=list(set(tags or []).union({"robotics", "lerobot", model_type})),
            model_name=model_type,
            datasets=dataset_repo_id,
            base_model=base_model,
        )
        template_card = files("lerobot.templates").joinpath("lerobot_modelcard_template.md").read_text()
        card = ModelCard.from_template(card_data, template_str=template_card)
        card.validate()
        return card
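The card construction above is a thin wrapper around `huggingface_hub`'s `ModelCardData` and `ModelCard.from_template`. A standalone sketch of the metadata `generate_model_card` produces for an ACT policy, using a shortened stand-in for the packaged template:

```python
# Standalone sketch mirroring generate_model_card(); the template string is a
# shortened stand-in for lerobot_modelcard_template.md and the values are illustrative.
from huggingface_hub import ModelCard, ModelCardData

card_data = ModelCardData(
    license="apache-2.0",
    library_name="lerobot",
    pipeline_tag="robotics",
    tags=["robotics", "lerobot", "act"],
    model_name="act",
    datasets="lerobot/aloha_sim_transfer_cube_human",
)
template = "---\n{{ card_data }}\n---\n# Model Card for {{ model_name | default('Model ID', true) }}\n"
card = ModelCard.from_template(card_data, template_str=template)
print(card.data.to_dict())  # metadata that ends up in the README front matter
```

Calling `card.validate()`, as the method above does, additionally checks the metadata block against the Hub's model card spec.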


@@ -60,6 +60,16 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
    # automatic gradient scaling is used.
    use_amp: bool = False

    # Push the policy to the Hugging Face hub after training.
    push_to_hub: bool = True
    # Repository id to push to on the hub, e.g. `username/policy_name`.
    repo_id: str | None = None
    # Upload to a private repository on the Hugging Face hub.
    private: bool | None = None
    # Add tags to your policy on the hub.
    tags: list[str] | None = None
    # Specify a license for your policy on the hub.
    license: str | None = None

    def __post_init__(self):
        self.pretrained_path = None
        if not self.device or not is_torch_device_available(self.device):
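Because these are plain dataclass fields, they can be set from the CLI (`--policy.repo_id=...`, `--policy.tags=...`) or programmatically. A small sketch, assuming the `make_policy_config` factory used in the test changes below (its import path is an assumption):

```python
# Hedged sketch: illustrative values; the factory import path is assumed from the tests below.
from lerobot.common.policies.factory import make_policy_config

policy_cfg = make_policy_config(
    "act",
    push_to_hub=True,
    repo_id="<hf_user>/act_so101_test",  # required whenever push_to_hub is True
    private=False,
    tags=["aloha", "tutorial"],
    license="mit",
)
```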


@@ -116,6 +116,11 @@ class TrainPipelineConfig(HubMixin):
        self.optimizer = self.policy.get_optimizer_preset()
        self.scheduler = self.policy.get_scheduler_preset()

        if self.policy.push_to_hub and not self.policy.repo_id:
            raise ValueError(
                "'policy.repo_id' argument missing. Please specify it to push the model to the hub."
            )

    @classmethod
    def __get_path_fields__(cls) -> list[str]:
        """This enables the parser to load config from the policy using `--policy.path=local/dir`"""


@@ -65,7 +65,10 @@ from lerobot.common.robots import ( # noqa: F401
from lerobot.common.teleoperators import (  # noqa: F401
    Teleoperator,
    TeleoperatorConfig,
    koch_leader,
    make_teleoperator_from_config,
    so100_leader,
    so101_leader,
)
from lerobot.common.utils.control_utils import (
    init_keyboard_listener,
@@ -84,8 +87,6 @@ from lerobot.common.utils.visualization_utils import _init_rerun
from lerobot.configs import parser
from lerobot.configs.policies import PreTrainedConfig
from .common.teleoperators import koch_leader, so100_leader, so101_leader # noqa: F401
@dataclass
class DatasetRecordConfig:


@@ -1,71 +0,0 @@
#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Once you have trained a policy with our training script (lerobot/scripts/train.py), use this script to push it
to the hub.
Example:
```bash
python lerobot/scripts/push_pretrained.py \
--pretrained_path=outputs/train/act_aloha_sim_transfer_cube_human/checkpoints/last/pretrained_model \
--repo_id=lerobot/act_aloha_sim_transfer_cube_human
```
"""
from dataclasses import dataclass
from pathlib import Path
import draccus
from huggingface_hub import HfApi
@dataclass
class PushPreTrainedConfig:
    pretrained_path: Path
    repo_id: str
    branch: str | None = None
    private: bool = False
    exist_ok: bool = False


@draccus.wrap()
def main(cfg: PushPreTrainedConfig):
    hub_api = HfApi()

    hub_api.create_repo(
        repo_id=cfg.repo_id,
        private=cfg.private,
        repo_type="model",
        exist_ok=cfg.exist_ok,
    )
    if cfg.branch:
        hub_api.create_branch(
            repo_id=cfg.repo_id,
            branch=cfg.branch,
            repo_type="model",
            exist_ok=cfg.exist_ok,
        )

    hub_api.upload_folder(
        repo_id=cfg.repo_id,
        folder_path=cfg.pretrained_path,
        repo_type="model",
        revision=cfg.branch,
    )


if __name__ == "__main__":
    main()


@@ -282,6 +282,9 @@ def train(cfg: TrainPipelineConfig):
    eval_env.close()
    logging.info("End of training")

    if cfg.policy.push_to_hub:
        policy.push_model_to_hub(cfg)


if __name__ == "__main__":
    init_logging()


@@ -0,0 +1,74 @@
---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
{{ card_data }}
---
# Model Card for {{ model_name | default("Model ID", true) }}
<!-- Provide a quick summary of what the model is/does. -->
{% if model_name == "smolvla" %}
[SmolVLA](https://huggingface.co/papers/2506.01844) is a compact, efficient vision-language-action model that achieves competitive performance at reduced computational costs and can be deployed on consumer-grade hardware.
{% elif model_name == "act" %}
[Action Chunking with Transformers (ACT)](https://huggingface.co/papers/2304.13705) is an imitation-learning method that predicts short action chunks instead of single steps. It learns from teleoperated data and often achieves high success rates.
{% elif model_name == "tdmpc" %}
[TD-MPC](https://huggingface.co/papers/2203.04955) combines model-free and model-based approaches to improve sample efficiency and performance in continuous control tasks by using a learned latent dynamics model and terminal value function.
{% elif model_name == "diffusion" %}
[Diffusion Policy](https://huggingface.co/papers/2303.04137) treats visuomotor control as a generative diffusion process, producing smooth, multi-step action trajectories that excel at contact-rich manipulation.
{% elif model_name == "vqbet" %}
[VQ-BET](https://huggingface.co/papers/2403.03181) combines vector-quantised action tokens with Behaviour Transformers to discretise control and achieve data-efficient imitation across diverse skills.
{% elif model_name == "pi0" %}
[Pi0](https://huggingface.co/papers/2410.24164) is a generalist vision-language-action transformer that converts multimodal observations and text instructions into robot actions for zero-shot task transfer.
{% elif model_name == "pi0fast" %}
[Pi0-Fast](https://huggingface.co/papers/2501.09747) is a variant of Pi0 that uses a new tokenization method called FAST, which enables training of an autoregressive vision-language-action policy for high-frequency robotic tasks with improved performance and reduced training time.
{% elif model_name == "sac" %}
[Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) is an entropy-regularised actor-critic algorithm offering stable, sample-efficient learning in continuous-control environments.
{% elif model_name == "reward_classifier" %}
A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable.
{% else %}
_Model type not recognized — please update this template._
{% endif %}
This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index).
---
## How to Get Started with the Model
For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy).
Below is the short version on how to train and run inference/eval:
### Train from scratch
```bash
python lerobot/scripts/train.py \
  --dataset.repo_id=${HF_USER}/<dataset> \
  --policy.type=act \
  --output_dir=outputs/train/<desired_policy_repo_id> \
  --job_name=lerobot_training \
  --policy.device=cuda \
  --policy.repo_id=${HF_USER}/<desired_policy_repo_id> \
  --wandb.enable=true
```
*Writes checkpoints to `outputs/train/<desired_policy_repo_id>/checkpoints/`.*
### Evaluate the policy/run inference
```bash
python -m lerobot.record \
  --robot.type=so100_follower \
  --dataset.repo_id=<hf_user>/eval_<dataset> \
  --policy.path=<hf_user>/<desired_policy_repo_id> \
  --episodes=10
```
Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint.
---
## Model Details
* **License:** {{ license | default("\[More Information Needed]", true) }}
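Beyond `lerobot.record`, a pushed checkpoint can also be loaded back in Python through the `from_pretrained` machinery that `PreTrainedPolicy` inherits from `HubMixin`. A sketch, where the concrete policy class path is an assumption:

```python
# Sketch: load a policy that was pushed with --policy.push_to_hub (class path assumed).
from lerobot.common.policies.act.modeling_act import ACTPolicy

policy = ACTPolicy.from_pretrained("<hf_user>/<desired_policy_repo_id>")
policy.eval()  # ready for inference on preprocessed observations
```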


@@ -32,7 +32,7 @@ def get_policy_stats(ds_repo_id: str, policy_name: str, policy_kwargs: dict):
train_cfg = TrainPipelineConfig(
# TODO(rcadene, aliberts): remove dataset download
dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]),
policy=make_policy_config(policy_name, **policy_kwargs),
policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs),
)
train_cfg.validate() # Needed for auto-setting some parameters


@@ -338,8 +338,9 @@ def test_factory(env_name, repo_id, policy_name):
# TODO(rcadene, aliberts): remove dataset download
dataset=DatasetConfig(repo_id=repo_id, episodes=[0]),
env=make_env_config(env_name),
policy=make_policy_config(policy_name),
policy=make_policy_config(policy_name, push_to_hub=False),
)
cfg.validate()
dataset = make_dataset(cfg)
delta_timestamps = dataset.delta_timestamps


@@ -142,9 +142,10 @@ def test_policy(ds_repo_id, env_name, env_kwargs, policy_name, policy_kwargs):
train_cfg = TrainPipelineConfig(
# TODO(rcadene, aliberts): remove dataset download
dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]),
policy=make_policy_config(policy_name, **policy_kwargs),
policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs),
env=make_env_config(env_name, **env_kwargs),
)
train_cfg.validate()
# Check that we can make the policy object.
dataset = make_dataset(train_cfg)
@@ -213,7 +214,7 @@ def test_act_backbone_lr():
cfg = TrainPipelineConfig(
# TODO(rcadene, aliberts): remove dataset download
dataset=DatasetConfig(repo_id="lerobot/aloha_sim_insertion_scripted", episodes=[0]),
policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001),
policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001, push_to_hub=False),
)
cfg.validate() # Needed for auto-setting some parameters
@@ -415,6 +416,7 @@ def test_backward_compatibility(ds_repo_id: str, policy_name: str, policy_kwargs
https://github.com/huggingface/lerobot/pull/1127.
"""
# NOTE: ACT policy has different randomness after PyTorch 2.7.0
if policy_name == "act" and version.parse(torch.__version__) < version.parse("2.7.0"):
pytest.skip(f"Skipping act policy test with PyTorch {torch.__version__}. Requires PyTorch >= 2.7.0")