Feat: Improve hub integration (#1382)

* feat(policies): Initial setup to push policies to hub with tags and model card

* feat: add the dataset used for training

* Add model template summary

* fix: update link to model_card template

* fix: remove print

* fix: change import name

* fix: add model summary in template

* fix: minor text

* fix: address comments from Lucain

* fix: address feedback from Steven

* fix: restructure push to hub

* fix: remove unneeded changes

* fix: import

* fix: import 2

* Add MANIFEST.in

* fix: address PR feedback

* Fix tests

* tests: Add smolvla end-to-end test

* Fix: smolvla test

* fix test name

* fix policy tests

* Add push_to_hub=false to policy tests

* Make push to hub cleaner

* fix(ci): add push_to_hub=false in tests

---------

Co-authored-by: Steven Palma <steven.palma@huggingface.co>
Authored by Pepijn on 2025-06-26 14:36:16 +02:00, committed by GitHub
parent a989c79558
commit 0b2285d1ec
13 changed files with 206 additions and 101 deletions

MANIFEST.in (new file)

@@ -0,0 +1,2 @@
include lerobot/templates/lerobot_modelcard_template.md
include lerobot/common/datasets/card_template.md
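The model card template is loaded at runtime from the installed package (see the `importlib.resources` lookup in the policy code further below), so it must be listed in `MANIFEST.in` to be included in source distributions. A minimal sketch of that lookup, assuming lerobot is installed together with its template files:

```python
# Minimal sketch: resolve the packaged model card template the same way the
# policy code below does (files() requires Python 3.9+ or importlib_resources).
from importlib.resources import files

template_text = files("lerobot.templates").joinpath("lerobot_modelcard_template.md").read_text()
print(template_text.splitlines()[0])  # first line of the Jinja/Markdown template
```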


@@ -40,6 +40,8 @@ test-end-to-end:
${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval
${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train
${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval
${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-train
${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-eval
test-act-ete-train:
python lerobot/scripts/train.py \
@@ -48,6 +50,7 @@ test-act-ete-train:
--policy.n_action_steps=20 \
--policy.chunk_size=20 \
--policy.device=$(DEVICE) \
--policy.push_to_hub=false \
--env.type=aloha \
--env.episode_length=5 \
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
@@ -85,6 +88,7 @@ test-diffusion-ete-train:
--policy.diffusion_step_embed_dim=32 \
--policy.num_inference_steps=10 \
--policy.device=$(DEVICE) \
--policy.push_to_hub=false \
--env.type=pusht \
--env.episode_length=5 \
--dataset.repo_id=lerobot/pusht \
@@ -114,6 +118,7 @@ test-tdmpc-ete-train:
python lerobot/scripts/train.py \
--policy.type=tdmpc \
--policy.device=$(DEVICE) \
--policy.push_to_hub=false \
--env.type=xarm \
--env.task=XarmLift-v0 \
--env.episode_length=5 \
@@ -140,3 +145,36 @@ test-tdmpc-ete-eval:
--env.task=XarmLift-v0 \
--eval.n_episodes=1 \
--eval.batch_size=1
test-smolvla-ete-train:
python lerobot/scripts/train.py \
--policy.type=smolvla \
--policy.n_action_steps=20 \
--policy.chunk_size=20 \
--policy.device=$(DEVICE) \
--policy.push_to_hub=false \
--env.type=aloha \
--env.episode_length=5 \
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
--dataset.image_transforms.enable=true \
--dataset.episodes="[0]" \
--batch_size=2 \
--steps=4 \
--eval_freq=2 \
--eval.n_episodes=1 \
--eval.batch_size=1 \
--save_freq=2 \
--save_checkpoint=true \
--log_freq=1 \
--wandb.enable=false \
--output_dir=tests/outputs/smolvla/
test-smolvla-ete-eval:
python lerobot/scripts/eval.py \
--policy.path=tests/outputs/smolvla/checkpoints/000004/pretrained_model \
--policy.device=$(DEVICE) \
--env.type=aloha \
--env.episode_length=5 \
--eval.n_episodes=1 \
--eval.batch_size=1


@@ -255,7 +255,8 @@ python lerobot/scripts/train.py \
--output_dir=outputs/train/act_so101_test \
--job_name=act_so101_test \
--policy.device=cuda \
--wandb.enable=true
--wandb.enable=true \
--policy.repo_id=${HF_USER}/my_policy
```
Let's explain the command:
@@ -273,6 +274,10 @@ python lerobot/scripts/train.py \
--resume=true
```
If you do not want to push your model to the hub after training, use `--policy.push_to_hub=false`.
Additionally, you can provide extra `tags`, specify a `license`, or make the model repo `private`, for example: `--policy.private=true --policy.tags=\[ppo,rl\] --policy.license=mit`.

#### Train using Colab

If your local computer doesn't have a powerful GPU, you can use Google Colab to train your model by following the [ACT training notebook](./notebooks#training-act).


@@ -14,12 +14,14 @@
import abc
import logging
import os
from importlib.resources import files
from pathlib import Path
from typing import Type, TypeVar
from tempfile import TemporaryDirectory
from typing import List, Type, TypeVar
import packaging
import safetensors
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi, ModelCard, ModelCardData, hf_hub_download
from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
from huggingface_hub.errors import HfHubHTTPError
from safetensors.torch import load_model as load_model_as_safetensor
@@ -28,20 +30,10 @@ from torch import Tensor, nn
from lerobot.common.utils.hub import HubMixin
from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.train import TrainPipelineConfig
T = TypeVar("T", bound="PreTrainedPolicy")
DEFAULT_POLICY_CARD = """
---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
{{ card_data }}
---
This policy has been pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot):
- Docs: {{ docs_url | default("[More Information Needed]", true) }}
"""
class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
"""
@@ -150,16 +142,6 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
safetensors.torch.load_model(model, model_file, strict=strict, device=map_location)
return model
# def generate_model_card(self, *args, **kwargs) -> ModelCard:
# card = ModelCard.from_template(
# card_data=self._hub_mixin_info.model_card_data,
# template_str=self._hub_mixin_info.model_card_template,
# repo_url=self._hub_mixin_info.repo_url,
# docs_url=self._hub_mixin_info.docs_url,
# **kwargs,
# )
# return card
@abc.abstractmethod
def get_optim_params(self) -> dict:
"""
@@ -197,3 +179,56 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
with caching.
"""
raise NotImplementedError
    def push_model_to_hub(
        self,
        cfg: TrainPipelineConfig,
    ):
        api = HfApi()
        repo_id = api.create_repo(
            repo_id=self.config.repo_id, private=self.config.private, exist_ok=True
        ).repo_id

        # Push the files to the repo in a single commit
        with TemporaryDirectory(ignore_cleanup_errors=True) as tmp:
            saved_path = Path(tmp) / repo_id
            self.save_pretrained(saved_path)  # Calls _save_pretrained and stores model tensors
            card = self.generate_model_card(
                cfg.dataset.repo_id, self.config.type, self.config.license, self.config.tags
            )
            card.save(str(saved_path / "README.md"))
            cfg.save_pretrained(saved_path)  # Calls _save_pretrained and stores train config

            commit_info = api.upload_folder(
                repo_id=repo_id,
                repo_type="model",
                folder_path=saved_path,
                commit_message="Upload policy weights, train config and readme",
                allow_patterns=["*.safetensors", "*.json", "*.yaml", "*.md"],
                ignore_patterns=["*.tmp", "*.log"],
            )

        logging.info(f"Model pushed to {commit_info.repo_url.url}")

    def generate_model_card(
        self, dataset_repo_id: str, model_type: str, license: str | None, tags: List[str] | None
    ) -> ModelCard:
        base_model = "lerobot/smolvla_base" if model_type == "smolvla" else None  # Set a base model

        card_data = ModelCardData(
            license=license or "apache-2.0",
            library_name="lerobot",
            pipeline_tag="robotics",
            tags=list(set(tags or []).union({"robotics", "lerobot", model_type})),
            model_name=model_type,
            datasets=dataset_repo_id,
            base_model=base_model,
        )
        template_card = files("lerobot.templates").joinpath("lerobot_modelcard_template.md").read_text()
        card = ModelCard.from_template(card_data, template_str=template_card)
        card.validate()
        return card
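The card construction above is a thin wrapper around `huggingface_hub`'s `ModelCardData` and `ModelCard.from_template`. A standalone sketch of the metadata `generate_model_card` produces for an ACT policy, using a shortened stand-in for the packaged template:

```python
# Standalone sketch mirroring generate_model_card(); the template string is a
# shortened stand-in for lerobot_modelcard_template.md and the values are illustrative.
from huggingface_hub import ModelCard, ModelCardData

card_data = ModelCardData(
    license="apache-2.0",
    library_name="lerobot",
    pipeline_tag="robotics",
    tags=["robotics", "lerobot", "act"],
    model_name="act",
    datasets="lerobot/aloha_sim_transfer_cube_human",
)
template = "---\n{{ card_data }}\n---\n# Model Card for {{ model_name | default('Model ID', true) }}\n"
card = ModelCard.from_template(card_data, template_str=template)
print(card.data.to_dict())  # metadata that ends up in the README front matter
```

Calling `card.validate()`, as the method above does, additionally checks the metadata block against the Hub's model card spec.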


@@ -60,6 +60,16 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
    # automatic gradient scaling is used.
    use_amp: bool = False

    # Push the policy to the Hugging Face hub after training.
    push_to_hub: bool = True
    # Repository id to push to on the hub, e.g. `username/policy_name`.
    repo_id: str | None = None
    # Upload to a private repository on the Hugging Face hub.
    private: bool | None = None
    # Add tags to your policy on the hub.
    tags: list[str] | None = None
    # Specify a license for your policy on the hub.
    license: str | None = None

    def __post_init__(self):
        self.pretrained_path = None
        if not self.device or not is_torch_device_available(self.device):
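Because these are plain dataclass fields, they can be set from the CLI (`--policy.repo_id=...`, `--policy.tags=...`) or programmatically. A small sketch, assuming the `make_policy_config` factory used in the test changes below (its import path is an assumption):

```python
# Hedged sketch: illustrative values; the factory import path is assumed from the tests below.
from lerobot.common.policies.factory import make_policy_config

policy_cfg = make_policy_config(
    "act",
    push_to_hub=True,
    repo_id="<hf_user>/act_so101_test",  # required whenever push_to_hub is True
    private=False,
    tags=["aloha", "tutorial"],
    license="mit",
)
```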


@@ -116,6 +116,11 @@ class TrainPipelineConfig(HubMixin):
        self.optimizer = self.policy.get_optimizer_preset()
        self.scheduler = self.policy.get_scheduler_preset()

        if self.policy.push_to_hub and not self.policy.repo_id:
            raise ValueError(
                "'policy.repo_id' argument missing. Please specify it to push the model to the hub."
            )

    @classmethod
    def __get_path_fields__(cls) -> list[str]:
        """This enables the parser to load config from the policy using `--policy.path=local/dir`"""


@@ -65,7 +65,10 @@ from lerobot.common.robots import ( # noqa: F401
from lerobot.common.teleoperators import (  # noqa: F401
    Teleoperator,
    TeleoperatorConfig,
    koch_leader,
    make_teleoperator_from_config,
    so100_leader,
    so101_leader,
)
from lerobot.common.utils.control_utils import (
    init_keyboard_listener,
@@ -84,8 +87,6 @@ from lerobot.common.utils.visualization_utils import _init_rerun
from lerobot.configs import parser
from lerobot.configs.policies import PreTrainedConfig
from .common.teleoperators import koch_leader, so100_leader, so101_leader # noqa: F401
@dataclass
class DatasetRecordConfig:


@@ -1,71 +0,0 @@
#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Once you have trained a policy with our training script (lerobot/scripts/train.py), use this script to push it
to the hub.
Example:
```bash
python lerobot/scripts/push_pretrained.py \
--pretrained_path=outputs/train/act_aloha_sim_transfer_cube_human/checkpoints/last/pretrained_model \
--repo_id=lerobot/act_aloha_sim_transfer_cube_human
```
"""
from dataclasses import dataclass
from pathlib import Path
import draccus
from huggingface_hub import HfApi
@dataclass
class PushPreTrainedConfig:
    pretrained_path: Path
    repo_id: str
    branch: str | None = None
    private: bool = False
    exist_ok: bool = False


@draccus.wrap()
def main(cfg: PushPreTrainedConfig):
    hub_api = HfApi()

    hub_api.create_repo(
        repo_id=cfg.repo_id,
        private=cfg.private,
        repo_type="model",
        exist_ok=cfg.exist_ok,
    )
    if cfg.branch:
        hub_api.create_branch(
            repo_id=cfg.repo_id,
            branch=cfg.branch,
            repo_type="model",
            exist_ok=cfg.exist_ok,
        )

    hub_api.upload_folder(
        repo_id=cfg.repo_id,
        folder_path=cfg.pretrained_path,
        repo_type="model",
        revision=cfg.branch,
    )


if __name__ == "__main__":
    main()


@@ -282,6 +282,9 @@ def train(cfg: TrainPipelineConfig):
    eval_env.close()
    logging.info("End of training")

    if cfg.policy.push_to_hub:
        policy.push_model_to_hub(cfg)


if __name__ == "__main__":
    init_logging()


@@ -0,0 +1,74 @@
---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
{{ card_data }}
---
# Model Card for {{ model_name | default("Model ID", true) }}
<!-- Provide a quick summary of what the model is/does. -->
{% if model_name == "smolvla" %}
[SmolVLA](https://huggingface.co/papers/2506.01844) is a compact, efficient vision-language-action model that achieves competitive performance at reduced computational costs and can be deployed on consumer-grade hardware.
{% elif model_name == "act" %}
[Action Chunking with Transformers (ACT)](https://huggingface.co/papers/2304.13705) is an imitation-learning method that predicts short action chunks instead of single steps. It learns from teleoperated data and often achieves high success rates.
{% elif model_name == "tdmpc" %}
[TD-MPC](https://huggingface.co/papers/2203.04955) combines model-free and model-based approaches to improve sample efficiency and performance in continuous control tasks by using a learned latent dynamics model and terminal value function.
{% elif model_name == "diffusion" %}
[Diffusion Policy](https://huggingface.co/papers/2303.04137) treats visuomotor control as a generative diffusion process, producing smooth, multi-step action trajectories that excel at contact-rich manipulation.
{% elif model_name == "vqbet" %}
[VQ-BET](https://huggingface.co/papers/2403.03181) combines vector-quantised action tokens with Behaviour Transformers to discretise control and achieve data-efficient imitation across diverse skills.
{% elif model_name == "pi0" %}
[Pi0](https://huggingface.co/papers/2410.24164) is a generalist vision-language-action transformer that converts multimodal observations and text instructions into robot actions for zero-shot task transfer.
{% elif model_name == "pi0fast" %}
[Pi0-Fast](https://huggingface.co/papers/2501.09747) is a variant of Pi0 that uses a new tokenization method called FAST, which enables training of an autoregressive vision-language-action policy for high-frequency robotic tasks with improved performance and reduced training time.
{% elif model_name == "sac" %}
[Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) is an entropy-regularised actor-critic algorithm offering stable, sample-efficient learning in continuous-control environments.
{% elif model_name == "reward_classifier" %}
A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable.
{% else %}
_Model type not recognized — please update this template._
{% endif %}
This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index).
---
## How to Get Started with the Model
For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy).
Below is the short version on how to train and run inference/eval:
### Train from scratch
```bash
python lerobot/scripts/train.py \
  --dataset.repo_id=${HF_USER}/<dataset> \
  --policy.type=act \
  --output_dir=outputs/train/<desired_policy_repo_id> \
  --job_name=lerobot_training \
  --policy.device=cuda \
  --policy.repo_id=${HF_USER}/<desired_policy_repo_id> \
  --wandb.enable=true
```
*Writes checkpoints to `outputs/train/<desired_policy_repo_id>/checkpoints/`.*
### Evaluate the policy/run inference
```bash
python -m lerobot.record \
  --robot.type=so100_follower \
  --dataset.repo_id=<hf_user>/eval_<dataset> \
  --policy.path=<hf_user>/<desired_policy_repo_id> \
  --episodes=10
```
Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint.
---
## Model Details
* **License:** {{ license | default("\[More Information Needed]", true) }}
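Beyond `lerobot.record`, a pushed checkpoint can also be loaded back in Python through the `from_pretrained` machinery that `PreTrainedPolicy` inherits from `HubMixin`. A sketch, where the concrete policy class path is an assumption:

```python
# Sketch: load a policy that was pushed with --policy.push_to_hub (class path assumed).
from lerobot.common.policies.act.modeling_act import ACTPolicy

policy = ACTPolicy.from_pretrained("<hf_user>/<desired_policy_repo_id>")
policy.eval()  # ready for inference on preprocessed observations
```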


@@ -32,7 +32,7 @@ def get_policy_stats(ds_repo_id: str, policy_name: str, policy_kwargs: dict):
train_cfg = TrainPipelineConfig(
# TODO(rcadene, aliberts): remove dataset download
dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]),
policy=make_policy_config(policy_name, **policy_kwargs),
policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs),
)
train_cfg.validate() # Needed for auto-setting some parameters


@@ -338,8 +338,9 @@ def test_factory(env_name, repo_id, policy_name):
# TODO(rcadene, aliberts): remove dataset download
dataset=DatasetConfig(repo_id=repo_id, episodes=[0]),
env=make_env_config(env_name),
policy=make_policy_config(policy_name),
policy=make_policy_config(policy_name, push_to_hub=False),
)
cfg.validate()
dataset = make_dataset(cfg)
delta_timestamps = dataset.delta_timestamps


@@ -142,9 +142,10 @@ def test_policy(ds_repo_id, env_name, env_kwargs, policy_name, policy_kwargs):
train_cfg = TrainPipelineConfig(
# TODO(rcadene, aliberts): remove dataset download
dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]),
policy=make_policy_config(policy_name, **policy_kwargs),
policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs),
env=make_env_config(env_name, **env_kwargs),
)
train_cfg.validate()
# Check that we can make the policy object.
dataset = make_dataset(train_cfg)
@@ -213,7 +214,7 @@ def test_act_backbone_lr():
cfg = TrainPipelineConfig(
# TODO(rcadene, aliberts): remove dataset download
dataset=DatasetConfig(repo_id="lerobot/aloha_sim_insertion_scripted", episodes=[0]),
policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001),
policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001, push_to_hub=False),
)
cfg.validate() # Needed for auto-setting some parameters
@@ -415,6 +416,7 @@ def test_backward_compatibility(ds_repo_id: str, policy_name: str, policy_kwargs
https://github.com/huggingface/lerobot/pull/1127.
"""
# NOTE: ACT policy has different randomness after PyTorch 2.7.0
if policy_name == "act" and version.parse(torch.__version__) < version.parse("2.7.0"):
pytest.skip(f"Skipping act policy test with PyTorch {torch.__version__}. Requires PyTorch >= 2.7.0")