Feat: Improve hub integration (#1382)
* feat(policies): Initial setup to push policies to hub with tags and model card
* feat: add dataset that is used to train
* Add model template summary
* fix: Update link model_card template
* fix: remove print
* fix: change import name
* fix: add model summary in template
* fix: minor text
* fix: comments Lucain
* fix: feedback steven
* fix: restructure push to hub
* fix: remove unneeded changes
* fix: import
* fix: import 2
* Add MANIFEST.in
* fix: feedback pr
* Fix tests
* tests: Add smolvla end-to-end test
* Fix: smolvla test
* fix test name
* fix policy tests
* Add push to hub false policy tests
* Do push to hub cleaner
* fix(ci): add push_to_hub false in tests

---------

Co-authored-by: Steven Palma <steven.palma@huggingface.co>
MANIFEST.in (new file, 2 additions)
@@ -0,0 +1,2 @@
include lerobot/templates/lerobot_modelcard_template.md
include lerobot/common/datasets/card_template.md
Makefile (38 additions)
@@ -40,6 +40,8 @@ test-end-to-end:
    ${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval
    ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train
    ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval
    ${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-train
    ${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-eval

test-act-ete-train:
    python lerobot/scripts/train.py \
@@ -48,6 +50,7 @@ test-act-ete-train:
        --policy.n_action_steps=20 \
        --policy.chunk_size=20 \
        --policy.device=$(DEVICE) \
        --policy.push_to_hub=false \
        --env.type=aloha \
        --env.episode_length=5 \
        --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
@@ -85,6 +88,7 @@ test-diffusion-ete-train:
        --policy.diffusion_step_embed_dim=32 \
        --policy.num_inference_steps=10 \
        --policy.device=$(DEVICE) \
        --policy.push_to_hub=false \
        --env.type=pusht \
        --env.episode_length=5 \
        --dataset.repo_id=lerobot/pusht \
@@ -114,6 +118,7 @@ test-tdmpc-ete-train:
    python lerobot/scripts/train.py \
        --policy.type=tdmpc \
        --policy.device=$(DEVICE) \
        --policy.push_to_hub=false \
        --env.type=xarm \
        --env.task=XarmLift-v0 \
        --env.episode_length=5 \
@@ -140,3 +145,36 @@ test-tdmpc-ete-eval:
        --env.task=XarmLift-v0 \
        --eval.n_episodes=1 \
        --eval.batch_size=1


test-smolvla-ete-train:
    python lerobot/scripts/train.py \
        --policy.type=smolvla \
        --policy.n_action_steps=20 \
        --policy.chunk_size=20 \
        --policy.device=$(DEVICE) \
        --policy.push_to_hub=false \
        --env.type=aloha \
        --env.episode_length=5 \
        --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
        --dataset.image_transforms.enable=true \
        --dataset.episodes="[0]" \
        --batch_size=2 \
        --steps=4 \
        --eval_freq=2 \
        --eval.n_episodes=1 \
        --eval.batch_size=1 \
        --save_freq=2 \
        --save_checkpoint=true \
        --log_freq=1 \
        --wandb.enable=false \
        --output_dir=tests/outputs/smolvla/

test-smolvla-ete-eval:
    python lerobot/scripts/eval.py \
        --policy.path=tests/outputs/smolvla/checkpoints/000004/pretrained_model \
        --policy.device=$(DEVICE) \
        --env.type=aloha \
        --env.episode_length=5 \
        --eval.n_episodes=1 \
        --eval.batch_size=1
@@ -255,7 +255,8 @@ python lerobot/scripts/train.py \
    --output_dir=outputs/train/act_so101_test \
    --job_name=act_so101_test \
    --policy.device=cuda \
    --wandb.enable=true
    --wandb.enable=true \
    --policy.repo_id=${HF_USER}/my_policy
```

Let's explain the command:
@@ -273,6 +274,10 @@ python lerobot/scripts/train.py \
    --resume=true
```

If you do not want to push your model to the hub after training, use `--policy.push_to_hub=false`.

Additionally, you can provide extra `tags`, specify a `license`, or make the model repo `private` by adding, for example: `--policy.private=true --policy.tags=\[ppo,rl\] --policy.license=mit`

#### Train using Colab
If your local computer doesn't have a powerful GPU, you can use Google Colab to train your model by following the [ACT training notebook](./notebooks#training-act).
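These CLI flags map onto the new hub-related fields on the policy configuration (see the `PreTrainedConfig` changes further down in this diff). A minimal Python sketch of setting the same options programmatically; `make_policy_config` is the helper used in the test changes at the end of this diff, and its import path here is an assumption:

```python
# Hypothetical, minimal sketch: the --policy.* hub flags correspond to fields
# on the policy config. The import path for make_policy_config is an assumption.
from lerobot.common.policies.factory import make_policy_config

policy_cfg = make_policy_config(
    "act",
    push_to_hub=True,              # default; set False to skip pushing
    repo_id="my-user/my_policy",   # required when push_to_hub is True
    private=True,                  # create a private repo
    tags=["ppo", "rl"],            # extra tags for the hub
    license="mit",                 # falls back to apache-2.0 in the model card
)
```

When `push_to_hub` is left at its default of `True`, training ends with an automatic push (see the `train.py` change below), so `repo_id` must be set.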
@@ -14,12 +14,14 @@
import abc
import logging
import os
from importlib.resources import files
from pathlib import Path
from typing import Type, TypeVar
from tempfile import TemporaryDirectory
from typing import List, Type, TypeVar

import packaging
import safetensors
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi, ModelCard, ModelCardData, hf_hub_download
from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
from huggingface_hub.errors import HfHubHTTPError
from safetensors.torch import load_model as load_model_as_safetensor
@@ -28,20 +30,10 @@ from torch import Tensor, nn

from lerobot.common.utils.hub import HubMixin
from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.train import TrainPipelineConfig

T = TypeVar("T", bound="PreTrainedPolicy")

DEFAULT_POLICY_CARD = """
---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
{{ card_data }}
---

This policy has been pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot):
- Docs: {{ docs_url | default("[More Information Needed]", true) }}
"""


class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
    """
@@ -150,16 +142,6 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
            safetensors.torch.load_model(model, model_file, strict=strict, device=map_location)
        return model

    # def generate_model_card(self, *args, **kwargs) -> ModelCard:
    #     card = ModelCard.from_template(
    #         card_data=self._hub_mixin_info.model_card_data,
    #         template_str=self._hub_mixin_info.model_card_template,
    #         repo_url=self._hub_mixin_info.repo_url,
    #         docs_url=self._hub_mixin_info.docs_url,
    #         **kwargs,
    #     )
    #     return card

    @abc.abstractmethod
    def get_optim_params(self) -> dict:
        """
@@ -197,3 +179,56 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
        with caching.
        """
        raise NotImplementedError

    def push_model_to_hub(
        self,
        cfg: TrainPipelineConfig,
    ):
        api = HfApi()
        repo_id = api.create_repo(
            repo_id=self.config.repo_id, private=self.config.private, exist_ok=True
        ).repo_id

        # Push the files to the repo in a single commit
        with TemporaryDirectory(ignore_cleanup_errors=True) as tmp:
            saved_path = Path(tmp) / repo_id

            self.save_pretrained(saved_path)  # Calls _save_pretrained and stores model tensors

            card = self.generate_model_card(
                cfg.dataset.repo_id, self.config.type, self.config.license, self.config.tags
            )
            card.save(str(saved_path / "README.md"))

            cfg.save_pretrained(saved_path)  # Calls _save_pretrained and stores train config

            commit_info = api.upload_folder(
                repo_id=repo_id,
                repo_type="model",
                folder_path=saved_path,
                commit_message="Upload policy weights, train config and readme",
                allow_patterns=["*.safetensors", "*.json", "*.yaml", "*.md"],
                ignore_patterns=["*.tmp", "*.log"],
            )

        logging.info(f"Model pushed to {commit_info.repo_url.url}")

    def generate_model_card(
        self, dataset_repo_id: str, model_type: str, license: str | None, tags: List[str] | None
    ) -> ModelCard:
        base_model = "lerobot/smolvla_base" if model_type == "smolvla" else None  # Set a base model

        card_data = ModelCardData(
            license=license or "apache-2.0",
            library_name="lerobot",
            pipeline_tag="robotics",
            tags=list(set(tags or []).union({"robotics", "lerobot", model_type})),
            model_name=model_type,
            datasets=dataset_repo_id,
            base_model=base_model,
        )

        template_card = files("lerobot.templates").joinpath("lerobot_modelcard_template.md").read_text()
        card = ModelCard.from_template(card_data, template_str=template_card)
        card.validate()
        return card
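`generate_model_card` builds the card with the standard `huggingface_hub` card utilities (`ModelCardData`, `ModelCard.from_template`). A minimal standalone sketch of those calls, with placeholder values rather than anything taken from a real training run:

```python
# Minimal sketch of the huggingface_hub card API used by generate_model_card above.
# All values below are illustrative placeholders.
from huggingface_hub import ModelCard, ModelCardData

card_data = ModelCardData(
    license="apache-2.0",
    library_name="lerobot",
    pipeline_tag="robotics",
    tags=["robotics", "lerobot", "act"],
    model_name="act",
    datasets="lerobot/aloha_sim_transfer_cube_human",
)

# {{ card_data }} is replaced by the YAML metadata block when the template renders.
template = """---
{{ card_data }}
---

# Model Card for {{ model_name | default("Model ID", true) }}

This policy has been pushed to the Hub using LeRobot.
"""

card = ModelCard.from_template(card_data, template_str=template)
card.save("README.md")  # YAML front matter plus the rendered body
```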
@@ -60,6 +60,16 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
    # automatic gradient scaling is used.
    use_amp: bool = False

    push_to_hub: bool = True
    repo_id: str | None = None

    # Upload on private repository on the Hugging Face hub.
    private: bool | None = None
    # Add tags to your policy on the hub.
    tags: list[str] | None = None
    # Add a license to your policy on the hub.
    license: str | None = None

    def __post_init__(self):
        self.pretrained_path = None
        if not self.device or not is_torch_device_available(self.device):
@@ -116,6 +116,11 @@ class TrainPipelineConfig(HubMixin):
            self.optimizer = self.policy.get_optimizer_preset()
            self.scheduler = self.policy.get_scheduler_preset()

        if self.policy.push_to_hub and not self.policy.repo_id:
            raise ValueError(
                "'policy.repo_id' argument missing. Please specify it to push the model to the hub."
            )

    @classmethod
    def __get_path_fields__(cls) -> list[str]:
        """This enables the parser to load config from the policy using `--policy.path=local/dir`"""
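Because `push_to_hub` now defaults to `True`, this check makes a training config without `policy.repo_id` fail fast at validation time, which is why the test changes below pass `push_to_hub=False`. A small sketch of the behavior; the `DatasetConfig` and `make_policy_config` import paths are assumptions:

```python
# Sketch only: demonstrates the new fail-fast validation. Import paths for
# DatasetConfig and make_policy_config are assumptions.
from lerobot.configs.train import TrainPipelineConfig
from lerobot.configs.default import DatasetConfig
from lerobot.common.policies.factory import make_policy_config

cfg = TrainPipelineConfig(
    dataset=DatasetConfig(repo_id="lerobot/pusht", episodes=[0]),
    policy=make_policy_config("act"),  # push_to_hub defaults to True, repo_id is None
)

try:
    cfg.validate()
except ValueError as err:
    # "'policy.repo_id' argument missing. Please specify it to push the model to the hub."
    print(err)
```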
@@ -65,7 +65,10 @@ from lerobot.common.robots import ( # noqa: F401
from lerobot.common.teleoperators import ( # noqa: F401
    Teleoperator,
    TeleoperatorConfig,
    koch_leader,
    make_teleoperator_from_config,
    so100_leader,
    so101_leader,
)
from lerobot.common.utils.control_utils import (
    init_keyboard_listener,
@@ -84,8 +87,6 @@ from lerobot.common.utils.visualization_utils import _init_rerun
from lerobot.configs import parser
from lerobot.configs.policies import PreTrainedConfig

from .common.teleoperators import koch_leader, so100_leader, so101_leader  # noqa: F401


@dataclass
class DatasetRecordConfig:
@@ -1,71 +0,0 @@
#!/usr/bin/env python

# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Once you have trained a policy with our training script (lerobot/scripts/train.py), use this script to push it
to the hub.

Example:

```bash
python lerobot/scripts/push_pretrained.py \
    --pretrained_path=outputs/train/act_aloha_sim_transfer_cube_human/checkpoints/last/pretrained_model \
    --repo_id=lerobot/act_aloha_sim_transfer_cube_human
```
"""

from dataclasses import dataclass
from pathlib import Path

import draccus
from huggingface_hub import HfApi


@dataclass
class PushPreTrainedConfig:
    pretrained_path: Path
    repo_id: str
    branch: str | None = None
    private: bool = False
    exist_ok: bool = False


@draccus.wrap()
def main(cfg: PushPreTrainedConfig):
    hub_api = HfApi()
    hub_api.create_repo(
        repo_id=cfg.repo_id,
        private=cfg.private,
        repo_type="model",
        exist_ok=cfg.exist_ok,
    )
    if cfg.branch:
        hub_api.create_branch(
            repo_id=cfg.repo_id,
            branch=cfg.branch,
            repo_type="model",
            exist_ok=cfg.exist_ok,
        )

    hub_api.upload_folder(
        repo_id=cfg.repo_id,
        folder_path=cfg.pretrained_path,
        repo_type="model",
        revision=cfg.branch,
    )


if __name__ == "__main__":
    main()
@@ -282,6 +282,9 @@ def train(cfg: TrainPipelineConfig):
        eval_env.close()
    logging.info("End of training")

    if cfg.policy.push_to_hub:
        policy.push_model_to_hub(cfg)


if __name__ == "__main__":
    init_logging()
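With this hook, a successful training run ends with an automatic `push_model_to_hub(cfg)` call whenever `policy.push_to_hub` is enabled. The pushed repo (weights, train config and README) can then be loaded back by repo id; a minimal sketch, where the ACT class and import path are assumptions rather than part of this diff:

```python
# Hypothetical usage sketch: load a policy that push_model_to_hub uploaded.
# The concrete class and its import path are assumptions; policy classes
# deriving from PreTrainedPolicy expose from_pretrained via the HubMixin base.
from lerobot.common.policies.act.modeling_act import ACTPolicy

policy = ACTPolicy.from_pretrained("my-user/my_policy")  # hub repo id or local pretrained_model dir
policy.eval()  # ready for inference, e.g. via `python -m lerobot.record --policy.path=my-user/my_policy`
```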
lerobot/templates/lerobot_modelcard_template.md (new file, 74 additions)
@@ -0,0 +1,74 @@
---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
{{ card_data }}
---

# Model Card for {{ model_name | default("Model ID", true) }}

<!-- Provide a quick summary of what the model is/does. -->

{% if model_name == "smolvla" %}
[SmolVLA](https://huggingface.co/papers/2506.01844) is a compact, efficient vision-language-action model that achieves competitive performance at reduced computational costs and can be deployed on consumer-grade hardware.
{% elif model_name == "act" %}
[Action Chunking with Transformers (ACT)](https://huggingface.co/papers/2304.13705) is an imitation-learning method that predicts short action chunks instead of single steps. It learns from teleoperated data and often achieves high success rates.
{% elif model_name == "tdmpc" %}
[TD-MPC](https://huggingface.co/papers/2203.04955) combines model-free and model-based approaches to improve sample efficiency and performance in continuous control tasks by using a learned latent dynamics model and terminal value function.
{% elif model_name == "diffusion" %}
[Diffusion Policy](https://huggingface.co/papers/2303.04137) treats visuomotor control as a generative diffusion process, producing smooth, multi-step action trajectories that excel at contact-rich manipulation.
{% elif model_name == "vqbet" %}
[VQ-BET](https://huggingface.co/papers/2403.03181) combines vector-quantised action tokens with Behaviour Transformers to discretise control and achieve data-efficient imitation across diverse skills.
{% elif model_name == "pi0" %}
[Pi0](https://huggingface.co/papers/2410.24164) is a generalist vision-language-action transformer that converts multimodal observations and text instructions into robot actions for zero-shot task transfer.
{% elif model_name == "pi0fast" %}
[Pi0-Fast](https://huggingface.co/papers/2501.09747) is a variant of Pi0 that uses a new tokenization method called FAST, which enables training of an autoregressive vision-language-action policy for high-frequency robotic tasks with improved performance and reduced training time.
{% elif model_name == "sac" %}
[Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) is an entropy-regularised actor-critic algorithm offering stable, sample-efficient learning in continuous-control environments.
{% elif model_name == "reward_classifier" %}
A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable.
{% else %}
_Model type not recognized — please update this template._
{% endif %}

This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index).

---

## How to Get Started with the Model

For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy).
Below is the short version on how to train and run inference/eval:

### Train from scratch

```bash
python lerobot/scripts/train.py \
    --dataset.repo_id=${HF_USER}/<dataset> \
    --policy.type=act \
    --output_dir=outputs/train/<desired_policy_repo_id> \
    --job_name=lerobot_training \
    --policy.device=cuda \
    --policy.repo_id=${HF_USER}/<desired_policy_repo_id> \
    --wandb.enable=true
```

*Writes checkpoints to `outputs/train/<desired_policy_repo_id>/checkpoints/`.*

### Evaluate the policy/run inference

```bash
python -m lerobot.record \
    --robot.type=so100_follower \
    --dataset.repo_id=<hf_user>/eval_<dataset> \
    --policy.path=<hf_user>/<desired_policy_repo_id> \
    --episodes=10
```

Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint.

---

## Model Details

* **License:** {{ license | default("\[More Information Needed]", true) }}
@@ -32,7 +32,7 @@ def get_policy_stats(ds_repo_id: str, policy_name: str, policy_kwargs: dict):
    train_cfg = TrainPipelineConfig(
        # TODO(rcadene, aliberts): remove dataset download
        dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]),
        policy=make_policy_config(policy_name, **policy_kwargs),
        policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs),
    )
    train_cfg.validate()  # Needed for auto-setting some parameters


@@ -338,8 +338,9 @@ def test_factory(env_name, repo_id, policy_name):
        # TODO(rcadene, aliberts): remove dataset download
        dataset=DatasetConfig(repo_id=repo_id, episodes=[0]),
        env=make_env_config(env_name),
        policy=make_policy_config(policy_name),
        policy=make_policy_config(policy_name, push_to_hub=False),
    )
    cfg.validate()

    dataset = make_dataset(cfg)
    delta_timestamps = dataset.delta_timestamps

@@ -142,9 +142,10 @@ def test_policy(ds_repo_id, env_name, env_kwargs, policy_name, policy_kwargs):
    train_cfg = TrainPipelineConfig(
        # TODO(rcadene, aliberts): remove dataset download
        dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]),
        policy=make_policy_config(policy_name, **policy_kwargs),
        policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs),
        env=make_env_config(env_name, **env_kwargs),
    )
    train_cfg.validate()

    # Check that we can make the policy object.
    dataset = make_dataset(train_cfg)

@@ -213,7 +214,7 @@ def test_act_backbone_lr():
    cfg = TrainPipelineConfig(
        # TODO(rcadene, aliberts): remove dataset download
        dataset=DatasetConfig(repo_id="lerobot/aloha_sim_insertion_scripted", episodes=[0]),
        policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001),
        policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001, push_to_hub=False),
    )
    cfg.validate()  # Needed for auto-setting some parameters

@@ -415,6 +416,7 @@ def test_backward_compatibility(ds_repo_id: str, policy_name: str, policy_kwargs
    https://github.com/huggingface/lerobot/pull/1127.

    """

    # NOTE: ACT policy has different randomness, after PyTorch 2.7.0
    if policy_name == "act" and version.parse(torch.__version__) < version.parse("2.7.0"):
        pytest.skip(f"Skipping act policy test with PyTorch {torch.__version__}. Requires PyTorch >= 2.7.0")