diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..d2608ca0 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include lerobot/templates/lerobot_modelcard_template.md +include lerobot/common/datasets/card_template.md diff --git a/Makefile b/Makefile index c82483cc..9457dbe6 100644 --- a/Makefile +++ b/Makefile @@ -40,6 +40,8 @@ test-end-to-end: ${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval + ${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-train + ${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-eval test-act-ete-train: python lerobot/scripts/train.py \ @@ -48,6 +50,7 @@ test-act-ete-train: --policy.n_action_steps=20 \ --policy.chunk_size=20 \ --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ --env.type=aloha \ --env.episode_length=5 \ --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \ @@ -85,6 +88,7 @@ test-diffusion-ete-train: --policy.diffusion_step_embed_dim=32 \ --policy.num_inference_steps=10 \ --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ --env.type=pusht \ --env.episode_length=5 \ --dataset.repo_id=lerobot/pusht \ @@ -114,6 +118,7 @@ test-tdmpc-ete-train: python lerobot/scripts/train.py \ --policy.type=tdmpc \ --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ --env.type=xarm \ --env.task=XarmLift-v0 \ --env.episode_length=5 \ @@ -140,3 +145,36 @@ test-tdmpc-ete-eval: --env.task=XarmLift-v0 \ --eval.n_episodes=1 \ --eval.batch_size=1 + + +test-smolvla-ete-train: + python lerobot/scripts/train.py \ + --policy.type=smolvla \ + --policy.n_action_steps=20 \ + --policy.chunk_size=20 \ + --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ + --env.type=aloha \ + --env.episode_length=5 \ + --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \ + --dataset.image_transforms.enable=true \ + --dataset.episodes="[0]" \ + --batch_size=2 \ + --steps=4 \ + --eval_freq=2 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 \ + --save_freq=2 \ + --save_checkpoint=true \ + --log_freq=1 \ + --wandb.enable=false \ + --output_dir=tests/outputs/smolvla/ + +test-smolvla-ete-eval: + python lerobot/scripts/eval.py \ + --policy.path=tests/outputs/smolvla/checkpoints/000004/pretrained_model \ + --policy.device=$(DEVICE) \ + --env.type=aloha \ + --env.episode_length=5 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 diff --git a/docs/source/il_robots.mdx b/docs/source/il_robots.mdx index f3b4b1a2..fb99797e 100644 --- a/docs/source/il_robots.mdx +++ b/docs/source/il_robots.mdx @@ -255,7 +255,8 @@ python lerobot/scripts/train.py \ --output_dir=outputs/train/act_so101_test \ --job_name=act_so101_test \ --policy.device=cuda \ - --wandb.enable=true + --wandb.enable=true \ + --policy.repo_id=${HF_USER}/my_policy ``` Let's explain the command: @@ -273,6 +274,10 @@ python lerobot/scripts/train.py \ --resume=true ``` +If you do not want to push your model to the hub after training, use `--policy.push_to_hub=false`. + +You can also add extra `tags`, specify a `license`, or make the model repo `private`, for example: `--policy.private=true --policy.tags=\[ppo,rl\] --policy.license=mit`. + #### Train using Collab If your local computer doesn't have a powerful GPU you could utilize Google Collab to train your model by following the [ACT training notebook](./notebooks#training-act). 
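The Makefile targets above disable the new hub push with `--policy.push_to_hub=false`, and the docs addition introduces `--policy.repo_id` alongside the optional `private`/`tags`/`license` fields. Below is a minimal sketch of how the same fields behave when configs are built directly in Python, mirroring the test changes later in this diff; the import paths for `make_policy_config` and `DatasetConfig` and the repo id are assumptions, not part of this PR.

```python
# Sketch only: exercises the new PreTrainedConfig hub fields and the
# TrainPipelineConfig validation added in this diff. Import paths are assumed.
from lerobot.common.policies.factory import make_policy_config  # assumed location
from lerobot.configs.default import DatasetConfig  # assumed location
from lerobot.configs.train import TrainPipelineConfig

# Local-only run: skip the hub push, as the Makefile test targets do.
cfg = TrainPipelineConfig(
    dataset=DatasetConfig(repo_id="lerobot/pusht", episodes=[0]),
    policy=make_policy_config("diffusion", push_to_hub=False),
)
cfg.validate()  # passes: repo_id is only required when push_to_hub is True

# Hub run: push_to_hub defaults to True, so a repo_id must be set,
# otherwise validate() raises the new ValueError.
cfg_hub = TrainPipelineConfig(
    dataset=DatasetConfig(repo_id="lerobot/pusht", episodes=[0]),
    policy=make_policy_config(
        "diffusion",
        repo_id="my-user/diffusion_pusht",  # illustrative repo id
        private=True,
        tags=["pusht"],
        license="mit",
    ),
)
cfg_hub.validate()
```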
diff --git a/lerobot/common/policies/pretrained.py b/lerobot/common/policies/pretrained.py index da4ef157..58eef9ba 100644 --- a/lerobot/common/policies/pretrained.py +++ b/lerobot/common/policies/pretrained.py @@ -14,12 +14,14 @@ import abc import logging import os +from importlib.resources import files from pathlib import Path -from typing import Type, TypeVar +from tempfile import TemporaryDirectory +from typing import List, Type, TypeVar import packaging import safetensors -from huggingface_hub import hf_hub_download +from huggingface_hub import HfApi, ModelCard, ModelCardData, hf_hub_download from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE from huggingface_hub.errors import HfHubHTTPError from safetensors.torch import load_model as load_model_as_safetensor @@ -28,20 +30,10 @@ from torch import Tensor, nn from lerobot.common.utils.hub import HubMixin from lerobot.configs.policies import PreTrainedConfig +from lerobot.configs.train import TrainPipelineConfig T = TypeVar("T", bound="PreTrainedPolicy") -DEFAULT_POLICY_CARD = """ ---- -# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 -# Doc / guide: https://huggingface.co/docs/hub/model-cards -{{ card_data }} ---- - -This policy has been pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot): -- Docs: {{ docs_url | default("[More Information Needed]", true) }} -""" - class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC): """ @@ -150,16 +142,6 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC): safetensors.torch.load_model(model, model_file, strict=strict, device=map_location) return model - # def generate_model_card(self, *args, **kwargs) -> ModelCard: - # card = ModelCard.from_template( - # card_data=self._hub_mixin_info.model_card_data, - # template_str=self._hub_mixin_info.model_card_template, - # repo_url=self._hub_mixin_info.repo_url, - # docs_url=self._hub_mixin_info.docs_url, - # **kwargs, - # ) - # return card - @abc.abstractmethod def get_optim_params(self) -> dict: """ @@ -197,3 +179,56 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC): with caching. 
""" raise NotImplementedError + + def push_model_to_hub( + self, + cfg: TrainPipelineConfig, + ): + api = HfApi() + repo_id = api.create_repo( + repo_id=self.config.repo_id, private=self.config.private, exist_ok=True + ).repo_id + + # Push the files to the repo in a single commit + with TemporaryDirectory(ignore_cleanup_errors=True) as tmp: + saved_path = Path(tmp) / repo_id + + self.save_pretrained(saved_path) # Calls _save_pretrained and stores model tensors + + card = self.generate_model_card( + cfg.dataset.repo_id, self.config.type, self.config.license, self.config.tags + ) + card.save(str(saved_path / "README.md")) + + cfg.save_pretrained(saved_path) # Calls _save_pretrained and stores train config + + commit_info = api.upload_folder( + repo_id=repo_id, + repo_type="model", + folder_path=saved_path, + commit_message="Upload policy weights, train config and readme", + allow_patterns=["*.safetensors", "*.json", "*.yaml", "*.md"], + ignore_patterns=["*.tmp", "*.log"], + ) + + logging.info(f"Model pushed to {commit_info.repo_url.url}") + + def generate_model_card( + self, dataset_repo_id: str, model_type: str, license: str | None, tags: List[str] | None + ) -> ModelCard: + base_model = "lerobot/smolvla_base" if model_type == "smolvla" else None # Set a base model + + card_data = ModelCardData( + license=license or "apache-2.0", + library_name="lerobot", + pipeline_tag="robotics", + tags=list(set(tags or []).union({"robotics", "lerobot", model_type})), + model_name=model_type, + datasets=dataset_repo_id, + base_model=base_model, + ) + + template_card = files("lerobot.templates").joinpath("lerobot_modelcard_template.md").read_text() + card = ModelCard.from_template(card_data, template_str=template_card) + card.validate() + return card diff --git a/lerobot/configs/policies.py b/lerobot/configs/policies.py index 1302db1f..9e7f3dd5 100644 --- a/lerobot/configs/policies.py +++ b/lerobot/configs/policies.py @@ -60,6 +60,16 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): # automatic gradient scaling is used. use_amp: bool = False + push_to_hub: bool = True + repo_id: str | None = None + + # Upload on private repository on the Hugging Face hub. + private: bool | None = None + # Add tags to your policy on the hub. + tags: list[str] | None = None + # Add tags to your policy on the hub. + license: str | None = None + def __post_init__(self): self.pretrained_path = None if not self.device or not is_torch_device_available(self.device): diff --git a/lerobot/configs/train.py b/lerobot/configs/train.py index 96a460bd..377fb8a9 100644 --- a/lerobot/configs/train.py +++ b/lerobot/configs/train.py @@ -116,6 +116,11 @@ class TrainPipelineConfig(HubMixin): self.optimizer = self.policy.get_optimizer_preset() self.scheduler = self.policy.get_scheduler_preset() + if self.policy.push_to_hub and not self.policy.repo_id: + raise ValueError( + "'policy.repo_id' argument missing. Please specify it to push the model to the hub." 
+ ) + @classmethod def __get_path_fields__(cls) -> list[str]: """This enables the parser to load config from the policy using `--policy.path=local/dir`""" diff --git a/lerobot/record.py b/lerobot/record.py index 2f443c20..766cfb35 100644 --- a/lerobot/record.py +++ b/lerobot/record.py @@ -65,7 +65,10 @@ from lerobot.common.robots import ( # noqa: F401 from lerobot.common.teleoperators import ( # noqa: F401 Teleoperator, TeleoperatorConfig, + koch_leader, make_teleoperator_from_config, + so100_leader, + so101_leader, ) from lerobot.common.utils.control_utils import ( init_keyboard_listener, @@ -84,8 +87,6 @@ from lerobot.common.utils.visualization_utils import _init_rerun from lerobot.configs import parser from lerobot.configs.policies import PreTrainedConfig -from .common.teleoperators import koch_leader, so100_leader, so101_leader # noqa: F401 - @dataclass class DatasetRecordConfig: diff --git a/lerobot/scripts/push_pretrained.py b/lerobot/scripts/push_pretrained.py deleted file mode 100644 index e3c683f9..00000000 --- a/lerobot/scripts/push_pretrained.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Once you have trained a policy with our training script (lerobot/scripts/train.py), use this script to push it -to the hub. 
- -Example: - -```bash -python lerobot/scripts/push_pretrained.py \ - --pretrained_path=outputs/train/act_aloha_sim_transfer_cube_human/checkpoints/last/pretrained_model \ - --repo_id=lerobot/act_aloha_sim_transfer_cube_human -``` -""" - -from dataclasses import dataclass -from pathlib import Path - -import draccus -from huggingface_hub import HfApi - - -@dataclass -class PushPreTrainedConfig: - pretrained_path: Path - repo_id: str - branch: str | None = None - private: bool = False - exist_ok: bool = False - - -@draccus.wrap() -def main(cfg: PushPreTrainedConfig): - hub_api = HfApi() - hub_api.create_repo( - repo_id=cfg.repo_id, - private=cfg.private, - repo_type="model", - exist_ok=cfg.exist_ok, - ) - if cfg.branch: - hub_api.create_branch( - repo_id=cfg.repo_id, - branch=cfg.branch, - repo_type="model", - exist_ok=cfg.exist_ok, - ) - - hub_api.upload_folder( - repo_id=cfg.repo_id, - folder_path=cfg.pretrained_path, - repo_type="model", - revision=cfg.branch, - ) - - -if __name__ == "__main__": - main() diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index 0de247be..bdb17dae 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -282,6 +282,9 @@ def train(cfg: TrainPipelineConfig): eval_env.close() logging.info("End of training") + if cfg.policy.push_to_hub: + policy.push_model_to_hub(cfg) + if __name__ == "__main__": init_logging() diff --git a/lerobot/templates/lerobot_modelcard_template.md b/lerobot/templates/lerobot_modelcard_template.md new file mode 100644 index 00000000..ca5c182a --- /dev/null +++ b/lerobot/templates/lerobot_modelcard_template.md @@ -0,0 +1,74 @@ +--- +# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 +# Doc / guide: https://huggingface.co/docs/hub/model-cards +{{ card_data }} +--- + +# Model Card for {{ model_name | default("Model ID", true) }} + + + +{% if model_name == "smolvla" %} +[SmolVLA](https://huggingface.co/papers/2506.01844) is a compact, efficient vision-language-action model that achieves competitive performance at reduced computational costs and can be deployed on consumer-grade hardware. +{% elif model_name == "act" %} +[Action Chunking with Transformers (ACT)](https://huggingface.co/papers/2304.13705) is an imitation-learning method that predicts short action chunks instead of single steps. It learns from teleoperated data and often achieves high success rates. +{% elif model_name == "tdmpc" %} +[TD-MPC](https://huggingface.co/papers/2203.04955) combines model-free and model-based approaches to improve sample efficiency and performance in continuous control tasks by using a learned latent dynamics model and terminal value function. +{% elif model_name == "diffusion" %} +[Diffusion Policy](https://huggingface.co/papers/2303.04137) treats visuomotor control as a generative diffusion process, producing smooth, multi-step action trajectories that excel at contact-rich manipulation. +{% elif model_name == "vqbet" %} +[VQ-BET](https://huggingface.co/papers/2403.03181) combines vector-quantised action tokens with Behaviour Transformers to discretise control and achieve data-efficient imitation across diverse skills. +{% elif model_name == "pi0" %} +[Pi0](https://huggingface.co/papers/2410.24164) is a generalist vision-language-action transformer that converts multimodal observations and text instructions into robot actions for zero-shot task transfer. 
+{% elif model_name == "pi0fast" %} +[Pi0-Fast](https://huggingface.co/papers/2501.09747) is a variant of Pi0 that uses a new tokenization method called FAST, which enables training of an autoregressive vision-language-action policy for high-frequency robotic tasks with improved performance and reduced training time. +{% elif model_name == "sac" %} +[Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) is an entropy-regularised actor-critic algorithm offering stable, sample-efficient learning in continuous-control environments. +{% elif model_name == "reward_classifier" %} +A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable. +{% else %} +_Model type not recognized — please update this template._ +{% endif %} + +This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot). +See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index). + +--- + +## How to Get Started with the Model + +For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy). +Below is the short version of how to train and run inference/eval: + +### Train from scratch + +```bash +python lerobot/scripts/train.py \ + --dataset.repo_id=${HF_USER}/<dataset_name> \ + --policy.type=act \ + --output_dir=outputs/train/<policy_repo_id> \ + --job_name=lerobot_training \ + --policy.device=cuda \ + --policy.repo_id=${HF_USER}/<policy_repo_id> \ + --wandb.enable=true +``` + +*Writes checkpoints to `outputs/train/<policy_repo_id>/checkpoints/`.* + +### Evaluate the policy/run inference + +```bash +python -m lerobot.record \ + --robot.type=so100_follower \ + --dataset.repo_id=<hf_user>/eval_<dataset_name> \ + --policy.path=<hf_user>/<policy_repo_id> \ + --episodes=10 +``` + +Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint. 
+ +--- + +## Model Details + +* **License:** {{ license | default("\[More Information Needed]", true) }} diff --git a/tests/artifacts/policies/save_policy_to_safetensors.py b/tests/artifacts/policies/save_policy_to_safetensors.py index 106f0dc0..785f296c 100644 --- a/tests/artifacts/policies/save_policy_to_safetensors.py +++ b/tests/artifacts/policies/save_policy_to_safetensors.py @@ -32,7 +32,7 @@ def get_policy_stats(ds_repo_id: str, policy_name: str, policy_kwargs: dict): train_cfg = TrainPipelineConfig( # TODO(rcadene, aliberts): remove dataset download dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]), - policy=make_policy_config(policy_name, **policy_kwargs), + policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs), ) train_cfg.validate() # Needed for auto-setting some parameters diff --git a/tests/datasets/test_datasets.py b/tests/datasets/test_datasets.py index 55a417c3..b4fca77c 100644 --- a/tests/datasets/test_datasets.py +++ b/tests/datasets/test_datasets.py @@ -338,8 +338,9 @@ def test_factory(env_name, repo_id, policy_name): # TODO(rcadene, aliberts): remove dataset download dataset=DatasetConfig(repo_id=repo_id, episodes=[0]), env=make_env_config(env_name), - policy=make_policy_config(policy_name), + policy=make_policy_config(policy_name, push_to_hub=False), ) + cfg.validate() dataset = make_dataset(cfg) delta_timestamps = dataset.delta_timestamps diff --git a/tests/policies/test_policies.py b/tests/policies/test_policies.py index dff5975a..1b40c663 100644 --- a/tests/policies/test_policies.py +++ b/tests/policies/test_policies.py @@ -142,9 +142,10 @@ def test_policy(ds_repo_id, env_name, env_kwargs, policy_name, policy_kwargs): train_cfg = TrainPipelineConfig( # TODO(rcadene, aliberts): remove dataset download dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]), - policy=make_policy_config(policy_name, **policy_kwargs), + policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs), env=make_env_config(env_name, **env_kwargs), ) + train_cfg.validate() # Check that we can make the policy object. dataset = make_dataset(train_cfg) @@ -213,7 +214,7 @@ def test_act_backbone_lr(): cfg = TrainPipelineConfig( # TODO(rcadene, aliberts): remove dataset download dataset=DatasetConfig(repo_id="lerobot/aloha_sim_insertion_scripted", episodes=[0]), - policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001), + policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001, push_to_hub=False), ) cfg.validate() # Needed for auto-setting some parameters @@ -415,6 +416,7 @@ def test_backward_compatibility(ds_repo_id: str, policy_name: str, policy_kwargs https://github.com/huggingface/lerobot/pull/1127. """ + # NOTE: ACT policy has different randomness, after PyTorch 2.7.0 if policy_name == "act" and version.parse(torch.__version__) < version.parse("2.7.0"): pytest.skip(f"Skipping act policy test with PyTorch {torch.__version__}. Requires PyTorch >= 2.7.0")
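To preview the README that `push_model_to_hub` will upload without creating a repo, the card-generation step from `generate_model_card` can be reproduced standalone. The sketch below mirrors the new code in `pretrained.py`; the tag and dataset values are illustrative, and it assumes `lerobot.templates` is importable with the template installed as package data, as the new MANIFEST.in entry implies.

```python
# Standalone sketch of the card generation added to PreTrainedPolicy.generate_model_card.
# Values below are illustrative; the logic mirrors the new method in pretrained.py.
from importlib.resources import files

from huggingface_hub import ModelCard, ModelCardData

model_type = "act"
card_data = ModelCardData(
    license="apache-2.0",  # default used when --policy.license is not set
    library_name="lerobot",
    pipeline_tag="robotics",
    tags=sorted({"robotics", "lerobot", model_type, "aloha"}),  # user tags merged with defaults
    model_name=model_type,
    datasets="lerobot/aloha_sim_transfer_cube_human",
    base_model=None,  # only smolvla policies get lerobot/smolvla_base here
)

template = files("lerobot.templates").joinpath("lerobot_modelcard_template.md").read_text()
card = ModelCard.from_template(card_data, template_str=template)
card.validate()  # checks the YAML front matter against the hub metadata spec
card.save("README_preview.md")  # inspect locally before pushing
print(card.data.to_yaml())
```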