Add API Examples (#2289)

* (unscrewing things up) (#2288) * fix: expose a function explicitly building a frame for inference * fix: first make dataset frame, then make ready for inference * fix: reducing reliance on lerobot record for policy's ouptuts too * fix: encapsulating squeezing out + device handling from predict action * fix: remove duplicated call to build_inference_frame and add a function to only perform data type handling (whole conversion is: keys matching + data type conversion) * refactor(envs): add custom-observation-size (#2167) * fix: add MockMotorBus to MockRobot * rl: first drafts * add: all components of HIL SERL * fix: actor block works * fix: less friction, less friction * add: hil-serl complete example * fix: dataset names * fix: restructuring example folder * fix: act works but found bug in how ACT works * fix: same path for both pre and postprocessors * fix: paths * add: example usage for act * add: using ACT example * fix: training examples * fix: using examples * fix: camera index * fix: rename workflows into tutorial so that the path of the files is lerobot/examples/tutorial/... * fix: upload everything in one repo * fix: model name * fix: simplify model path * add: VLAs example --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: minor fix using named attributes * fix: change model to act * fix: named attributes for inference frame building * fix: minor fixes to smolvla * fix: small changes to pi0 * remove: old file that should have never been committed (ups sorry sorry) --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com>
2025-10-23 14:18:13 +02:00
parent be46bdea8f
commit 326aca0a48
10 changed files with 920 additions and 0 deletions
--- a/examples/tutorial/diffusion/diffusion_training_example.py
+++ b/examples/tutorial/diffusion/diffusion_training_example.py
@@ -0,0 +1,99 @@
+"""This script demonstrates how to train Diffusion Policy on a real-world dataset."""
+
+from pathlib import Path
+
+import torch
+
+from lerobot.configs.types import FeatureType
+from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
+from lerobot.datasets.utils import dataset_to_policy_features
+from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
+from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
+from lerobot.policies.factory import make_pre_post_processors
+
+
+def make_delta_timestamps(delta_indices: list[int] | None, fps: int) -> list[float]:
+    if delta_indices is None:  # no indices provided
+        return [0]  # fall back to a single zero offset
+    # Otherwise divide every index by fps (a time offset in seconds, if fps is frames per second).
+    return [i / fps for i in delta_indices]
+
+
+output_directory = Path("outputs/robot_learning_tutorial/diffusion")
+output_directory.mkdir(parents=True, exist_ok=True)
+
+# Select your device
+device = torch.device("mps")  # or "cuda" or "cpu"
+
+dataset_id = "lerobot/svla_so101_pickplace"
+
+# This specifies the inputs the model will be expecting and the outputs it will produce
+dataset_metadata = LeRobotDatasetMetadata(dataset_id)
+features = dataset_to_policy_features(dataset_metadata.features)
+# Features typed as ACTION become the policy outputs; every remaining feature is treated as an input
+output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
+input_features = {key: ft for key, ft in features.items() if key not in output_features}
+
+cfg = DiffusionConfig(input_features=input_features, output_features=output_features)
+policy = DiffusionPolicy(cfg)
+preprocessor, postprocessor = make_pre_post_processors(cfg, dataset_stats=dataset_metadata.stats)
+
+policy.train()
+policy.to(device)
+
+# Build per-key time offsets from the Diffusion config's delta indices (observations and action targets)
+delta_timestamps = {
+    "observation.state": make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps),
+    "action": make_delta_timestamps(cfg.action_delta_indices, dataset_metadata.fps),
+}
+
+# Image features, if present, use the same offsets as the other observations
+delta_timestamps |= {
+    k: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps) for k in cfg.image_features
+}
+
+# Instantiate the dataset, passing the per-key offsets built above
+dataset = LeRobotDataset(dataset_id, delta_timestamps=delta_timestamps)
+
+# Create the optimizer and dataloader for offline training
+optimizer = cfg.get_optimizer_preset().build(policy.parameters())
+batch_size = 32
+dataloader = torch.utils.data.DataLoader(
+    dataset,
+    batch_size=batch_size,
+    shuffle=True,
+    pin_memory=device.type != "cpu",
+    drop_last=True,
+)
+
+# Number of training steps and logging frequency
+training_steps = 1
+log_freq = 1
+
+# Run the training loop: keep cycling over the dataloader until `training_steps` updates have been made
+step = 0
+done = False
+while not done:  # re-iterates the dataloader whenever it is exhausted before `training_steps` is reached
+    for batch in dataloader:
+        batch = preprocessor(batch)
+        loss, _ = policy.forward(batch)  # the second return value is not used here
+        loss.backward()
+        optimizer.step()
+        optimizer.zero_grad()  # reset gradients before the next batch
+
+        if step % log_freq == 0:
+            print(f"step: {step} loss: {loss.item():.3f}")
+        step += 1
+        if step >= training_steps:
+            done = True  # also ends the outer `while`
+            break
+
+# Save the policy checkpoint, alongside the pre/post processors
+policy.save_pretrained(output_directory)
+preprocessor.save_pretrained(output_directory)
+postprocessor.save_pretrained(output_directory)
+
+# Save all assets to the Hub
+policy.push_to_hub("fracapuano/robot_learning_tutorial_diffusion")
+preprocessor.push_to_hub("fracapuano/robot_learning_tutorial_diffusion")
+postprocessor.push_to_hub("fracapuano/robot_learning_tutorial_diffusion")
--- a/examples/tutorial/diffusion/diffusion_using_example.py
+++ b/examples/tutorial/diffusion/diffusion_using_example.py
@@ -0,0 +1,60 @@
+import torch
+
+from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
+from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
+from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies.utils import build_inference_frame, make_robot_action
+from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig
+from lerobot.robots.so100_follower.so100_follower import SO100Follower
+
+device = torch.device("mps")  # or "cuda" or "cpu"
+model_id = "fracapuano/robot_learning_tutorial_diffusion"
+
+model = DiffusionPolicy.from_pretrained(model_id)
+
+dataset_id = "lerobot/svla_so101_pickplace"
+# This only downloads the metadata for the dataset, ~10s of MB even for large-scale datasets
+dataset_metadata = LeRobotDatasetMetadata(dataset_id)
+preprocess, postprocess = make_pre_post_processors(
+    model.config, model_id, dataset_stats=dataset_metadata.stats
+)
+
+MAX_EPISODES = 5
+MAX_STEPS_PER_EPISODE = 20
+
+
+# Find ports using lerobot-find-port
+follower_port = ...  # something like "/dev/tty.usbmodem58760431631"
+
+# The robot ids are used to load the right calibration files
+follower_id = ...  # something like "follower_so100"
+
+# Robot and environment configuration
+# Camera keys must match the name and resolutions of the ones used for training!
+# You can check the camera keys expected by a model in the info.json card on the model card on the Hub
+camera_config = {
+    "side": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30),
+    "up": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30),
+}
+
+robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_config)
+robot = SO100Follower(robot_cfg)
+robot.connect()
+
+# Control loop: MAX_EPISODES episodes of MAX_STEPS_PER_EPISODE observe -> predict -> act steps each
+for _ in range(MAX_EPISODES):
+    for _ in range(MAX_STEPS_PER_EPISODE):
+        obs = robot.get_observation()
+        obs_frame = build_inference_frame(
+            observation=obs, ds_features=dataset_metadata.features, device=device
+        )
+        # Run the frame through the preprocessing pipeline before handing it to the policy
+        obs = preprocess(obs_frame)
+        # Select an action, postprocess it, convert it with make_robot_action, then send it to the robot
+        action = model.select_action(obs)
+        action = postprocess(action)
+        action = make_robot_action(action, dataset_metadata.features)
+        robot.send_action(action)
+    # NOTE(review): also printed after the final episode; the robot is never disconnected in this script
+    print("Episode finished! Starting new episode...")