* (unscrewing things up) (#2288) * fix: expose a function explicitly building a frame for inference * fix: first make dataset frame, then make ready for inference * fix: reducing reliance on lerobot record for policy's ouptuts too * fix: encapsulating squeezing out + device handling from predict action * fix: remove duplicated call to build_inference_frame and add a function to only perform data type handling (whole conversion is: keys matching + data type conversion) * refactor(envs): add custom-observation-size (#2167) * fix: add MockMotorBus to MockRobot * rl: first drafts * add: all components of HIL SERL * fix: actor block works * fix: less friction, less friction * add: hil-serl complete example * fix: dataset names * fix: restructuring example folder * fix: act works but found bug in how ACT works * fix: same path for both pre and postprocessors * fix: paths * add: example usage for act * add: using ACT example * fix: training examples * fix: using examples * fix: camera index * fix: rename workflows into tutorial so that the path of the files is lerobot/examples/tutorial/... * fix: upload everything in one repo * fix: model name * fix: simplify model path * add: VLAs example --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: minor fix using named attributes * fix: change model to act * fix: named attributes for inference frame building * fix: minor fixes to smolvla * fix: small changes to pi0 * remove: old file that should have never been committed (ups sorry sorry) --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com>
99 lines
3.2 KiB
Python
99 lines
3.2 KiB
Python
"""This script demonstrates how to train ACT Policy on a real-world dataset."""
|
|
|
|
from pathlib import Path
|
|
|
|
import torch
|
|
|
|
from lerobot.configs.types import FeatureType
|
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
|
from lerobot.datasets.utils import dataset_to_policy_features
|
|
from lerobot.policies.act.configuration_act import ACTConfig
|
|
from lerobot.policies.act.modeling_act import ACTPolicy
|
|
from lerobot.policies.factory import make_pre_post_processors
|
|
|
|
|
|
def make_delta_timestamps(delta_indices: list[int] | None, fps: int) -> list[float]:
|
|
if delta_indices is None:
|
|
return [0]
|
|
|
|
return [i / fps for i in delta_indices]
|
|
|
|
|
|
# Directory the trained policy and its processors are saved to
output_directory = Path("outputs/robot_learning_tutorial/act")
output_directory.mkdir(parents=True, exist_ok=True)

# Select your device
device = torch.device("mps")  # or "cuda" or "cpu"

dataset_id = "lerobot/svla_so101_pickplace"

# This specifies the inputs the model will be expecting and the outputs it will produce
dataset_metadata = LeRobotDatasetMetadata(dataset_id)
features = dataset_to_policy_features(dataset_metadata.features)

# Action-typed features are the policy outputs; every remaining feature is an input
output_features = {
    name: feature for name, feature in features.items() if feature.type is FeatureType.ACTION
}
input_features = {
    name: feature for name, feature in features.items() if name not in output_features
}

cfg = ACTConfig(input_features=input_features, output_features=output_features)
policy = ACTPolicy(cfg)
preprocessor, postprocessor = make_pre_post_processors(cfg, dataset_stats=dataset_metadata.stats)

# Move the policy to the selected device and switch it to training mode
policy.to(device)
policy.train()
|
|
|
|
# To perform action chunking, ACT expects a given number of actions as targets
delta_timestamps = {"action": make_delta_timestamps(cfg.action_delta_indices, dataset_metadata.fps)}

# add image features if they are present
delta_timestamps.update(
    {
        image_key: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps)
        for image_key in cfg.image_features
    }
)

# Instantiate the dataset
dataset = LeRobotDataset(dataset_id, delta_timestamps=delta_timestamps)
|
|
|
|
# Create the optimizer and dataloader for offline training
batch_size = 32
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
    # Pinned host memory is only requested when training on a non-CPU device
    pin_memory=(device.type != "cpu"),
)
optimizer = cfg.get_optimizer_preset().build(policy.parameters())
|
|
|
|
# Number of training steps and logging frequency
training_steps = 1
log_freq = 1

# The dataloader drops the last incomplete batch, so a dataset with fewer samples
# than one batch yields nothing and the outer loop below would never terminate.
# Fail fast with a clear message instead.
if len(dataloader) == 0:
    raise ValueError(
        "The dataloader yields no batches: the dataset holds fewer samples than one batch."
    )

# Run training loop
step = 0
done = False
# The outer loop restarts the dataloader (a new epoch) until enough steps have run
while not done:
    for batch in dataloader:
        batch = preprocessor(batch)
        loss, _ = policy.forward(batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if step % log_freq == 0:
            print(f"step: {step} loss: {loss.item():.3f}")
        step += 1
        if step >= training_steps:
            # Leave the inner loop and signal the outer loop to stop as well
            done = True
            break
|
|
|
|
# The policy and both processors are saved and published together
artifacts = (policy, preprocessor, postprocessor)

# Save the policy checkpoint, alongside the pre/post processors
for artifact in artifacts:
    artifact.save_pretrained(output_directory)

# Save all assets to the Hub
hub_repo_id = "fracapuano/robot_learning_tutorial_act"
for artifact in artifacts:
    artifact.push_to_hub(hub_repo_id)
|