* (unscrewing things up) (#2288) * fix: expose a function explicitly building a frame for inference * fix: first make dataset frame, then make ready for inference * fix: reducing reliance on lerobot record for policy's outputs too * fix: encapsulating squeezing out + device handling from predict action * fix: remove duplicated call to build_inference_frame and add a function to only perform data type handling (whole conversion is: keys matching + data type conversion) * refactor(envs): add custom-observation-size (#2167) * fix: add MockMotorBus to MockRobot * rl: first drafts * add: all components of HIL SERL * fix: actor block works * fix: less friction, less friction * add: hil-serl complete example * fix: dataset names * fix: restructuring example folder * fix: act works but found bug in how ACT works * fix: same path for both pre and postprocessors * fix: paths * add: example usage for act * add: using ACT example * fix: training examples * fix: using examples * fix: camera index * fix: rename workflows into tutorial so that the path of the files is lerobot/examples/tutorial/... * fix: upload everything in one repo * fix: model name * fix: simplify model path * add: VLAs example --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: minor fix using named attributes * fix: change model to act * fix: named attributes for inference frame building * fix: minor fixes to smolvla * fix: small changes to pi0 * remove: old file that should have never been committed (oops, sorry sorry) --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com>
63 lines
1.9 KiB
Python
63 lines
1.9 KiB
Python
import torch
|
|
|
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
|
from lerobot.policies.factory import make_policy, make_pre_post_processors
|
|
from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig
|
|
|
|
# Device to use for training: prefer CUDA, then Apple-silicon MPS, then CPU.
# The original hard-coded "mps", which makes the example crash on any machine
# without an MPS backend; auto-detection keeps it runnable everywhere while
# still picking MPS on Apple hardware.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Load the dataset used for training (downloaded from the Hugging Face Hub
# on first use).
repo_id = "lerobot/example_hil_serl_dataset"
dataset = LeRobotDataset(repo_id)

# Camera keys from the dataset metadata — used below to tell the classifier
# how many image streams it must extract features from.
camera_keys = dataset.meta.camera_keys
|
# Build the reward-classifier configuration: one image backbone is shared
# across every camera stream found in the dataset.
config = RewardClassifierConfig(
    num_cameras=len(camera_keys),
    device=device,
    # Backbone model used to extract features from the image frames.
    model_name="microsoft/resnet-18",
)

# Instantiate the policy, its optimizer, and the input preprocessor.
policy = make_policy(config, ds_meta=dataset.meta)

# The config ships an optimizer preset; build it over the policy parameters.
optimizer_preset = config.get_optimizer_preset()
optimizer = optimizer_preset.build(policy.parameters())

# Only the preprocessor is needed for training; the postprocessor is dropped.
preprocessor, _ = make_pre_post_processors(policy_cfg=config, dataset_stats=dataset.meta.stats)
|
# Hub repository id under which the trained classifier will be published.
classifier_id = "fracapuano/reward_classifier_hil_serl_example"

# Instantiate a shuffled dataloader over the training dataset.
dataloader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=16)
|
# Training loop: plain supervised training of the reward classifier.
num_epochs = 5

for epoch in range(num_epochs):
    # Per-epoch accumulators, averaged over batches at the end of the epoch.
    running_loss, running_accuracy = 0.0, 0.0

    for batch in dataloader:
        # Preprocess the batch and move it to the correct device.
        batch = preprocessor(batch)

        # Forward pass: the policy returns the loss and a metrics dict.
        loss, output_dict = policy.forward(batch)

        # Backward pass and optimizer step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_accuracy += output_dict["accuracy"]

    # Report batch-averaged metrics for this epoch.
    n_batches = len(dataloader)
    avg_loss = running_loss / n_batches
    avg_accuracy = running_accuracy / n_batches
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.2f}%")

print("Training finished!")
|
# Publish the trained classifier weights to the Hugging Face Hub.
policy.push_to_hub(classifier_id)