Add docs for LeRobot Image transforms (#1972)
* Remove unused scripts, add docs for image transforms and add example * fix(examples): move train_policy.py under examples, remove outdated readme parts * remove script thats copied to train folder * remove outdated links to examples and example tests
This commit is contained in:
@@ -1,139 +0,0 @@
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
This script demonstrates how to evaluate a pretrained policy from the HuggingFace Hub or from your local
|
||||
training outputs directory. In the latter case, you might want to run examples/3_train_policy.py first.
|
||||
|
||||
It requires the installation of the 'gym_pusht' simulation environment. Install it by running:
|
||||
```bash
|
||||
pip install -e ".[pusht]"
|
||||
```
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import gym_pusht # noqa: F401
|
||||
import gymnasium as gym
|
||||
import imageio
|
||||
import numpy
|
||||
import torch
|
||||
|
||||
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
|
||||
# Create a directory to store the video of the evaluation
output_directory = Path("outputs/eval/example_pusht_diffusion")
output_directory.mkdir(parents=True, exist_ok=True)

# Select your device
device = "cuda"

# Provide the [hugging face repo id](https://huggingface.co/lerobot/diffusion_pusht):
pretrained_policy_path = "lerobot/diffusion_pusht"
# OR a path to a local outputs/train folder.
# pretrained_policy_path = Path("outputs/train/example_pusht_diffusion")

policy = DiffusionPolicy.from_pretrained(pretrained_policy_path)

# Initialize evaluation environment to render two observation types:
# an image of the scene and state/position of the agent. The environment
# also automatically stops running after 300 interactions/steps.
env = gym.make(
    "gym_pusht/PushT-v0",
    obs_type="pixels_agent_pos",
    max_episode_steps=300,
)

# We can verify that the shapes of the features expected by the policy match the ones from the observations
# produced by the environment
print(policy.config.input_features)
print(env.observation_space)

# Similarly, we can check that the actions produced by the policy will match the actions expected by the
# environment
print(policy.config.output_features)
print(env.action_space)

# Reset the policy and environments to prepare for rollout
policy.reset()
numpy_observation, info = env.reset(seed=42)

# Prepare to collect all the rewards and all the frames of the episode,
# from initial state to final state.
rewards = []
frames = []

# Render frame of the initial state
frames.append(env.render())

step = 0
done = False
while not done:
    # Prepare observation for the policy running in Pytorch
    state = torch.from_numpy(numpy_observation["agent_pos"])
    image = torch.from_numpy(numpy_observation["pixels"])

    # Convert to float32, rescale the image from [0,255] to [0,1], and move its
    # last axis to the front (channel last -> channel first).
    state = state.to(torch.float32)
    image = image.to(torch.float32) / 255
    image = image.permute(2, 0, 1)

    # Send data tensors from CPU to GPU
    state = state.to(device, non_blocking=True)
    image = image.to(device, non_blocking=True)

    # Add extra (empty) batch dimension, required to forward the policy
    state = state.unsqueeze(0)
    image = image.unsqueeze(0)

    # Create the policy input dictionary
    observation = {
        "observation.state": state,
        "observation.image": image,
    }

    # Predict the next action with respect to the current observation
    with torch.inference_mode():
        action = policy.select_action(observation)

    # Prepare the action for the environment
    numpy_action = action.squeeze(0).to("cpu").numpy()

    # Step through the environment and receive a new observation
    numpy_observation, reward, terminated, truncated, info = env.step(numpy_action)
    print(f"{step=} {reward=} {terminated=}")

    # Keep track of all the rewards and frames
    rewards.append(reward)
    frames.append(env.render())

    # The rollout is considered done when the success state is reached (i.e. terminated is True),
    # or the maximum number of iterations is reached (i.e. truncated is True).
    # `done` is always False at this point (loop condition), so it does not need to be OR-ed in,
    # and logical `or` is the right operator for boolean flags.
    done = terminated or truncated
    step += 1

if terminated:
    print("Success!")
else:
    print("Failure!")

# Get the speed of environment (i.e. its number of frames per second).
fps = env.metadata["render_fps"]

# Encode all frames into a mp4 video.
video_path = output_directory / "rollout.mp4"
imageio.mimsave(str(video_path), numpy.stack(frames), fps=fps)

print(f"Video of the evaluation is available in '{video_path}'.")
|
||||
@@ -1,311 +0,0 @@
|
||||
This tutorial will explain the training script, how to use it, and particularly how to configure everything needed for the training run.
|
||||
|
||||
> **Note:** The following assumes you're running these commands on a machine equipped with a cuda GPU. If you don't have one (or if you're using a Mac), you can add `--policy.device=cpu` (`--policy.device=mps` respectively). However, be advised that the code executes much slower on cpu.
|
||||
|
||||
## The training script
|
||||
|
||||
LeRobot offers a training script at [`lerobot/scripts/train.py`](../src/lerobot/scripts/train.py). At a high level it does the following:
|
||||
|
||||
- Initializes/loads a configuration for the following steps.
|
||||
- Instantiates a dataset.
|
||||
- (Optional) Instantiates a simulation environment corresponding to that dataset.
|
||||
- Instantiates a policy.
|
||||
- Runs a standard training loop with forward pass, backward pass, optimization step, and occasional logging, evaluation (of the policy on the environment), and checkpointing.
|
||||
|
||||
## Overview of the configuration system
|
||||
|
||||
In the training script, the main function `train` expects a `TrainPipelineConfig` object:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
```python
|
||||
# train.py
|
||||
@parser.wrap()
|
||||
def train(cfg: TrainPipelineConfig):
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
You can inspect the `TrainPipelineConfig` defined in [`lerobot/configs/train.py`](../src/lerobot/configs/train.py) (which is heavily commented and meant to be a reference to understand any option)
|
||||
|
||||
When running the script, inputs for the command line are parsed thanks to the `@parser.wrap()` decorator and an instance of this class is automatically generated. Under the hood, this is done with [Draccus](https://github.com/dlwh/draccus) which is a tool dedicated to this purpose. If you're familiar with Hydra, Draccus can similarly load configurations from config files (.json, .yaml) and also override their values through command line inputs. Unlike Hydra, these configurations are pre-defined in the code through dataclasses rather than being defined entirely in config files. This allows for more rigorous serialization/deserialization, typing, and to manipulate configuration as objects directly in the code and not as dictionaries or namespaces (which enables nice features in an IDE such as autocomplete, jump-to-def, etc.)
|
||||
|
||||
Let's have a look at a simplified example. Amongst other attributes, the training config has the following attributes:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
```python
|
||||
@dataclass
|
||||
class TrainPipelineConfig:
|
||||
dataset: DatasetConfig
|
||||
env: envs.EnvConfig | None = None
|
||||
policy: PreTrainedConfig | None = None
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
in which `DatasetConfig` for example is defined as such:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
```python
|
||||
@dataclass
|
||||
class DatasetConfig:
|
||||
repo_id: str
|
||||
episodes: list[int] | None = None
|
||||
video_backend: str = "pyav"
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
This creates a hierarchical relationship where, for example assuming we have a `cfg` instance of `TrainPipelineConfig`, we can access the `repo_id` value with `cfg.dataset.repo_id`.
|
||||
From the command line, we can specify this value by using a very similar syntax `--dataset.repo_id=repo/id`.
|
||||
|
||||
By default, every field takes its default value specified in the dataclass. If a field doesn't have a default value, it needs to be specified either from the command line or from a config file – whose path is also given on the command line (more on this below). In the example above, the `dataset` field doesn't have a default value which means it must be specified.
|
||||
|
||||
## Specifying values from the CLI
|
||||
|
||||
Let's say that we want to train [Diffusion Policy](../src/lerobot/policies/diffusion) on the [pusht](https://huggingface.co/datasets/lerobot/pusht) dataset, using the [gym_pusht](https://github.com/huggingface/gym-pusht) environment for evaluation. The command to do so would look like this:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=lerobot/pusht \
|
||||
--policy.type=diffusion \
|
||||
--env.type=pusht
|
||||
```
|
||||
|
||||
Let's break this down:
|
||||
|
||||
- To specify the dataset, we just need to specify its `repo_id` on the hub which is the only required argument in the `DatasetConfig`. The rest of the fields have default values and in this case we are fine with those so we can just add the option `--dataset.repo_id=lerobot/pusht`.
|
||||
- To specify the policy, we can just select diffusion policy using `--policy` appended with `.type`. Here, `.type` is a special argument which allows us to select config classes inheriting from `draccus.ChoiceRegistry` and that have been decorated with the `register_subclass()` method. To have a better explanation of this feature, have a look at this [Draccus demo](https://github.com/dlwh/draccus?tab=readme-ov-file#more-flexible-configuration-with-choice-types). In our code, we use this mechanism mainly to select policies, environments, robots, and some other components like optimizers. The policies available to select are located in [lerobot/policies](../src/lerobot/policies)
|
||||
- Similarly, we select the environment with `--env.type=pusht`. The different environment configs are available in [`lerobot/envs/configs.py`](../src/lerobot/envs/configs.py)
|
||||
|
||||
Let's see another example. Let's say you've been training [ACT](../src/lerobot/policies/act) on [lerobot/aloha_sim_insertion_human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human) using the [gym-aloha](https://github.com/huggingface/gym-aloha) environment for evaluation with:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--policy.type=act \
|
||||
--dataset.repo_id=lerobot/aloha_sim_insertion_human \
|
||||
--env.type=aloha \
|
||||
--output_dir=outputs/train/act_aloha_insertion
|
||||
```
|
||||
|
||||
> Notice we added `--output_dir` to explicitly tell where to write outputs from this run (checkpoints, training state, configs etc.). This is not mandatory and if you don't specify it, a default directory will be created from the current date and time, env.type and policy.type. This will typically look like `outputs/train/2025-01-24/16-10-05_aloha_act`.
|
||||
|
||||
We now want to train a different policy for aloha on another task. We'll change the dataset and use [lerobot/aloha_sim_transfer_cube_human](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_human) instead. Of course, we also need to change the task of the environment as well to match this other task.
|
||||
Looking at the [`AlohaEnv`](../src/lerobot/envs/configs.py) config, the task is `"AlohaInsertion-v0"` by default, which corresponds to the task we trained on in the command above. The [gym-aloha](https://github.com/huggingface/gym-aloha?tab=readme-ov-file#description) environment also has the `AlohaTransferCube-v0` task which corresponds to this other task we want to train on. Putting this together, we can train this new policy on this different task using:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--policy.type=act \
|
||||
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
|
||||
--env.type=aloha \
|
||||
--env.task=AlohaTransferCube-v0 \
|
||||
--output_dir=outputs/train/act_aloha_transfer
|
||||
```
|
||||
|
||||
## Loading from a config file
|
||||
|
||||
Now, let's assume that we want to reproduce the run just above. That run has produced a `train_config.json` file in its checkpoints, which serializes the `TrainPipelineConfig` instance it used:
|
||||
|
||||
```json
|
||||
{
|
||||
"dataset": {
|
||||
"repo_id": "lerobot/aloha_sim_transfer_cube_human",
|
||||
"episodes": null,
|
||||
...
|
||||
},
|
||||
"env": {
|
||||
"type": "aloha",
|
||||
"task": "AlohaTransferCube-v0",
|
||||
"fps": 50,
|
||||
...
|
||||
},
|
||||
"policy": {
|
||||
"type": "act",
|
||||
"n_obs_steps": 1,
|
||||
...
|
||||
},
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
We can then simply load the config values from this file using:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
|
||||
--output_dir=outputs/train/act_aloha_transfer_2
|
||||
```
|
||||
|
||||
`--config_path` is also a special argument which allows you to initialize the config from a local config file. It can point to a directory that contains `train_config.json` or to the config file itself directly.
|
||||
|
||||
Similarly to Hydra, we can still override some parameters in the CLI if we want to, e.g.:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
|
||||
--output_dir=outputs/train/act_aloha_transfer_2 \
|
||||
--policy.n_action_steps=80
|
||||
```
|
||||
|
||||
> Note: While `--output_dir` is not required in general, in this case we need to specify it since it will otherwise take the value from the `train_config.json` (which is `outputs/train/act_aloha_transfer`). In order to prevent accidental deletion of previous run checkpoints, we raise an error if you're trying to write in an existing directory. This is not the case when resuming a run, which is what you'll learn next.
|
||||
|
||||
`--config_path` can also accept the repo_id of a repo on the hub that contains a `train_config.json` file, e.g. running:
|
||||
|
||||
```bash
|
||||
lerobot-train --config_path=lerobot/diffusion_pusht
|
||||
```
|
||||
|
||||
will start a training run with the same configuration used for training [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht)
|
||||
|
||||
## Resume training
|
||||
|
||||
Being able to resume a training run is important in case it crashed or aborted for any reason. We'll demonstrate how to do that here.
|
||||
|
||||
Let's reuse the command from the previous run and add a few more options:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--policy.type=act \
|
||||
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
|
||||
--env.type=aloha \
|
||||
--env.task=AlohaTransferCube-v0 \
|
||||
--log_freq=25 \
|
||||
--save_freq=100 \
|
||||
--output_dir=outputs/train/run_resumption
|
||||
```
|
||||
|
||||
Here we've taken care to set up the log frequency and checkpointing frequency to low numbers so we can showcase resumption. You should be able to see some logging and have a first checkpoint within 1 minute (depending on hardware). Wait for the first checkpoint to happen, you should see a line that looks like this in your terminal:
|
||||
|
||||
```
|
||||
INFO 2025-01-24 16:10:56 ts/train.py:263 Checkpoint policy after step 100
|
||||
```
|
||||
|
||||
Now let's simulate a crash by killing the process (hit `ctrl`+`c`). We can then simply resume this run from the last checkpoint available with:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
|
||||
--resume=true
|
||||
```
|
||||
|
||||
You should see from the logging that your training picks up from where it left off.
|
||||
|
||||
Another reason for which you might want to resume a run is simply to extend training and add more training steps. The number of training steps is set by the option `--steps`, which is 100 000 by default.
|
||||
You could double the number of steps of the previous run with:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
|
||||
--resume=true \
|
||||
--steps=200000
|
||||
```
|
||||
|
||||
## Outputs of a run
|
||||
|
||||
In the output directory, there will be a folder called `checkpoints` with the following structure:
|
||||
|
||||
```bash
|
||||
outputs/train/run_resumption/checkpoints
|
||||
├── 000100 # checkpoint_dir for training step 100
|
||||
│ ├── pretrained_model/
|
||||
│ │ ├── config.json # policy config
|
||||
│ │ ├── model.safetensors # policy weights
|
||||
│ │ └── train_config.json # train config
|
||||
│ └── training_state/
|
||||
│ ├── optimizer_param_groups.json # optimizer param groups
|
||||
│ ├── optimizer_state.safetensors # optimizer state
|
||||
│ ├── rng_state.safetensors # rng states
|
||||
│ ├── scheduler_state.json # scheduler state
|
||||
│ └── training_step.json # training step
|
||||
├── 000200
|
||||
└── last -> 000200 # symlink to the last available checkpoint
|
||||
```
|
||||
|
||||
## Fine-tuning a pre-trained policy
|
||||
|
||||
In addition to the features currently in Draccus, we've added a special `.path` argument for the policy, which allows you to load a policy as you would with `PreTrainedPolicy.from_pretrained()`. In that case, `path` can be a local directory that contains a checkpoint or a repo_id pointing to a pretrained policy on the hub.
|
||||
|
||||
For example, we could fine-tune a [policy pre-trained on the aloha transfer task](https://huggingface.co/lerobot/act_aloha_sim_transfer_cube_human) on the aloha insertion task. We can achieve this with:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--policy.path=lerobot/act_aloha_sim_transfer_cube_human \
|
||||
--dataset.repo_id=lerobot/aloha_sim_insertion_human \
|
||||
--env.type=aloha \
|
||||
--env.task=AlohaInsertion-v0
|
||||
```
|
||||
|
||||
When doing so, keep in mind that the features of the fine-tuning dataset would have to match the input/output features of the pretrained policy.
|
||||
|
||||
## Typical logs and metrics
|
||||
|
||||
When you start the training process, you will first see your full configuration being printed in the terminal. You can check it to make sure that you configured your run correctly. The final configuration will also be saved with the checkpoint.
|
||||
|
||||
After that, you will see training log like this one:
|
||||
|
||||
```
|
||||
INFO 2024-08-14 13:35:12 ts/train.py:192 step:0 smpl:64 ep:1 epch:0.00 loss:1.112 grdn:15.387 lr:2.0e-07 updt_s:1.738 data_s:4.774
|
||||
```
|
||||
|
||||
or evaluation log:
|
||||
|
||||
```
|
||||
INFO 2024-08-14 13:38:45 ts/train.py:226 step:100 smpl:6K ep:52 epch:0.25 ∑rwrd:20.693 success:0.0% eval_s:120.266
|
||||
```
|
||||
|
||||
These logs will also be saved in wandb if `wandb.enable` is set to `true`. Here is the meaning of some abbreviations:
|
||||
|
||||
- `smpl`: number of samples seen during training.
|
||||
- `ep`: number of episodes seen during training. An episode contains multiple samples in a complete manipulation task.
|
||||
- `epch`: number of times all unique samples are seen (epoch).
|
||||
- `grdn`: gradient norm.
|
||||
- `∑rwrd`: compute the sum of rewards in every evaluation episode and then take an average of them.
|
||||
- `success`: average success rate of eval episodes. Reward and success are usually different except for the sparse reward setting, where reward=1 only when the task is completed successfully.
|
||||
- `eval_s`: time to evaluate the policy in the environment, in seconds.
|
||||
- `updt_s`: time to update the network parameters, in seconds.
|
||||
- `data_s`: time to load a batch of data, in seconds.
|
||||
|
||||
Some metrics are useful for initial performance profiling. For example, if you find the current GPU utilization is low via the `nvidia-smi` command and `data_s` sometimes is too high, you may need to modify batch size or number of dataloading workers to accelerate dataloading. We also recommend [pytorch profiler](https://github.com/huggingface/lerobot?tab=readme-ov-file#improve-your-code-with-profiling) for detailed performance probing.
|
||||
|
||||
## In short
|
||||
|
||||
We'll summarize here the main use cases to remember from this tutorial.
|
||||
|
||||
#### Train a policy from scratch – CLI
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--policy.type=act \ # <- select 'act' policy
|
||||
--env.type=pusht \ # <- select 'pusht' environment
|
||||
--dataset.repo_id=lerobot/pusht # <- train on this dataset
|
||||
```
|
||||
|
||||
#### Train a policy from scratch - config file + CLI
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--config_path=path/to/pretrained_model \ # <- can also be a repo_id
|
||||
--policy.n_action_steps=80 # <- you may still override values
|
||||
```
|
||||
|
||||
#### Resume/continue a training run
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--config_path=checkpoint/pretrained_model/ \
|
||||
--resume=true \
|
||||
--steps=200000 # <- you can change some training parameters
|
||||
```
|
||||
|
||||
#### Fine-tuning
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--policy.path=lerobot/act_aloha_sim_transfer_cube_human \ # <- can also be a local path to a checkpoint
|
||||
--dataset.repo_id=lerobot/aloha_sim_insertion_human \
|
||||
--env.type=aloha \
|
||||
--env.task=AlohaInsertion-v0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Now that you know the basics of how to train a policy, you might want to know how to apply this knowledge to actual robots, or how to record your own datasets and train policies on your specific task.
|
||||
If that's the case, head over to the next tutorial [`7_get_started_with_real_robot.md`](./7_get_started_with_real_robot.md).
|
||||
|
||||
Or in the meantime, happy training! 🤗
|
||||
@@ -136,7 +136,7 @@ print(f"{dataset[0]['action'].shape=}\n") # (64, c)
|
||||
# PyTorch datasets.
|
||||
dataloader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
num_workers=0,
|
||||
num_workers=4,
|
||||
batch_size=32,
|
||||
shuffle=True,
|
||||
)
|
||||
177
examples/dataset/use_dataset_image_transforms.py
Normal file
177
examples/dataset/use_dataset_image_transforms.py
Normal file
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
This example demonstrates how to use image transforms with LeRobot datasets for data augmentation during training.
|
||||
|
||||
Image transforms are applied to camera frames to improve model robustness and generalization. They are applied
|
||||
at training time only, not during dataset recording, allowing you to experiment with different augmentations
|
||||
without re-recording data.
|
||||
"""
|
||||
|
||||
import torch
|
||||
from torchvision.transforms import v2
|
||||
from torchvision.transforms.functional import to_pil_image
|
||||
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.transforms import ImageTransformConfig, ImageTransforms, ImageTransformsConfig
|
||||
|
||||
|
||||
def save_image(tensor, filename):
    """Save a single 3-dimensional image tensor to ``filename``.

    The tensor is brought into the [0, 1] range before being converted to a PIL image:

    - Integer tensors are treated as holding values in [0, 255] and are always divided by 255.
      Deciding from the maximum value alone would misclassify a dark integer image (max <= 1)
      as already normalized and render its non-zero pixels as full white.
    - Floating-point tensors are divided by 255 only if their maximum exceeds 1.0; otherwise they
      are taken to already be in [0, 1].

    Values are then clamped to [0, 1]. Tensors that are not 3-dimensional are skipped with a
    printed message rather than raising.

    Args:
        tensor: Image tensor, expected to be 3-dimensional ([C, H, W]).
        filename: Destination path handed to ``PIL.Image.Image.save``.
    """
    if tensor.dim() != 3:
        print(f"Skipped {filename}: unexpected tensor shape {tensor.shape}")
        return

    if not tensor.is_floating_point():
        # Integer pixel data: scale by dtype convention, not by the observed maximum.
        tensor = tensor.to(torch.float32) / 255.0
    elif tensor.max() > 1.0:
        # Float data that is evidently still in [0, 255].
        tensor = tensor / 255.0

    tensor = torch.clamp(tensor, 0.0, 1.0)
    pil_image = to_pil_image(tensor)
    pil_image.save(filename)
    print(f"Saved: {filename}")
|
||||
|
||||
|
||||
def example_1_default_transforms():
    """Example 1: compare an untransformed frame with one produced by the default transforms.

    Loads the same dataset twice (once plain, once with ``ImageTransforms`` enabled), then writes
    ``<camera>_original.png`` and ``<camera>_transformed.png`` for the first frame of every camera
    key present in both samples. Any exception is caught and reported on stdout.
    """
    print("\n Example 1: Default Transform Configuration with Image Saving")

    repo_id = "pepijn223/record_main_0"  # Example dataset

    try:
        # Reference dataset: no transforms applied
        plain_dataset = LeRobotDataset(repo_id=repo_id)

        # Same dataset, this time with the transform pipeline switched on
        default_cfg = ImageTransformsConfig(
            enable=True,  # Enable transforms (disabled by default)
            max_num_transforms=2,  # Apply up to 2 transforms per frame
            random_order=False,  # Apply in standard order
        )
        augmented_dataset = LeRobotDataset(repo_id=repo_id, image_transforms=ImageTransforms(default_cfg))

        # Nothing to compare if the dataset has no frames
        if len(plain_dataset) == 0:
            return

        frame_idx = 0  # Use first frame
        plain_frame = plain_dataset[frame_idx]
        augmented_frame = augmented_dataset[frame_idx]

        print(f"Saving comparison images (frame {frame_idx}):")

        for cam_key in plain_dataset.meta.camera_keys:
            # Only cameras available in both samples can be compared
            if cam_key not in plain_frame or cam_key not in augmented_frame:
                continue

            # Turn the feature key into something usable as a file name
            cam_name = cam_key.replace(".", "_").replace("/", "_")
            save_image(plain_frame[cam_key], f"{cam_name}_original.png")
            save_image(augmented_frame[cam_key], f"{cam_name}_transformed.png")

    except Exception as e:
        print(f"Could not load dataset '{repo_id}': {e}")
|
||||
|
||||
|
||||
def example_2_custom_transforms():
    """Example 2: build a hand-written transform configuration and save the result.

    Configures brightness, contrast and sharpness jitter with explicit weights and ranges, loads
    the dataset with that configuration, and writes ``<camera>_custom_transforms.png`` for the
    first frame of every camera key found in the sample. Any exception is caught and reported
    on stdout.
    """
    print("\n Example 2: Custom Transform Configuration")

    repo_id = "pepijn223/record_main_0"  # Example dataset

    try:
        # Individual transforms with strong effects, keyed by a descriptive name
        strong_tfs = {
            "brightness": ImageTransformConfig(
                weight=1.0,
                type="ColorJitter",
                kwargs={"brightness": (0.5, 1.5)},  # Strong brightness range
            ),
            "contrast": ImageTransformConfig(
                weight=1.0,  # Higher weight = more likely to be selected
                type="ColorJitter",
                kwargs={"contrast": (0.6, 1.4)},  # Strong contrast
            ),
            "sharpness": ImageTransformConfig(
                weight=0.5,  # Lower weight = less likely to be selected
                type="SharpnessJitter",
                kwargs={"sharpness": (0.2, 2.0)},  # Strong sharpness variation
            ),
        }
        custom_cfg = ImageTransformsConfig(
            enable=True,
            max_num_transforms=2,  # Apply up to 2 transforms per frame
            random_order=True,  # Apply transforms in random order
            tfs=strong_tfs,
        )

        custom_dataset = LeRobotDataset(repo_id=repo_id, image_transforms=ImageTransforms(custom_cfg))

        # Nothing to save if the dataset has no frames
        if len(custom_dataset) == 0:
            return

        first_frame = custom_dataset[0]
        print("Saving custom transform examples:")

        for cam_key in custom_dataset.meta.camera_keys:
            if cam_key not in first_frame:
                continue
            # Turn the feature key into something usable as a file name
            cam_name = cam_key.replace(".", "_").replace("/", "_")
            save_image(first_frame[cam_key], f"{cam_name}_custom_transforms.png")

    except Exception as e:
        print(f"Could not load dataset '{repo_id}': {e}")
|
||||
|
||||
|
||||
def example_3_torchvision_transforms():
    """Example 3: pass a plain torchvision ``v2.Compose`` pipeline as the image transform.

    Composes color jitter, Gaussian blur and a small random rotation, loads the dataset with that
    pipeline, and writes ``<camera>_torchvision.png`` for the first frame of every camera key found
    in the sample. Any exception is caught and reported on stdout.
    """
    print("\n Example 3: Pure Torchvision Transforms")

    repo_id = "pepijn223/record_main_0"  # Example dataset

    try:
        # Individual torchvision steps, applied in list order by Compose
        pipeline_steps = [
            v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
            v2.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
            v2.RandomRotation(degrees=10),  # Small rotation
        ]
        tv_pipeline = v2.Compose(pipeline_steps)

        tv_dataset = LeRobotDataset(repo_id=repo_id, image_transforms=tv_pipeline)

        # Nothing to save if the dataset has no frames
        if len(tv_dataset) == 0:
            return

        first_frame = tv_dataset[0]
        print("Saving torchvision transform examples:")

        for cam_key in tv_dataset.meta.camera_keys:
            if cam_key not in first_frame:
                continue
            # Turn the feature key into something usable as a file name
            cam_name = cam_key.replace(".", "_").replace("/", "_")
            save_image(first_frame[cam_key], f"{cam_name}_torchvision.png")

    except Exception as e:
        print(f"Could not load dataset '{repo_id}': {e}")
|
||||
|
||||
|
||||
def main():
    """Print a banner, then run the three image-transform examples in order."""
    print("LeRobot Dataset Image Transforms Examples")

    examples = (
        example_1_default_transforms,
        example_2_custom_transforms,
        example_3_torchvision_transforms,
    )
    for run_example in examples:
        run_example()


if __name__ == "__main__":
    main()
|
||||
@@ -12,11 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""This script demonstrates how to train Diffusion Policy on the PushT environment.
|
||||
|
||||
Once you have trained a model with this script, you can try to evaluate it on
|
||||
examples/2_evaluate_pretrained_policy.py
|
||||
"""
|
||||
"""This script demonstrates how to train Diffusion Policy on the PushT environment."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
@@ -13,11 +13,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
"""This script demonstrates how to train a Diffusion Policy on the PushT environment,
|
||||
using a dataset processed in streaming mode.
|
||||
|
||||
Once you have trained a model with this script, you can try to evaluate it on
|
||||
examples/2_evaluate_pretrained_policy.py
|
||||
"""
|
||||
using a dataset processed in streaming mode."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
Reference in New Issue
Block a user