Compare commits

..

7 Commits

Author SHA1 Message Date
Simon Alibert
c6a61e3ba2 WIP 2024-05-21 16:31:48 +02:00
Simon Alibert
62d3546f08 Move dependencies to extra 2024-05-21 16:29:44 +02:00
Simon Alibert
956f035d16 Merge remote-tracking branch 'origin/main' into user/aliberts/2024_05_14_compare_policies 2024-05-21 10:14:10 +02:00
Simon Alibert
eb530fa595 Add '--independent' flag 2024-05-16 19:31:57 +02:00
Simon Alibert
fe31b7f4b7 Merge remote-tracking branch 'origin/main' into user/aliberts/2024_05_14_compare_policies 2024-05-16 17:04:33 +02:00
Simon Alibert
8f5cfcd73d Add argparse, refactor & cleanup 2024-05-16 16:55:40 +02:00
Simon Alibert
10036c1219 WIP add score tests 2024-05-15 17:50:12 +02:00
19 changed files with 1126 additions and 842 deletions

View File

@@ -10,6 +10,7 @@ on:
env:
PYTHON_VERSION: "3.10"
# CI_SLACK_CHANNEL: ${{ secrets.CI_DOCKER_CHANNEL }}
jobs:
latest-cpu:
@@ -50,6 +51,30 @@ jobs:
tags: huggingface/lerobot-cpu
build-args: PYTHON_VERSION=${{ env.PYTHON_VERSION }}
# - name: Post to a Slack channel
# id: slack
# #uses: slackapi/slack-github-action@v1.25.0
# uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
# with:
# # Slack channel id, channel name, or user id to post message.
# # See also: https://api.slack.com/methods/chat.postMessage#channels
# channel-id: ${{ env.CI_SLACK_CHANNEL }}
# # For posting a rich message using Block Kit
# payload: |
# {
# "text": "lerobot-cpu Docker Image build result: ${{ job.status }}\n${{ github.event.pull_request.html_url || github.event.head_commit.url }}",
# "blocks": [
# {
# "type": "section",
# "text": {
# "type": "mrkdwn",
# "text": "lerobot-cpu Docker Image build result: ${{ job.status }}\n${{ github.event.pull_request.html_url || github.event.head_commit.url }}"
# }
# }
# ]
# }
# env:
# SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-cuda:
name: GPU
@@ -88,40 +113,27 @@ jobs:
tags: huggingface/lerobot-gpu
build-args: PYTHON_VERSION=${{ env.PYTHON_VERSION }}
latest-cuda-dev:
name: GPU Dev
runs-on: ubuntu-latest
steps:
- name: Cleanup disk
run: |
sudo df -h
# sudo ls -l /usr/local/lib/
# sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo df -h
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Check out code
uses: actions/checkout@v4
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Build and Push GPU dev
uses: docker/build-push-action@v5
with:
context: .
file: ./docker/lerobot-gpu-dev/Dockerfile
push: true
tags: huggingface/lerobot-gpu:dev
build-args: PYTHON_VERSION=${{ env.PYTHON_VERSION }}
# - name: Post to a Slack channel
# id: slack
# #uses: slackapi/slack-github-action@v1.25.0
# uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
# with:
# # Slack channel id, channel name, or user id to post message.
# # See also: https://api.slack.com/methods/chat.postMessage#channels
# channel-id: ${{ env.CI_SLACK_CHANNEL }}
# # For posting a rich message using Block Kit
# payload: |
# {
# "text": "lerobot-gpu Docker Image build result: ${{ job.status }}\n${{ github.event.pull_request.html_url || github.event.head_commit.url }}",
# "blocks": [
# {
# "type": "section",
# "text": {
# "type": "mrkdwn",
# "text": "lerobot-gpu Docker Image build result: ${{ job.status }}\n${{ github.event.pull_request.html_url || github.event.head_commit.url }}"
# }
# }
# ]
# }
# env:
# SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

32
.gitignore vendored
View File

@@ -2,16 +2,11 @@
logs
tmp
wandb
# Data
data
outputs
# Apple
.DS_Store
# VS Code
.vscode
rl
.DS_Store
# HPC
nautilus/*.yaml
@@ -95,7 +90,6 @@ instance/
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
@@ -108,6 +102,13 @@ ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
@@ -118,15 +119,6 @@ celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
@@ -144,9 +136,3 @@ dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/

View File

@@ -27,7 +27,6 @@ test-end-to-end:
${MAKE} test-tdmpc-ete-train
${MAKE} test-tdmpc-ete-eval
${MAKE} test-default-ete-eval
${MAKE} test-act-pusht-tutorial
test-act-ete-train:
python lerobot/scripts/train.py \
@@ -143,21 +142,3 @@ test-default-ete-eval:
eval.batch_size=1 \
env.episode_length=8 \
device=cpu \
test-act-pusht-tutorial:
cp examples/advanced/1_train_act_pusht/act_pusht.yaml lerobot/configs/policy/created_by_Makefile.yaml
python lerobot/scripts/train.py \
policy=created_by_Makefile.yaml \
env=pusht \
wandb.enable=False \
training.offline_steps=2 \
eval.n_episodes=1 \
eval.batch_size=1 \
env.episode_length=2 \
device=cpu \
training.save_model=true \
training.save_freq=2 \
training.batch_size=2 \
hydra.run.dir=tests/outputs/act_pusht/
rm lerobot/configs/policy/created_by_Makefile.yaml

View File

@@ -77,10 +77,6 @@ Install 🤗 LeRobot:
pip install .
```
> **NOTE:** Depending on your platform, if you encounter any build errors during this step
you may need to install `cmake` and `build-essential` for building some of our dependencies.
On linux: `sudo apt-get install cmake build-essential`
For simulations, 🤗 LeRobot comes with gymnasium environments that can be installed as extras:
- [aloha](https://github.com/huggingface/gym-aloha)
- [xarm](https://github.com/huggingface/gym-xarm)
@@ -103,7 +99,6 @@ wandb login
```
.
├── examples # contains demonstration examples, start here to learn about LeRobot
| └── advanced # contains even more examples for those who have mastered the basics
├── lerobot
| ├── configs # contains hydra yaml files with all options that you can override in the command line
| | ├── default.yaml # selected by default, it loads pusht environment and diffusion policy
@@ -163,10 +158,9 @@ See `python lerobot/scripts/eval.py --help` for more instructions.
### Train your own policy
Check out [example 3](./examples/3_train_policy.py) that illustrates how to train a model using our core library in python, and [example 4](./examples/4_train_policy_with_script.md) that shows how to use our training script from command line.
Check out [example 3](./examples/3_train_policy.py) that illustrates how to start training a model.
In general, you can use our training script to easily train any policy. Here is an example of training the ACT policy on trajectories collected by humans on the Aloha simulation environment for the insertion task:
```bash
python lerobot/scripts/train.py \
policy=act \
@@ -190,19 +184,7 @@ A link to the wandb logs for the run will also show up in yellow in your termina
![](media/wandb.png)
Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python lerobot/scripts/eval.py --help` for more instructions.
#### Reproduce state-of-the-art (SOTA)
We have organized our configuration files (found under [`lerobot/configs`](./lerobot/configs)) such that they reproduce SOTA results from a given model variant in their respective original works. Simply running:
```bash
python lerobot/scripts/train.py policy=diffusion env=pusht
```
reproduces SOTA results for Diffusion Policy on the PushT task.
Pretrained policies, along with reproduction details, can be found under the "Models" section of https://huggingface.co/lerobot.
Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. After training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python lerobot/scripts/eval.py --help` for more instructions.
## Contribute

View File

@@ -1,40 +0,0 @@
FROM nvidia/cuda:12.4.1-base-ubuntu22.04
# Configure image
ARG PYTHON_VERSION=3.10
ARG DEBIAN_FRONTEND=noninteractive
# Install apt dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake \
git git-lfs openssh-client \
nano vim less util-linux \
htop atop nvtop \
sed gawk grep curl wget \
tcpdump sysstat screen tmux \
libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
python${PYTHON_VERSION} python${PYTHON_VERSION}-venv \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
# Install gh cli tool
RUN (type -p wget >/dev/null || (apt update && apt-get install wget -y)) \
&& mkdir -p -m 755 /etc/apt/keyrings \
&& wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
&& chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
&& apt update \
&& apt install gh -y \
&& apt clean && rm -rf /var/lib/apt/lists/*
# Setup `python`
RUN ln -s /usr/bin/python3 /usr/bin/python
# Install poetry
RUN curl -sSL https://install.python-poetry.org | python -
ENV PATH="/root/.local/bin:$PATH"
RUN echo 'if [ "$HOME" != "/root" ]; then ln -sf /root/.local/bin/poetry $HOME/.local/bin/poetry; fi' >> /root/.bashrc
RUN poetry config virtualenvs.create false
RUN poetry config virtualenvs.in-project true
# Set EGL as the rendering backend for MuJoCo
ENV MUJOCO_GL="egl"

View File

@@ -4,15 +4,18 @@ FROM nvidia/cuda:12.4.1-base-ubuntu22.04
ARG PYTHON_VERSION=3.10
ARG DEBIAN_FRONTEND=noninteractive
# Install apt dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake \
git git-lfs openssh-client \
nano vim ffmpeg \
htop atop nvtop \
sed gawk grep curl wget \
tcpdump sysstat screen \
libglib2.0-0 libgl1-mesa-glx libegl1-mesa \
python${PYTHON_VERSION} python${PYTHON_VERSION}-venv \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
# Create virtual environment
RUN ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python
RUN python -m venv /opt/venv
@@ -20,7 +23,8 @@ ENV PATH="/opt/venv/bin:$PATH"
RUN echo "source /opt/venv/bin/activate" >> /root/.bashrc
# Install LeRobot
COPY . /lerobot
RUN git lfs install
RUN git clone https://github.com/huggingface/lerobot.git
WORKDIR /lerobot
RUN pip install --upgrade --no-cache-dir pip
RUN pip install --no-cache-dir ".[test, aloha, xarm, pusht]"

View File

@@ -1,183 +0,0 @@
This tutorial will explain the training script, how to use it, and particularly the use of Hydra to configure everything needed for the training run.
## The training script
LeRobot offers a training script at [`lerobot/scripts/train.py`](../../lerobot/scripts/train.py). At a high level it does the following:
- Loads a Hydra configuration file for the following steps (more on Hydra in a moment).
- Makes a simulation environment.
- Makes a dataset corresponding to that simulation environment.
- Makes a policy.
- Runs a standard training loop with forward pass, backward pass, optimization step, and occasional logging, evaluation (of the policy on the environment), and checkpointing.
## Basics of how we use Hydra
Explaining the ins and outs of [Hydra](https://hydra.cc/docs/intro/) is beyond the scope of this document, but here we'll share the main points you need to know.
First, `lerobot/configs` has a directory structure like this:
```
.
├── default.yaml
├── env
│ ├── aloha.yaml
│ ├── pusht.yaml
│ └── xarm.yaml
└── policy
├── act.yaml
├── diffusion.yaml
└── tdmpc.yaml
```
**_For brevity, in the rest of this document we'll drop the leading `lerobot/configs` path. So `default.yaml` really refers to `lerobot/configs/default.yaml`._**
When you run the training script with
```python
python lerobot/scripts/train.py
```
Hydra is set up to read `default.yaml` (via the `@hydra.main` decorator). If you take a look at the `@hydra.main`'s arguments you will see `config_path="../configs", config_name="default"`. At the top of `default.yaml`, is a `defaults` section which looks likes this:
```yaml
defaults:
- _self_
- env: pusht
- policy: diffusion
```
This logic tells Hydra to incorporate configuration parameters from `env/pusht.yaml` and `policy/diffusion.yaml`. _Note: Be aware of the order as any configuration parameters with the same name will be overridden. Thus, `default.yaml` is overridden by `env/pusht.yaml` which is overridden by `policy/diffusion.yaml`_.
Then, `default.yaml` also contains common configuration parameters such as `device: cuda` or `use_amp: false` (for enabling fp16 training). Some other parameters are set to `???` which indicates that they are expected to be set in additional yaml files. For instance, `training.offline_steps: ???` in `default.yaml` is set to `200000` in `diffusion.yaml`.
Thanks to this `defaults` section in `default.yaml`, if you want to train Diffusion Policy with PushT, you really only need to run:
```bash
python lerobot/scripts/train.py
```
However, you can be more explicit and launch the exact same Diffusion Policy training on PushT with:
```bash
python lerobot/scripts/train.py policy=diffusion env=pusht
```
This way of overriding defaults via the CLI is especially useful when you want to change the policy and/or environment. For instance, you can train ACT on the default Aloha environment with:
```bash
python lerobot/scripts/train.py policy=act env=aloha
```
There are two things to note here:
- Config overrides are passed as `param_name=param_value`.
- Here we have overridden the defaults section. `policy=act` tells Hydra to use `policy/act.yaml`, and `env=aloha` tells Hydra to use `env/aloha.yaml`.
_As an aside: we've set up all of our configurations so that they reproduce state-of-the-art results from papers in the literature._
## Overriding configuration parameters in the CLI
Now let's say that we want to train on a different task in the Aloha environment. If you look in `env/aloha.yaml` you will see something like:
```yaml
# lerobot/configs/env/aloha.yaml
env:
task: AlohaInsertion-v0
```
And if you look in `policy/act.yaml` you will see something like:
```yaml
# lerobot/configs/policy/act.yaml
dataset_repo_id: lerobot/aloha_sim_insertion_human
```
But our Aloha environment actually supports a cube transfer task as well. To train for this task, you could manually modify the two yaml configuration files respectively.
First, we'd need to switch to using the cube transfer task for the ALOHA environment.
```diff
# lerobot/configs/env/aloha.yaml
env:
- task: AlohaInsertion-v0
+ task: AlohaTransferCube-v0
```
Then, we'd also need to switch to using the cube transfer dataset.
```diff
# lerobot/configs/policy/act.yaml
-dataset_repo_id: lerobot/aloha_sim_insertion_human
+dataset_repo_id: lerobot/aloha_sim_transfer_cube_human
```
Then, you'd be able to run:
```bash
python lerobot/scripts/train.py policy=act env=aloha
```
and you'd be training and evaluating on the cube transfer task.
An alternative approach to editing the yaml configuration files, would be to override the defaults via the command line:
```bash
python lerobot/scripts/train.py \
policy=act \
dataset_repo_id=lerobot/aloha_sim_transfer_cube_human \
env=aloha \
env.task=AlohaTransferCube-v0
```
There's something new here. Notice the `.` delimiter used to traverse the configuration hierarchy. _But be aware that the `defaults` section is an exception. As you saw above, we didn't need to write `defaults.policy=act` in the CLI. `policy=act` was enough._
Putting all that knowledge together, here's the command that was used to train https://huggingface.co/lerobot/act_aloha_sim_transfer_cube_human.
```bash
python lerobot/scripts/train.py \
hydra.run.dir=outputs/train/act_aloha_sim_transfer_cube_human \
device=cuda \
env=aloha \
env.task=AlohaTransferCube-v0 \
dataset_repo_id=lerobot/aloha_sim_transfer_cube_human \
policy=act \
training.eval_freq=10000 \
training.log_freq=250 \
training.offline_steps=100000 \
training.save_model=true \
training.save_freq=25000 \
eval.n_episodes=50 \
eval.batch_size=50 \
wandb.enable=false \
```
There's one new thing here: `hydra.run.dir=outputs/train/act_aloha_sim_transfer_cube_human`, which specifies where to save the training output.
## Using a configuration file not in `lerobot/configs`
Above we discussed that our training script is set up such that Hydra looks for `default.yaml` in `lerobot/configs`. But, if you have a configuration file elsewhere in your filesystem you may use:
```bash
python lerobot/scripts/train.py --config-dir PARENT/PATH --config-name FILE_NAME_WITHOUT_EXTENSION
```
Note: here we use regular syntax for providing CLI arguments to a Python script, not Hydra's `param_name=param_value` syntax.
As a concrete example, this becomes particularly handy when you have a folder with training outputs, and would like to re-run the training. For example, say you previously ran the training script with one of the earlier commands and have `outputs/train/my_experiment/checkpoints/pretrained_model/config.yaml`. This `config.yaml` file will have the full set of configuration parameters within it. To run the training with the same configuration again, do:
```bash
python lerobot/scripts/train.py --config-dir outputs/train/my_experiment/checkpoints/pretrained_model --config-name config
```
Note that you may still use the regular syntax for config parameter overrides (eg: by adding `training.offline_steps=200000`).
---
So far we've seen how to train Diffusion Policy for PushT and ACT for ALOHA. Now, what if we want to train ACT for PushT? Well, there are aspects of the ACT configuration that are specific to the ALOHA environments, and these happen to be incompatible with PushT. Therefore, trying to run the following will almost certainly raise an exception of sorts (eg: feature dimension mismatch):
```bash
python lerobot/scripts/train.py policy=act env=pusht dataset_repo_id=lerobot/pusht
```
Please, head on over to our [advanced tutorial on adapting policy configuration to various environments](./advanced/train_act_pusht/train_act_pusht.md) to learn more.
Or in the meantime, happy coding! 🤗

View File

@@ -1,87 +0,0 @@
# @package _global_
# Change the seed to match what PushT eval uses
# (to avoid evaluating on seeds used for generating the training data).
seed: 100000
# Change the dataset repository to the PushT one.
dataset_repo_id: lerobot/pusht
override_dataset_stats:
observation.image:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
training:
offline_steps: 80000
online_steps: 0
eval_freq: 10000
save_freq: 100000
log_freq: 250
save_model: true
batch_size: 8
lr: 1e-5
lr_backbone: 1e-5
weight_decay: 1e-4
grad_clip_norm: 10
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50
batch_size: 50
# See `configuration_act.py` for more details.
policy:
name: act
# Input / output structure.
n_obs_steps: 1
chunk_size: 100 # chunk_size
n_action_steps: 100
input_shapes:
observation.image: [3, 96, 96]
observation.state: ["${env.state_dim}"]
output_shapes:
action: ["${env.action_dim}"]
# Normalization / Unnormalization
input_normalization_modes:
observation.image: mean_std
# Use min_max normalization just because it's more standard.
observation.state: min_max
output_normalization_modes:
# Use min_max normalization just because it's more standard.
action: min_max
# Architecture.
# Vision backbone.
vision_backbone: resnet18
pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
replace_final_stride_with_dilation: false
# Transformer layers.
pre_norm: false
dim_model: 512
n_heads: 8
dim_feedforward: 3200
feedforward_activation: relu
n_encoder_layers: 4
# Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code
# that means only the first layer is used. Here we match the original implementation by setting this to 1.
# See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521.
n_decoder_layers: 1
# VAE.
use_vae: true
latent_dim: 32
n_vae_encoder_layers: 4
# Inference.
temporal_ensemble_momentum: null
# Training and loss computation.
dropout: 0.1
kl_weight: 10.0

View File

@@ -1,70 +0,0 @@
In this tutorial we will learn how to adapt a policy configuration to be compatible with a new environment and dataset. As a concrete example, we will adapt the default configuration for ACT to be compatible with the PushT environment and dataset.
If you haven't already read our tutorial on the [training script and configuration tooling](../4_train_policy_with_script.md) please do so prior to tackling this tutorial.
Let's get started!
Suppose we want to train ACT for PushT. Well, there are aspects of the ACT configuration that are specific to the ALOHA environments, and these happen to be incompatible with PushT. Therefore, trying to run the following will almost certainly raise an exception of sorts (eg: feature dimension mismatch):
```bash
python lerobot/scripts/train.py policy=act env=pusht dataset_repo_id=lerobot/pusht
```
We need to adapt the parameters of the ACT policy configuration to the PushT environment. The most important ones are the image keys.
ALOHA's datasets and environments typically use a variable number of cameras. In `lerobot/configs/policy/act.yaml` you may notice two relevant sections. Here we show you the minimal diff needed to adjust to PushT:
```diff
override_dataset_stats:
- observation.images.top:
+ observation.image:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
policy:
input_shapes:
- observation.images.top: [3, 480, 640]
+ observation.image: [3, 96, 96]
observation.state: ["${env.state_dim}"]
output_shapes:
action: ["${env.action_dim}"]
input_normalization_modes:
- observation.images.top: mean_std
+ observation.image: mean_std
observation.state: min_max
output_normalization_modes:
action: min_max
```
Here we've accounted for the following:
- PushT uses "observation.image" for its image key.
- PushT provides smaller images.
_Side note: technically we could override these via the CLI, but with many changes it gets a bit messy, and we also have a bit of a challenge in that we're using `.` in our observation keys which is treated by Hydra as a hierarchical separator_.
For your convenience, we provide [`act_pusht.yaml`](./act_pusht.yaml) in this directory. It contains the diff above, plus some other (optional) ones that are explained within. Please copy it into `lerobot/configs/policy` with:
```bash
cp examples/advanced/1_train_act_pusht/act_pusht.yaml lerobot/configs/policy/act_pusht.yaml
```
(remember from a [previous tutorial](../4_train_policy_with_script.md) that Hydra will look in the `lerobot/configs` directory). Now try running the following.
<!-- Note to contributor: are you changing this command? Note that it's tested in `Makefile`, so change it there too! -->
```bash
python lerobot/scripts/train.py policy=act_pusht env=pusht
```
Notice that this is much the same as the command that failed at the start of the tutorial, only:
- Now we are using `policy=act_pusht` to point to our new configuration file.
- We can drop `dataset_repo_id=lerobot/pusht` as the change is incorporated in our new configuration file.
Hurrah! You're now training ACT for the PushT environment.
---
The bottom line of this tutorial is that when training policies for different environments and datasets you will need to understand what parts of the policy configuration are specific to those and make changes accordingly.
Happy coding! 🤗

View File

@@ -16,12 +16,15 @@
import logging
import torch
from omegaconf import DictConfig, OmegaConf
from omegaconf import OmegaConf
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
def make_dataset(cfg: DictConfig, split="train") -> LeRobotDataset:
def make_dataset(
cfg,
split="train",
):
if cfg.env.name not in cfg.dataset_repo_id:
logging.warning(
f"There might be a mismatch between your training dataset ({cfg.dataset_repo_id=}) and your "
@@ -40,7 +43,6 @@ def make_dataset(cfg: DictConfig, split="train") -> LeRobotDataset:
cfg.dataset_repo_id,
split=split,
delta_timestamps=delta_timestamps,
n_end_keyframes_dropped=eval(cfg.training.get("n_end_keyframes_dropped", "0")),
)
if cfg.get("override_dataset_stats"):

View File

@@ -44,26 +44,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
split: str = "train",
transform: callable = None,
delta_timestamps: dict[list[float]] | None = None,
n_end_keyframes_dropped: int = 0,
):
"""
Args:
delta_timestamps: A dictionary mapping lists of relative times (Δt) to data keys. When a frame is
sampled from the underlying dataset, we treat it as a "keyframe" and load multiple frames
according to the list of Δt's. For example {"action": [-0.05, 0, 0.05]} indicates
that we want to load the current keyframe's action, as well as one from 50 ms ago, and one
50 ms into the future. The action key then contains a (3, action_dim) tensor (whereas without
`delta_timestamps` there would just be a (action_dim,) tensor. When the Δt's demand that
frames outside of an episode boundary are retrieved, a copy padding strategy is used. See
`load_previous_and_future_frames` for more details.
n_end_keyframes_dropped: Don't sample the last n items in each episode. This option is handy when
used in combination with `delta_timestamps` when, for example, the Δt's demand multiple future
frames, but we want to avoid introducing too much copy padding into the data distribution.
For example if `delta_timestamps = {"action": [0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30]}`
and we sample the last frame in the episode, we would end up padding with 6 frames worth of
copies. Instead, we might want no padding (in which case we need n=6), or we might be okay
with up to 2 frames of padding (in which case we need n=4).
"""
super().__init__()
self.repo_id = repo_id
self.version = version
@@ -84,12 +65,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
self.info = load_info(repo_id, version, root)
if self.video:
self.videos_dir = load_videos(repo_id, version, root)
# If `n_end_keyframes_dropped == 0`, `self.index` contains exactly the indices of the hf_dataset. If
# `n_end_keyframes_dropped > 0`, `self.index` contains a subset of the indices of the hf_dataset where
# we drop those indices pertaining to the last n frames of each episode.
self.index = []
for from_ix, to_ix in zip(*self.episode_data_index.values(), strict=True):
self.index.extend(list(range(from_ix, to_ix - n_end_keyframes_dropped)))
@property
def fps(self) -> int:
@@ -132,11 +107,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
@property
def num_samples(self) -> int:
"""Number of possible samples in the dataset.
This is equivalent to the number of frames in the dataset minus n_end_keyframes_dropped.
"""
return len(self.index)
"""Number of samples/frames."""
return len(self.hf_dataset)
@property
def num_episodes(self) -> int:
@@ -156,7 +128,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
return self.num_samples
def __getitem__(self, idx):
item = self.hf_dataset[self.index[idx]]
item = self.hf_dataset[idx]
if self.delta_timestamps is not None:
item = load_previous_and_future_frames(

View File

@@ -304,11 +304,7 @@ class DiffusionModel(nn.Module):
loss = F.mse_loss(pred, target, reduction="none")
# Mask loss wherever the action is padded with copies (edges of the dataset trajectory).
if self.config.do_mask_loss_for_padding:
if "action_is_pad" not in batch:
raise ValueError(
f"You need to provide 'action_is_pad' in the batch when {self.config.do_mask_loss_for_padding=}."
)
if self.config.do_mask_loss_for_padding and "action_is_pad" in batch:
in_episode_bound = ~batch["action_is_pad"]
loss = loss * in_episode_bound.unsqueeze(-1)

View File

@@ -39,21 +39,11 @@ training:
adam_weight_decay: 1.0e-6
online_steps_between_rollouts: 1
# For each training batch we want (consider n_obs_steps=2, horizon=16):
# t | -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
# action | a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a
# observation | o, o, , , , , , , , , , , , , ,
# Note that at rollout we only use some of the actions (consider n_action_steps=8):
# action used | , a, a, a, a, a, a, a, a, , , , , , ,
delta_timestamps:
observation.image: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
observation.state: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
action: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1 - ${policy.n_obs_steps} + ${policy.horizon})]"
# The original implementation doesn't sample keyframes for the last 7 steps. This is because, as described
# above, the last 7 actions from the diffusion model are not used.
n_end_keyframes_dropped: ${policy.horizon} - ${policy.n_action_steps} - ${policy.n_obs_steps} + 1
eval:
n_episodes: 50
batch_size: 50

View File

@@ -0,0 +1,340 @@
"""Compare two policies based on metrics computed from an eval.
Usage example:
You just made changes to a policy and you want to assess its new performance against
the reference policy (i.e. before your changes).
```
python lerobot/scripts/compare_policies.py \
output/eval/ref_policy/eval_info.json \
output/eval/new_policy/eval_info.json
```
This script can accept `eval_info.json` dicts with identical seeds between each eval episode of ref_policy and
new_policy (paired-samples) or from evals performed with different seeds (independent samples).
The script will first perform normality tests to determine if parametric tests can be used or not, then
evaluate if policies metrics are significantly different using the appropriate tests.
CAVEATS: by default, this script will compare seed numbers to determine if samples can be considered paired.
If changes have been made to this environment in-between the ref_policy eval and the new_policy eval, you
should use the `--independent` flag to override this and not pair the samples even if they have identical
seeds.
"""
import argparse
import json
import logging
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from scipy.stats import anderson, kstest, mannwhitneyu, normaltest, shapiro, ttest_ind, ttest_rel, wilcoxon
from statsmodels.stats.contingency_tables import mcnemar
from termcolor import colored
from terminaltables import AsciiTable
def init_logging() -> None:
logging.basicConfig(
level=logging.INFO,
format="%(message)s",
handlers=[logging.StreamHandler()],
)
logging.getLogger("matplotlib.font_manager").disabled = True
def log_section(title: str) -> None:
    """Log *title* centered between two 21-dash horizontal rules."""
    rule = "-" * 21
    logging.info(f"\n{rule}\n {title.center(19)} \n{rule}")
def log_test(msg: str, p_value: float):
    """Log a statistical-test result line, color-coded by significance.

    p < 0.01 -> red "H_0 Rejected"; 0.01 <= p < 0.05 -> yellow "Inconclusive";
    p >= 0.05 -> green "H_0 Not Rejected".
    """
    if p_value < 0.01:
        color, interpretation = "red", "H_0 Rejected"
    elif p_value < 0.05:
        color, interpretation = "yellow", "Inconclusive"
    else:
        color, interpretation = "green", "H_0 Not Rejected"
    p_colored = colored(f"{p_value:.3f}", color)
    verdict_colored = colored(f"{interpretation}", color, attrs=["bold"])
    logging.info(f"{msg}, p-value = {p_colored} -> {verdict_colored}")
def get_eval_info_episodes(eval_info_path: Path) -> dict:
    """Load an `eval_info.json` file and extract per-episode metrics.

    Returns a dict with numpy arrays for rewards and successes, the list of
    per-episode seeds, and the total episode count.
    """
    with open(eval_info_path) as f:
        eval_info = json.load(f)
    episodes = eval_info["per_episode"]
    return {
        "sum_rewards": np.array([ep["sum_reward"] for ep in episodes]),
        "max_rewards": np.array([ep["max_reward"] for ep in episodes]),
        "successes": np.array([ep["success"] for ep in episodes]),
        "seeds": [ep["seed"] for ep in episodes],
        "num_episodes": len(episodes),
    }
def append_table_metric(table: list, metric: str, ref_sample: dict, new_sample: dict, mean_std: bool = False):
    """Append one row comparing `metric` across both samples and return `table`.

    With `mean_std`, the row reports "mean (std)" of the metric arrays;
    otherwise the raw values are inserted as-is.
    """
    if mean_std:
        row = [
            f"{metric} - mean (std)",
            f"{np.mean(ref_sample[metric]):.3f} ({np.std(ref_sample[metric]):.3f})",
            f"{np.mean(new_sample[metric]):.3f} ({np.std(new_sample[metric]):.3f})",
        ]
    else:
        row = [metric, ref_sample[metric], new_sample[metric]]
    table.append(row)
    return table
def cohens_d(x, y):
    """Return Cohen's d effect size using the average-variance pooled std."""
    var_x = np.var(x, ddof=1)
    var_y = np.var(y, ddof=1)
    pooled_std = np.sqrt((var_x + var_y) / 2)
    return (np.mean(x) - np.mean(y)) / pooled_std
def normality_tests(array: np.ndarray, name: str):
    """Log several normality tests on `array`.

    Returns True when both Shapiro-Wilk and Kolmogorov-Smirnov fail to reject
    normality at the 5% level (D'Agostino-Pearson and Anderson-Darling are
    reported for information only).
    """
    ap_stat, ap_p = normaltest(array)
    sw_stat, sw_p = shapiro(array)
    ks_stat, ks_p = kstest(array, "norm", args=(np.mean(array), np.std(array)))
    log_test(f"{name} - D'Agostino and Pearson test: statistic = {ap_stat:.3f}", ap_p)
    log_test(f"{name} - Shapiro-Wilk test: statistic = {sw_stat:.3f}", sw_p)
    log_test(f"{name} - Kolmogorov-Smirnov test: statistic = {ks_stat:.3f}", ks_p)
    # Anderson-Darling yields no single p-value; list statistic vs critical values.
    ad_result = anderson(array)
    logging.info(f"{name} - Anderson-Darling test: statistic = {ad_result.statistic:.3f}")
    for cv, sl in zip(ad_result.critical_values, ad_result.significance_level):
        logging.info(f"  Critical value at {sl}%: {cv:.3f}")
    return sw_p > 0.05 and ks_p > 0.05
def perform_tests(ref_sample: dict, new_sample: dict, output_dir: Path, independent: bool = False):
    """Compare two eval samples end to end.

    Logs a summary table and Cohen's d effect sizes, runs paired or
    independent significance tests (samples are paired when their episode
    seeds match, unless `independent` forces unpaired treatment), then saves
    diagnostic plots into `output_dir`.

    Args:
        ref_sample: reference sample dict from `get_eval_info_episodes`.
        new_sample: new sample dict from `get_eval_info_episodes`.
        output_dir: directory where plots are written (created if missing).
        independent: when True, never pair the samples even with equal seeds.
    """
    paired = (ref_sample["seeds"] == new_sample["seeds"]) and not independent
    if paired:
        logging.info("\nSamples are paired (identical seeds).")
    else:
        logging.info("\nSamples are considered independent (seeds are different).")

    _log_comparison_table(ref_sample, new_sample)

    log_section("Effect Size")
    d_max_reward = cohens_d(ref_sample["max_rewards"], new_sample["max_rewards"])
    d_sum_reward = cohens_d(ref_sample["sum_rewards"], new_sample["sum_rewards"])
    logging.info(f"Cohen's d for Max Reward: {d_max_reward:.3f}")
    logging.info(f"Cohen's d for Sum Reward: {d_sum_reward:.3f}")

    if paired:
        paired_sample_tests(ref_sample, new_sample)
    else:
        independent_sample_tests(ref_sample, new_sample)

    _save_plots(ref_sample, new_sample, output_dir)


def _log_comparison_table(ref_sample: dict, new_sample: dict) -> None:
    """Print an ascii table of headline metrics for both samples."""
    table_data = [["Metric", "Ref.", "New"]]
    table_data = append_table_metric(table_data, "num_episodes", ref_sample, new_sample)
    for metric in ("successes", "max_rewards", "sum_rewards"):
        table_data = append_table_metric(table_data, metric, ref_sample, new_sample, mean_std=True)
    print(AsciiTable(table_data).table)


def _save_plots(ref_sample: dict, new_sample: dict, output_dir: Path) -> None:
    """Write boxplots, histograms and Q-Q plots for both reward metrics."""
    output_dir.mkdir(exist_ok=True, parents=True)
    for metric, pretty in (("max_rewards", "Max Reward"), ("sum_rewards", "Sum Reward")):
        # Filenames use the singular metric name, e.g. "max_reward".
        short = metric[:-1]
        labels = [f"Ref Sample {pretty}", f"New Sample {pretty}"]
        plot_boxplot(
            ref_sample[metric],
            new_sample[metric],
            labels,
            f"Boxplot of {pretty}s",
            f"{output_dir}/boxplot_{short}.png",
        )
        plot_histogram(
            ref_sample[metric],
            new_sample[metric],
            labels,
            f"Histogram of {pretty}s",
            f"{output_dir}/histogram_{short}.png",
        )
        plot_qqplot(
            ref_sample[metric],
            f"Q-Q Plot of Ref Sample {pretty}s",
            f"{output_dir}/qqplot_sample_a_{short}.png",
        )
        plot_qqplot(
            new_sample[metric],
            f"Q-Q Plot of New Sample {pretty}s",
            f"{output_dir}/qqplot_sample_b_{short}.png",
        )
def paired_sample_tests(ref_sample: dict, new_sample: dict):
    """Run paired significance tests on both samples.

    Rewards use a paired t-test when the per-episode differences look normal,
    otherwise the Wilcoxon signed-rank test; success rates use McNemar's test.
    """
    log_section("Normality tests")
    metrics = (("max_rewards", "Max Reward"), ("sum_rewards", "Sum Reward"))
    diff_is_normal = {}
    for metric, pretty in metrics:
        diff = ref_sample[metric] - new_sample[metric]
        diff_is_normal[metric] = normality_tests(diff, f"{pretty} Difference")

    log_section("Paired-sample tests")
    for metric, pretty in metrics:
        if diff_is_normal[metric]:
            stat, p_val = ttest_rel(ref_sample[metric], new_sample[metric])
            log_test(f"Paired t-test for {pretty}: t-statistic = {stat:.3f}", p_val)
        else:
            stat, p_val = wilcoxon(ref_sample[metric], new_sample[metric])
            log_test(f"Wilcoxon test for {pretty}: statistic = {stat:.3f}", p_val)

    # 2x2 contingency table counting success/failure agreement across samples.
    ref_ok = ref_sample["successes"] == 1
    new_ok = new_sample["successes"] == 1
    contingency = np.array(
        [
            [np.sum(ref_ok & new_ok), np.sum(ref_ok & ~new_ok)],
            [np.sum(~ref_ok & new_ok), np.sum(~ref_ok & ~new_ok)],
        ]
    )
    mcnemar_result = mcnemar(contingency, exact=True)
    log_test(f"McNemar's test for Success: statistic = {mcnemar_result.statistic:.3f}", mcnemar_result.pvalue)
def independent_sample_tests(ref_sample: dict, new_sample: dict):
    """Run independent-sample significance tests on both reward metrics.

    For each metric, uses Welch's two-sample t-test when both samples look
    normal, and the Mann-Whitney U test otherwise. Every result is logged and
    summarized in one ascii table.

    Fixes a WIP inconsistency: the t-test was previously computed twice, and
    table rows were only appended in the max_rewards branch while applying the
    max_rewards normality verdict to both metrics.
    """
    log_section("Normality tests")
    normal = {}
    for metric, pretty in (("max_rewards", "Max Rewards"), ("sum_rewards", "Sum Rewards")):
        normal_ref = normality_tests(ref_sample[metric], f"{pretty} Ref Sample")
        normal_new = normality_tests(new_sample[metric], f"{pretty} New Sample")
        normal[metric] = normal_ref and normal_new

    log_section("Independent samples tests")
    alpha = 0.05
    table_data = [["Metric", "Test", "p-value"]]
    for metric, pretty in (("max_rewards", "Max Reward"), ("sum_rewards", "Sum Reward")):
        if normal[metric]:
            # Welch's t-test (equal_var=False) avoids the equal-variance assumption.
            t_stat, p_val = ttest_ind(ref_sample[metric], new_sample[metric], equal_var=False)
            log_test(f"Two-Sample t-test for {pretty}: t-statistic = {t_stat:.3f}", p_val)
            test_name = "Two-Sample t-test"
        else:
            u_stat, p_val = mannwhitneyu(ref_sample[metric], new_sample[metric])
            log_test(f"Mann-Whitney U test for {pretty}: U-statistic = {u_stat:.3f}", p_val)
            test_name = "Mann-Whitney U"
        status = "✓" if p_val >= alpha else "✗"
        table_data.append([metric, test_name, f"{status} {p_val:.3f}"])
    print(AsciiTable(table_data).table)
def append_independent_test(
table: list,
ref_sample: dict,
new_sample: dict,
test: callable,
test_name: str,
kwargs: dict | None = None,
) -> list:
kwargs = {} if kwargs is None else kwargs
row = [f"{test_name}: p-value ≥ alpha"]
for metric in table[0][1:]:
_, p_val = test(ref_sample[metric], new_sample[metric], **kwargs)
alpha = 0.05
status = "" if p_val >= alpha else ""
row.append(f"{status} {p_val:.3f}{alpha}")
table.append(row)
return table
def plot_boxplot(data_a: np.ndarray, data_b: np.ndarray, labels: list[str], title: str, filename: str):
    """Save a side-by-side boxplot of the two samples to `filename`."""
    ax = plt.gca()
    ax.boxplot([data_a, data_b], labels=labels)
    ax.set_title(title)
    plt.savefig(filename)
    plt.close()
def plot_histogram(data_a: np.ndarray, data_b: np.ndarray, labels: list[str], title: str, filename: str):
    """Save overlaid 30-bin histograms of the two samples to `filename`."""
    ax = plt.gca()
    for data, label in zip((data_a, data_b), labels):
        ax.hist(data, bins=30, alpha=0.7, label=label)
    ax.set_title(title)
    ax.legend()
    plt.savefig(filename)
    plt.close()
def plot_qqplot(data: np.ndarray, title: str, filename: str):
    """Save a normal Q-Q plot of `data` to `filename`."""
    ax = plt.gca()
    stats.probplot(data, dist="norm", plot=ax)
    ax.set_title(title)
    plt.savefig(filename)
    plt.close()
if __name__ == "__main__":
    # Reuse the module docstring as the CLI help text, preserving its layout.
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("ref_sample_path", type=Path, help="Path to the reference sample JSON file.")
    parser.add_argument("new_sample_path", type=Path, help="Path to the new sample JSON file.")
    parser.add_argument(
        "--independent",
        action="store_true",
        help="Ignore seeds and consider samples to be independent (unpaired).",
    )
    parser.add_argument(
        "--output_dir",
        type=Path,
        default=Path("outputs/compare/"),
        help="Directory to save the output results. Defaults to outputs/compare/",
    )
    args = parser.parse_args()

    init_logging()
    perform_tests(
        get_eval_info_episodes(args.ref_sample_path),
        get_eval_info_episodes(args.new_sample_path),
        args.output_dir,
        args.independent,
    )

1006
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -41,7 +41,7 @@ numba = ">=0.59.0"
torch = "^2.2.1"
opencv-python = ">=4.9.0"
diffusers = "^0.27.2"
torchvision = ">=0.17.1"
torchvision = ">=0.18.0"
h5py = ">=3.10.0"
huggingface-hub = {extras = ["hf-transfer"], version = "^0.23.0"}
gymnasium = ">=0.29.1"
@@ -58,15 +58,19 @@ imagecodecs = { version = ">=2024.1.1", optional = true }
pyav = ">=12.0.5"
moviepy = ">=1.0.3"
rerun-sdk = ">=0.15.1"
statsmodels = {version = ">=0.14.2", optional = true}
matplotlib = {version = ">=3.8.4", optional = true}
terminaltables = {version = ">=3.1.10", optional = true}
[tool.poetry.extras]
pusht = ["gym-pusht"]
xarm = ["gym-xarm"]
aloha = ["gym-aloha"]
umi = ["imagecodecs"]
compare = ["statsmodels", "matplotlib", "terminaltables"]
dev = ["pre-commit", "debugpy"]
test = ["pytest", "pytest-cov"]
umi = ["imagecodecs"]
[tool.ruff]
line-length = 110

View File

@@ -115,7 +115,6 @@ def test_compute_stats_on_xarm():
# reduce size of dataset sample on which stats compute is tested to 10 frames
dataset.hf_dataset = dataset.hf_dataset.select(range(10))
dataset.index = [i for i in dataset.index if i < 10]
# Note: we set the batch size to be smaller than the whole dataset to make sure we are testing batched
# computation of the statistics. While doing this, we also make sure it works when we don't divide the

View File

@@ -45,11 +45,11 @@ def test_example_1():
@require_package("gym_pusht")
def test_examples_basic2_basic3_advanced1():
def test_examples_2_through_4():
"""
Train a model with example 3, check the outputs.
Evaluate the trained model with example 2, check the outputs.
Calculate the validation loss with advanced example 1, check the outputs.
Calculate the validation loss with example 4, check the outputs.
"""
### Test example 3
@@ -97,7 +97,7 @@ def test_examples_basic2_basic3_advanced1():
assert Path("outputs/eval/example_pusht_diffusion/rollout.mp4").exists()
## Test example 4
file_contents = _read_file("examples/advanced/2_calculate_validation_loss.py")
file_contents = _read_file("examples/4_calculate_validation_loss.py")
# Run on a single example from the last episode, use CPU, and use the local model.
file_contents = _find_and_replace(