Compare commits
128 Commits
temp_branc
...
user/miche
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
16b905ee67 | ||
|
|
44d96a0811 | ||
|
|
4488e55e94 | ||
|
|
2945dca65b | ||
|
|
691d39aa32 | ||
|
|
56c01a2944 | ||
|
|
49bdcc094c | ||
|
|
6ad84a6561 | ||
|
|
3b5af7eb38 | ||
|
|
23f6c875b5 | ||
|
|
f56d769dfb | ||
|
|
36b9b60a0e | ||
|
|
93d9bf83c2 | ||
|
|
37da50b573 | ||
|
|
c6ad495176 | ||
|
|
f43e5d07f5 | ||
|
|
9ee8711504 | ||
|
|
6203641710 | ||
|
|
1f13bda25b | ||
|
|
acae4b49d2 | ||
|
|
c72ad49c9b | ||
|
|
eda02fade5 | ||
|
|
8546358bc5 | ||
|
|
a91b7c6163 | ||
|
|
4d15861872 | ||
|
|
f3630ad910 | ||
|
|
aed9f4036a | ||
|
|
757ea175d3 | ||
|
|
b69a132737 | ||
|
|
7b159a6b22 | ||
|
|
74270c8c91 | ||
|
|
a6762ec316 | ||
|
|
fde29e0167 | ||
|
|
56e4603d5b | ||
|
|
f6c90ca35f | ||
|
|
c2d6fb6119 | ||
|
|
95a4b59b5b | ||
|
|
16103cb8b7 | ||
|
|
e4ba084e25 | ||
|
|
ac79e8cb36 | ||
|
|
df2cb51364 | ||
|
|
7a342db9c4 | ||
|
|
6b2ec1ed77 | ||
|
|
375abd3020 | ||
|
|
293bdc7f67 | ||
|
|
79d114cc1f | ||
|
|
2650872b76 | ||
|
|
cd1509d805 | ||
|
|
5ea7c78237 | ||
|
|
443a9eec88 | ||
|
|
ab23a4fd27 | ||
|
|
1267c3e955 | ||
|
|
c70b8d0abc | ||
|
|
e1845d4dcc | ||
|
|
e69f0c5059 | ||
|
|
ff84024ee9 | ||
|
|
ee51f54cb5 | ||
|
|
fee5fa5c2e | ||
|
|
8d57093ce7 | ||
|
|
4c22de20a6 | ||
|
|
51e87f6f97 | ||
|
|
df3d2ec5df | ||
|
|
e210d795de | ||
|
|
18ffa4248b | ||
|
|
8bcf81fa24 | ||
|
|
615894d3fb | ||
|
|
450eae310b | ||
|
|
60865e8980 | ||
|
|
0d77be90ee | ||
|
|
0098bd264e | ||
|
|
1aba80d93f | ||
|
|
07570f867f | ||
|
|
b8bdbc1c5b | ||
|
|
7ae8d05326 | ||
|
|
a2a8538ac9 | ||
|
|
fb73cdb9a4 | ||
|
|
9dca233d7e | ||
|
|
c3c0141738 | ||
|
|
c72dc23c43 | ||
|
|
237a484be0 | ||
|
|
6c2cb6e107 | ||
|
|
b46db7ea73 | ||
|
|
ee52b8b782 | ||
|
|
a805458c7e | ||
|
|
e991a31061 | ||
|
|
c4c0a43de7 | ||
|
|
299451af81 | ||
|
|
9ebf8b88ec | ||
|
|
c1232a01e2 | ||
|
|
3b925c3dce | ||
|
|
e46bdb9d30 | ||
|
|
9316cf46ef | ||
|
|
ac3798bd62 | ||
|
|
bce3dc3bfa | ||
|
|
1a51505ec6 | ||
|
|
e7355ba595 | ||
|
|
91e8ce772b | ||
|
|
3a9f964429 | ||
|
|
beacb7e957 | ||
|
|
be64d54bd9 | ||
|
|
354f37aed3 | ||
|
|
d0d8193625 | ||
|
|
7242c57400 | ||
|
|
3ee3739edc | ||
|
|
ad3f112d16 | ||
|
|
50a75ad3fe | ||
|
|
c146ba936f | ||
|
|
110264000f | ||
|
|
9433ac52ec | ||
|
|
da78bbfd16 | ||
|
|
835ab5a81b | ||
|
|
f96773de10 | ||
|
|
cbc51e1341 | ||
|
|
cf633344be | ||
|
|
8bd406e607 | ||
|
|
3ea53124e0 | ||
|
|
7f680886b0 | ||
|
|
6d2bc11365 | ||
|
|
b417cebc4e | ||
|
|
3113038beb | ||
|
|
096824b5ff | ||
|
|
2d75b93ba0 | ||
|
|
21ba4b5263 | ||
|
|
028c17fd48 | ||
|
|
07e113ce21 | ||
|
|
1016a983a1 | ||
|
|
17a1214e25 | ||
|
|
ad115b6c27 |
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -21,7 +21,7 @@ Provide a simple way for the reviewer to try out your changes.
|
||||
|
||||
Examples:
|
||||
```bash
|
||||
pytest -sx tests/test_stuff.py::test_something
|
||||
DATA_DIR=tests/data pytest -sx tests/test_stuff.py::test_something
|
||||
```
|
||||
```bash
|
||||
python lerobot/scripts/train.py --some.option=true
|
||||
|
||||
8
.github/workflows/nightly-tests.yml
vendored
8
.github/workflows/nightly-tests.yml
vendored
@@ -7,8 +7,10 @@ on:
|
||||
schedule:
|
||||
- cron: "0 2 * * *"
|
||||
|
||||
# env:
|
||||
env:
|
||||
DATA_DIR: tests/data
|
||||
# SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}
|
||||
|
||||
jobs:
|
||||
run_all_tests_cpu:
|
||||
name: CPU
|
||||
@@ -28,9 +30,13 @@ jobs:
|
||||
working-directory: /lerobot
|
||||
steps:
|
||||
- name: Tests
|
||||
env:
|
||||
DATA_DIR: tests/data
|
||||
run: pytest -v --cov=./lerobot --disable-warnings tests
|
||||
|
||||
- name: Tests end-to-end
|
||||
env:
|
||||
DATA_DIR: tests/data
|
||||
run: make test-end-to-end
|
||||
|
||||
|
||||
|
||||
63
.github/workflows/test.yml
vendored
63
.github/workflows/test.yml
vendored
@@ -29,6 +29,7 @@ jobs:
|
||||
name: Pytest
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
DATA_DIR: tests/data
|
||||
MUJOCO_GL: egl
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -69,6 +70,7 @@ jobs:
|
||||
name: Pytest (minimal install)
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
DATA_DIR: tests/data
|
||||
MUJOCO_GL: egl
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -102,38 +104,39 @@ jobs:
|
||||
&& rm -rf tests/outputs outputs
|
||||
|
||||
# TODO(aliberts, rcadene): redesign after v2 migration / removing hydra
|
||||
# end-to-end:
|
||||
# name: End-to-end
|
||||
# runs-on: ubuntu-latest
|
||||
# env:
|
||||
# MUJOCO_GL: egl
|
||||
# steps:
|
||||
# - uses: actions/checkout@v4
|
||||
# with:
|
||||
# lfs: true # Ensure LFS files are pulled
|
||||
end-to-end:
|
||||
name: End-to-end
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
DATA_DIR: tests/data
|
||||
MUJOCO_GL: egl
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true # Ensure LFS files are pulled
|
||||
|
||||
# - name: Install apt dependencies
|
||||
# # portaudio19-dev is needed to install pyaudio
|
||||
# run: |
|
||||
# sudo apt-get update && \
|
||||
# sudo apt-get install -y libegl1-mesa-dev portaudio19-dev
|
||||
- name: Install apt dependencies
|
||||
# portaudio19-dev is needed to install pyaudio
|
||||
run: |
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y libegl1-mesa-dev portaudio19-dev
|
||||
|
||||
# - name: Install poetry
|
||||
# run: |
|
||||
# pipx install poetry && poetry config virtualenvs.in-project true
|
||||
# echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
|
||||
- name: Install poetry
|
||||
run: |
|
||||
pipx install poetry && poetry config virtualenvs.in-project true
|
||||
echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
|
||||
|
||||
# - name: Set up Python 3.10
|
||||
# uses: actions/setup-python@v5
|
||||
# with:
|
||||
# python-version: "3.10"
|
||||
# cache: "poetry"
|
||||
- name: Set up Python 3.10
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: "poetry"
|
||||
|
||||
# - name: Install poetry dependencies
|
||||
# run: |
|
||||
# poetry install --all-extras
|
||||
- name: Install poetry dependencies
|
||||
run: |
|
||||
poetry install --all-extras
|
||||
|
||||
# - name: Test end-to-end
|
||||
# run: |
|
||||
# make test-end-to-end \
|
||||
# && rm -rf outputs
|
||||
- name: Test end-to-end
|
||||
run: |
|
||||
make test-end-to-end \
|
||||
&& rm -rf outputs
|
||||
|
||||
@@ -3,7 +3,7 @@ default_language_version:
|
||||
python: python3.10
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
rev: v4.6.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
- id: debug-statements
|
||||
@@ -14,11 +14,11 @@ repos:
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
- repo: https://github.com/asottile/pyupgrade
|
||||
rev: v3.19.0
|
||||
rev: v3.16.0
|
||||
hooks:
|
||||
- id: pyupgrade
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.8.2
|
||||
rev: v0.5.2
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix]
|
||||
@@ -32,6 +32,6 @@ repos:
|
||||
- "--check"
|
||||
- "--no-update"
|
||||
- repo: https://github.com/gitleaks/gitleaks
|
||||
rev: v8.21.2
|
||||
rev: v8.18.4
|
||||
hooks:
|
||||
- id: gitleaks
|
||||
|
||||
@@ -267,7 +267,7 @@ We use `pytest` in order to run the tests. From the root of the
|
||||
repository, here's how to run tests with `pytest` for the library:
|
||||
|
||||
```bash
|
||||
python -m pytest -sv ./tests
|
||||
DATA_DIR="tests/data" python -m pytest -sv ./tests
|
||||
```
|
||||
|
||||
|
||||
|
||||
14
README.md
14
README.md
@@ -153,12 +153,10 @@ python lerobot/scripts/visualize_dataset.py \
|
||||
--episode-index 0
|
||||
```
|
||||
|
||||
or from a dataset in a local folder with the `root` option and the `--local-files-only` (in the following case the dataset will be searched for in `./my_local_data_dir/lerobot/pusht`)
|
||||
or from a dataset in a local folder with the root `DATA_DIR` environment variable (in the following case the dataset will be searched for in `./my_local_data_dir/lerobot/pusht`)
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset.py \
|
||||
DATA_DIR='./my_local_data_dir' python lerobot/scripts/visualize_dataset.py \
|
||||
--repo-id lerobot/pusht \
|
||||
--root ./my_local_data_dir \
|
||||
--local-files-only 1 \
|
||||
--episode-index 0
|
||||
```
|
||||
|
||||
@@ -210,10 +208,12 @@ dataset attributes:
|
||||
|
||||
A `LeRobotDataset` is serialised using several widespread file formats for each of its parts, namely:
|
||||
- hf_dataset stored using Hugging Face datasets library serialization to parquet
|
||||
- videos are stored in mp4 format to save space
|
||||
- metadata are stored in plain json/jsonl files
|
||||
- videos are stored in mp4 format to save space or png files
|
||||
- episode_data_index saved using `safetensor` tensor serialization format
|
||||
- stats saved using `safetensor` tensor serialization format
|
||||
- info are saved using JSON
|
||||
|
||||
Dataset can be uploaded/downloaded from the HuggingFace hub seamlessly. To work on a local dataset, you can use the `local_files_only` argument and specify its location with the `root` argument if it's not in the default `~/.cache/huggingface/lerobot` location.
|
||||
Dataset can be uploaded/downloaded from the HuggingFace hub seamlessly. To work on a local dataset, you can set the `DATA_DIR` environment variable to your root dataset folder as illustrated in the above section on dataset visualization.
|
||||
|
||||
### Evaluate a pretrained policy
|
||||
|
||||
|
||||
@@ -192,6 +192,7 @@ Record 2 episodes and upload your dataset to the hub:
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--robot-path lerobot/configs/robot/so100.yaml \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/so100_test \
|
||||
--tags so100 tutorial \
|
||||
--warmup-time-s 5 \
|
||||
@@ -211,6 +212,7 @@ echo ${HF_USER}/so100_test
|
||||
If you didn't upload with `--push-to-hub 0`, you can also visualize it locally with:
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset_html.py \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/so100_test
|
||||
```
|
||||
|
||||
@@ -218,9 +220,10 @@ python lerobot/scripts/visualize_dataset_html.py \
|
||||
|
||||
Now try to replay the first episode on your robot:
|
||||
```bash
|
||||
python lerobot/scripts/control_robot.py replay \
|
||||
DATA_DIR=data python lerobot/scripts/control_robot.py replay \
|
||||
--robot-path lerobot/configs/robot/so100.yaml \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/so100_test \
|
||||
--episode 0
|
||||
```
|
||||
@@ -229,7 +232,7 @@ python lerobot/scripts/control_robot.py replay \
|
||||
|
||||
To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
DATA_DIR=data python lerobot/scripts/train.py \
|
||||
dataset_repo_id=${HF_USER}/so100_test \
|
||||
policy=act_so100_real \
|
||||
env=so100_real \
|
||||
@@ -245,6 +248,7 @@ Let's explain it:
|
||||
3. We provided an environment as argument with `env=so100_real`. This loads configurations from [`lerobot/configs/env/so100_real.yaml`](../lerobot/configs/env/so100_real.yaml).
|
||||
4. We provided `device=cuda` since we are training on a Nvidia GPU, but you can also use `device=mps` if you are using a Mac with Apple silicon, or `device=cpu` otherwise.
|
||||
5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
|
||||
6. We added `DATA_DIR=data` to access your dataset stored in your local `data` directory. If you dont provide `DATA_DIR`, your dataset will be downloaded from Hugging Face hub to your cache folder `$HOME/.cache/hugginface`. In future versions of `lerobot`, both directories will be in sync.
|
||||
|
||||
Training should take several hours. You will find checkpoints in `outputs/train/act_so100_test/checkpoints`.
|
||||
|
||||
@@ -255,6 +259,7 @@ You can use the `record` function from [`lerobot/scripts/control_robot.py`](../l
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--robot-path lerobot/configs/robot/so100.yaml \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/eval_act_so100_test \
|
||||
--tags so100 tutorial eval \
|
||||
--warmup-time-s 5 \
|
||||
|
||||
@@ -192,6 +192,7 @@ Record 2 episodes and upload your dataset to the hub:
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--robot-path lerobot/configs/robot/moss.yaml \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/moss_test \
|
||||
--tags moss tutorial \
|
||||
--warmup-time-s 5 \
|
||||
@@ -211,6 +212,7 @@ echo ${HF_USER}/moss_test
|
||||
If you didn't upload with `--push-to-hub 0`, you can also visualize it locally with:
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset_html.py \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/moss_test
|
||||
```
|
||||
|
||||
@@ -218,9 +220,10 @@ python lerobot/scripts/visualize_dataset_html.py \
|
||||
|
||||
Now try to replay the first episode on your robot:
|
||||
```bash
|
||||
python lerobot/scripts/control_robot.py replay \
|
||||
DATA_DIR=data python lerobot/scripts/control_robot.py replay \
|
||||
--robot-path lerobot/configs/robot/moss.yaml \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/moss_test \
|
||||
--episode 0
|
||||
```
|
||||
@@ -229,7 +232,7 @@ python lerobot/scripts/control_robot.py replay \
|
||||
|
||||
To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
DATA_DIR=data python lerobot/scripts/train.py \
|
||||
dataset_repo_id=${HF_USER}/moss_test \
|
||||
policy=act_moss_real \
|
||||
env=moss_real \
|
||||
@@ -245,6 +248,7 @@ Let's explain it:
|
||||
3. We provided an environment as argument with `env=moss_real`. This loads configurations from [`lerobot/configs/env/moss_real.yaml`](../lerobot/configs/env/moss_real.yaml).
|
||||
4. We provided `device=cuda` since we are training on a Nvidia GPU, but you can also use `device=mps` if you are using a Mac with Apple silicon, or `device=cpu` otherwise.
|
||||
5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
|
||||
6. We added `DATA_DIR=data` to access your dataset stored in your local `data` directory. If you dont provide `DATA_DIR`, your dataset will be downloaded from Hugging Face hub to your cache folder `$HOME/.cache/hugginface`. In future versions of `lerobot`, both directories will be in sync.
|
||||
|
||||
Training should take several hours. You will find checkpoints in `outputs/train/act_moss_test/checkpoints`.
|
||||
|
||||
@@ -255,6 +259,7 @@ You can use the `record` function from [`lerobot/scripts/control_robot.py`](../l
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--robot-path lerobot/configs/robot/moss.yaml \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/eval_act_moss_test \
|
||||
--tags moss tutorial eval \
|
||||
--warmup-time-s 5 \
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
# Training a HIL-SERL Reward Classifier with LeRobot
|
||||
|
||||
This tutorial provides step-by-step instructions for training a reward classifier using LeRobot.
|
||||
|
||||
---
|
||||
|
||||
## Training Script Overview
|
||||
|
||||
LeRobot includes a ready-to-use training script located at [`lerobot/scripts/train_hilserl_classifier.py`](../../lerobot/scripts/train_hilserl_classifier.py). Here's an outline of its workflow:
|
||||
|
||||
1. **Configuration Loading**
|
||||
The script uses Hydra to load a configuration file for subsequent steps. (Details on Hydra follow below.)
|
||||
|
||||
2. **Dataset Initialization**
|
||||
It loads a `LeRobotDataset` containing images and rewards. To optimize performance, a weighted random sampler is used to balance class sampling.
|
||||
|
||||
3. **Classifier Initialization**
|
||||
A lightweight classification head is built on top of a frozen, pretrained image encoder from HuggingFace. The classifier outputs either:
|
||||
- A single probability (binary classification), or
|
||||
- Logits (multi-class classification).
|
||||
|
||||
4. **Training Loop Execution**
|
||||
The script performs:
|
||||
- Forward and backward passes,
|
||||
- Optimization steps,
|
||||
- Periodic logging, evaluation, and checkpoint saving.
|
||||
|
||||
---
|
||||
|
||||
## Configuring with Hydra
|
||||
|
||||
For detailed information about Hydra usage, refer to [`examples/4_train_policy_with_script.md`](../examples/4_train_policy_with_script.md). However, note that training the reward classifier differs slightly and requires a separate configuration file.
|
||||
|
||||
### Config File Setup
|
||||
|
||||
The default `default.yaml` cannot launch the reward classifier training directly. Instead, you need a configuration file like [`lerobot/configs/policy/hilserl_classifier.yaml`](../../lerobot/configs/policy/hilserl_classifier.yaml), with the following adjustment:
|
||||
|
||||
Replace the `dataset_repo_id` field with the identifier for your dataset, which contains images and sparse rewards:
|
||||
|
||||
```yaml
|
||||
# Example: lerobot/configs/policy/reward_classifier.yaml
|
||||
dataset_repo_id: "my_dataset_repo_id"
|
||||
## Typical logs and metrics
|
||||
```
|
||||
When you start the training process, you will first see your full configuration being printed in the terminal. You can check it to make sure that you config it correctly and your config is not overrided by other files. The final configuration will also be saved with the checkpoint.
|
||||
|
||||
After that, you will see training log like this one:
|
||||
|
||||
```
|
||||
[2024-11-29 18:26:36,999][root][INFO] -
|
||||
Epoch 5/5
|
||||
Training: 82%|██████████████████████████████████████████████████████████████████████████████▋ | 91/111 [00:50<00:09, 2.04it/s, loss=0.2999, acc=69.99%]
|
||||
```
|
||||
|
||||
or evaluation log like:
|
||||
|
||||
```
|
||||
Validation: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:20<00:00, 1.37it/s]
|
||||
```
|
||||
|
||||
### Metrics Tracking with Weights & Biases (WandB)
|
||||
|
||||
If `wandb.enable` is set to `true`, the training and evaluation logs will also be saved in WandB. This allows you to track key metrics in real-time, including:
|
||||
|
||||
- **Training Metrics**:
|
||||
- `train/accuracy`
|
||||
- `train/loss`
|
||||
- `train/dataloading_s`
|
||||
- **Evaluation Metrics**:
|
||||
- `eval/accuracy`
|
||||
- `eval/loss`
|
||||
- `eval/eval_s`
|
||||
|
||||
#### Additional Features
|
||||
|
||||
You can also log sample predictions during evaluation. Each logged sample will include:
|
||||
|
||||
- The **input image**.
|
||||
- The **predicted label**.
|
||||
- The **true label**.
|
||||
- The **classifier's "confidence" (logits/probability)**.
|
||||
|
||||
These logs can be useful for diagnosing and debugging performance issues.
|
||||
@@ -10,10 +10,10 @@ from torchvision.transforms import ToPILImage, v2
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
dataset_repo_id = "lerobot/aloha_static_screw_driver"
|
||||
dataset_repo_id = "lerobot/aloha_static_tape"
|
||||
|
||||
# Create a LeRobotDataset with no transformations
|
||||
dataset = LeRobotDataset(dataset_repo_id, episodes=[0])
|
||||
dataset = LeRobotDataset(dataset_repo_id)
|
||||
# This is equivalent to `dataset = LeRobotDataset(dataset_repo_id, image_transforms=None)`
|
||||
|
||||
# Get the index of the first observation in the first episode
|
||||
@@ -28,13 +28,12 @@ transforms = v2.Compose(
|
||||
[
|
||||
v2.ColorJitter(brightness=(0.5, 1.5)),
|
||||
v2.ColorJitter(contrast=(0.5, 1.5)),
|
||||
v2.ColorJitter(hue=(-0.1, 0.1)),
|
||||
v2.RandomAdjustSharpness(sharpness_factor=2, p=1),
|
||||
]
|
||||
)
|
||||
|
||||
# Create another LeRobotDataset with the defined transformations
|
||||
transformed_dataset = LeRobotDataset(dataset_repo_id, episodes=[0], image_transforms=transforms)
|
||||
transformed_dataset = LeRobotDataset(dataset_repo_id, image_transforms=transforms)
|
||||
|
||||
# Get a frame from the transformed dataset
|
||||
transformed_frame = transformed_dataset[first_idx][transformed_dataset.meta.camera_keys[0]]
|
||||
|
||||
@@ -29,7 +29,7 @@ For a visual walkthrough of the assembly process, you can refer to [this video t
|
||||
|
||||
## 2. Configure motors, calibrate arms, teleoperate your Koch v1.1
|
||||
|
||||
First, install the additional dependencies required for robots built with dynamixel motors like Koch v1.1 by running one of the following commands (make sure gcc is installed).
|
||||
First, install the additional dependencies required for robots built with dynamixel motors like Koch v1.1 by running one of the following commands.
|
||||
|
||||
Using `pip`:
|
||||
```bash
|
||||
@@ -778,6 +778,7 @@ Now run this to record 2 episodes:
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--robot-path lerobot/configs/robot/koch.yaml \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/koch_test \
|
||||
--tags tutorial \
|
||||
--warmup-time-s 5 \
|
||||
@@ -786,7 +787,7 @@ python lerobot/scripts/control_robot.py record \
|
||||
--num-episodes 2
|
||||
```
|
||||
|
||||
This will write your dataset locally to `~/.cache/huggingface/lerobot/{repo-id}` (e.g. `data/cadene/koch_test`) and push it on the hub at `https://huggingface.co/datasets/{HF_USER}/{repo-id}`. Your dataset will be automatically tagged with `LeRobot` for the community to find it easily, and you can also add custom tags (in this case `tutorial` for example).
|
||||
This will write your dataset locally to `{root}/{repo-id}` (e.g. `data/cadene/koch_test`) and push it on the hub at `https://huggingface.co/datasets/{HF_USER}/{repo-id}`. Your dataset will be automatically tagged with `LeRobot` for the community to find it easily, and you can also add custom tags (in this case `tutorial` for example).
|
||||
|
||||
You can look for other LeRobot datasets on the hub by searching for `LeRobot` tags: https://huggingface.co/datasets?other=LeRobot
|
||||
|
||||
@@ -839,6 +840,7 @@ In the coming months, we plan to release a foundational model for robotics. We a
|
||||
You can visualize your dataset by running:
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset_html.py \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/koch_test
|
||||
```
|
||||
|
||||
@@ -856,6 +858,7 @@ To replay the first episode of the dataset you just recorded, run the following
|
||||
python lerobot/scripts/control_robot.py replay \
|
||||
--robot-path lerobot/configs/robot/koch.yaml \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/koch_test \
|
||||
--episode 0
|
||||
```
|
||||
@@ -868,7 +871,7 @@ Your robot should replicate movements similar to those you recorded. For example
|
||||
|
||||
To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
DATA_DIR=data python lerobot/scripts/train.py \
|
||||
dataset_repo_id=${HF_USER}/koch_test \
|
||||
policy=act_koch_real \
|
||||
env=koch_real \
|
||||
@@ -915,6 +918,7 @@ env:
|
||||
It should match your dataset (e.g. `fps: 30`) and your robot (e.g. `state_dim: 6` and `action_dim: 6`). We are still working on simplifying this in future versions of `lerobot`.
|
||||
4. We provided `device=cuda` since we are training on a Nvidia GPU, but you could use `device=mps` to train on Apple silicon.
|
||||
5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
|
||||
6. We added `DATA_DIR=data` to access your dataset stored in your local `data` directory. If you dont provide `DATA_DIR`, your dataset will be downloaded from Hugging Face hub to your cache folder `$HOME/.cache/hugginface`. In future versions of `lerobot`, both directories will be in sync.
|
||||
|
||||
For more information on the `train` script see the previous tutorial: [`examples/4_train_policy_with_script.md`](../examples/4_train_policy_with_script.md)
|
||||
|
||||
@@ -987,6 +991,7 @@ To this end, you can use the `record` function from [`lerobot/scripts/control_ro
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--robot-path lerobot/configs/robot/koch.yaml \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/eval_koch_test \
|
||||
--tags tutorial eval \
|
||||
--warmup-time-s 5 \
|
||||
@@ -1005,6 +1010,7 @@ As you can see, it's almost the same command as previously used to record your t
|
||||
You can then visualize your evaluation dataset by running the same command as before but with the new inference dataset as argument:
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset.py \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/eval_koch_test
|
||||
```
|
||||
|
||||
|
||||
@@ -128,6 +128,7 @@ Record one episode:
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--robot-path lerobot/configs/robot/stretch.yaml \
|
||||
--fps 20 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/stretch_test \
|
||||
--tags stretch tutorial \
|
||||
--warmup-time-s 3 \
|
||||
@@ -145,6 +146,7 @@ Now try to replay this episode (make sure the robot's initial position is the sa
|
||||
python lerobot/scripts/control_robot.py replay \
|
||||
--robot-path lerobot/configs/robot/stretch.yaml \
|
||||
--fps 20 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/stretch_test \
|
||||
--episode 0
|
||||
```
|
||||
|
||||
@@ -56,7 +56,7 @@ python lerobot/scripts/control_robot.py teleoperate \
|
||||
--robot-overrides max_relative_target=5
|
||||
```
|
||||
|
||||
By adding `--robot-overrides max_relative_target=5`, we override the default value for `max_relative_target` defined in `lerobot/configs/robot/aloha.yaml`. It is expected to be `5` to limit the magnitude of the movement for more safety, but the teleoperation won't be smooth. When you feel confident, you can disable this limit by adding `--robot-overrides max_relative_target=null` to the command line:
|
||||
By adding `--robot-overrides max_relative_target=5`, we override the default value for `max_relative_target` defined in `lerobot/configs/robot/aloha.yaml`. It is expected to be `5` to limit the magnitude of the movement for more safety, but the teloperation won't be smooth. When you feel confident, you can disable this limit by adding `--robot-overrides max_relative_target=null` to the command line:
|
||||
```bash
|
||||
python lerobot/scripts/control_robot.py teleoperate \
|
||||
--robot-path lerobot/configs/robot/aloha.yaml \
|
||||
@@ -84,6 +84,7 @@ python lerobot/scripts/control_robot.py record \
|
||||
--robot-path lerobot/configs/robot/aloha.yaml \
|
||||
--robot-overrides max_relative_target=null \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/aloha_test \
|
||||
--tags aloha tutorial \
|
||||
--warmup-time-s 5 \
|
||||
@@ -103,6 +104,7 @@ echo ${HF_USER}/aloha_test
|
||||
If you didn't upload with `--push-to-hub 0`, you can also visualize it locally with:
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset_html.py \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/aloha_test
|
||||
```
|
||||
|
||||
@@ -117,6 +119,7 @@ python lerobot/scripts/control_robot.py replay \
|
||||
--robot-path lerobot/configs/robot/aloha.yaml \
|
||||
--robot-overrides max_relative_target=null \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/aloha_test \
|
||||
--episode 0
|
||||
```
|
||||
@@ -125,7 +128,7 @@ python lerobot/scripts/control_robot.py replay \
|
||||
|
||||
To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
DATA_DIR=data python lerobot/scripts/train.py \
|
||||
dataset_repo_id=${HF_USER}/aloha_test \
|
||||
policy=act_aloha_real \
|
||||
env=aloha_real \
|
||||
@@ -141,6 +144,7 @@ Let's explain it:
|
||||
3. We provided an environment as argument with `env=aloha_real`. This loads configurations from [`lerobot/configs/env/aloha_real.yaml`](../lerobot/configs/env/aloha_real.yaml). Note: this yaml defines 18 dimensions for the `state_dim` and `action_dim`, corresponding to 18 motors, not 14 motors as used in previous Aloha work. This is because, we include the `shoulder_shadow` and `elbow_shadow` motors for simplicity.
|
||||
4. We provided `device=cuda` since we are training on a Nvidia GPU.
|
||||
5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
|
||||
6. We added `DATA_DIR=data` to access your dataset stored in your local `data` directory. If you dont provide `DATA_DIR`, your dataset will be downloaded from Hugging Face hub to your cache folder `$HOME/.cache/hugginface`. In future versions of `lerobot`, both directories will be in sync.
|
||||
|
||||
Training should take several hours. You will find checkpoints in `outputs/train/act_aloha_test/checkpoints`.
|
||||
|
||||
@@ -152,6 +156,7 @@ python lerobot/scripts/control_robot.py record \
|
||||
--robot-path lerobot/configs/robot/aloha.yaml \
|
||||
--robot-overrides max_relative_target=null \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id ${HF_USER}/eval_act_aloha_test \
|
||||
--tags aloha tutorial eval \
|
||||
--warmup-time-s 5 \
|
||||
|
||||
@@ -63,7 +63,7 @@ def build_features(mode: str) -> dict:
|
||||
return features
|
||||
|
||||
|
||||
def load_raw_dataset(zarr_path: Path):
|
||||
def load_raw_dataset(zarr_path: Path, load_images: bool = True):
|
||||
try:
|
||||
from lerobot.common.datasets.push_dataset_to_hub._diffusion_policy_replay_buffer import (
|
||||
ReplayBuffer as DiffusionPolicyReplayBuffer,
|
||||
|
||||
@@ -28,7 +28,7 @@ def safe_stop_image_writer(func):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
dataset = kwargs.get("dataset")
|
||||
dataset = kwargs.get("dataset", None)
|
||||
image_writer = getattr(dataset, "image_writer", None) if dataset else None
|
||||
if image_writer is not None:
|
||||
print("Waiting for image writer to terminate...")
|
||||
|
||||
@@ -291,14 +291,14 @@ class LeRobotDatasetMetadata:
|
||||
obj.root.mkdir(parents=True, exist_ok=False)
|
||||
|
||||
if robot is not None:
|
||||
features = {**(features or {}), **get_features_from_robot(robot)}
|
||||
features = get_features_from_robot(robot, use_videos)
|
||||
robot_type = robot.robot_type
|
||||
if not all(cam.fps == fps for cam in robot.cameras.values()):
|
||||
logging.warning(
|
||||
f"Some cameras in your {robot.robot_type} robot don't have an fps matching the fps of your dataset."
|
||||
"In this case, frames from lower fps cameras will be repeated to fill in the blanks."
|
||||
)
|
||||
elif features is None:
|
||||
elif robot_type is None or features is None:
|
||||
raise ValueError(
|
||||
"Dataset features must either come from a Robot or explicitly passed upon creation."
|
||||
)
|
||||
|
||||
639
lerobot/common/datasets/push_dataset_to_hub/openx/configs.yaml
Normal file
639
lerobot/common/datasets/push_dataset_to_hub/openx/configs.yaml
Normal file
@@ -0,0 +1,639 @@
|
||||
OPENX_DATASET_CONFIGS:
|
||||
fractal20220817_data:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- base_pose_tool_reached
|
||||
- gripper_closed
|
||||
fps: 3
|
||||
|
||||
kuka:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- clip_function_input/base_pose_tool_reached
|
||||
- gripper_closed
|
||||
fps: 10
|
||||
|
||||
bridge_openx:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- EEF_state
|
||||
- gripper_state
|
||||
fps: 5
|
||||
|
||||
taco_play:
|
||||
image_obs_keys:
|
||||
- rgb_static
|
||||
- rgb_gripper
|
||||
depth_obs_keys:
|
||||
- depth_static
|
||||
- depth_gripper
|
||||
state_obs_keys:
|
||||
- state_eef
|
||||
- state_gripper
|
||||
fps: 15
|
||||
|
||||
jaco_play:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- image_wrist
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state_eef
|
||||
- state_gripper
|
||||
fps: 10
|
||||
|
||||
berkeley_cable_routing:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- top_image
|
||||
- wrist45_image
|
||||
- wrist225_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- robot_state
|
||||
fps: 10
|
||||
|
||||
roboturk:
|
||||
image_obs_keys:
|
||||
- front_rgb
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- null
|
||||
fps: 10
|
||||
|
||||
nyu_door_opening_surprising_effectiveness:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- null
|
||||
fps: 3
|
||||
|
||||
viola:
|
||||
image_obs_keys:
|
||||
- agentview_rgb
|
||||
- eye_in_hand_rgb
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- joint_states
|
||||
- gripper_states
|
||||
fps: 20
|
||||
|
||||
berkeley_autolab_ur5:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- hand_image
|
||||
depth_obs_keys:
|
||||
- image_with_depth
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 5
|
||||
|
||||
toto:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 30
|
||||
|
||||
language_table:
|
||||
image_obs_keys:
|
||||
- rgb
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- effector_translation
|
||||
fps: 10
|
||||
|
||||
columbia_cairlab_pusht_real:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- robot_state
|
||||
fps: 10
|
||||
|
||||
stanford_kuka_multimodal_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- depth_image
|
||||
state_obs_keys:
|
||||
- ee_position
|
||||
- ee_orientation
|
||||
fps: 20
|
||||
|
||||
nyu_rot_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 3
|
||||
|
||||
io_ai_tech:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- image_fisheye
|
||||
- image_left_side
|
||||
- image_right_side
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 3
|
||||
|
||||
stanford_hydra_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 10
|
||||
|
||||
austin_buds_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 20
|
||||
|
||||
nyu_franka_play_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- image_additional_view
|
||||
depth_obs_keys:
|
||||
- depth
|
||||
- depth_additional_view
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
fps: 3
|
||||
|
||||
maniskill_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- depth
|
||||
- wrist_depth
|
||||
state_obs_keys:
|
||||
- tcp_pose
|
||||
- gripper_state
|
||||
fps: 20
|
||||
|
||||
furniture_bench_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 10
|
||||
|
||||
cmu_franka_exploration_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- highres_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- null
|
||||
fps: 10
|
||||
|
||||
ucsd_kitchen_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- joint_state
|
||||
fps: 2
|
||||
|
||||
ucsd_pick_and_place_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 3
|
||||
|
||||
spoc:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- image_manipulation
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- null
|
||||
fps: 3
|
||||
|
||||
austin_sailor_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 20
|
||||
|
||||
austin_sirius_dataset_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 20
|
||||
|
||||
bc_z:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- present/xyz
|
||||
- present/axis_angle
|
||||
- present/sensed_close
|
||||
fps: 10
|
||||
|
||||
utokyo_pr2_opening_fridge_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 10
|
||||
|
||||
utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 10
|
||||
|
||||
utokyo_xarm_pick_and_place_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- image2
|
||||
- hand_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- end_effector_pose
|
||||
fps: 10
|
||||
|
||||
utokyo_xarm_bimanual_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- pose_r
|
||||
fps: 10
|
||||
|
||||
robo_net:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- image1
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 1
|
||||
|
||||
robo_set:
|
||||
image_obs_keys:
|
||||
- image_left
|
||||
- image_right
|
||||
- image_wrist
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
- state_velocity
|
||||
fps: 5
|
||||
|
||||
berkeley_mvp_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- hand_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- gripper
|
||||
- pose
|
||||
- joint_pos
|
||||
fps: 5
|
||||
|
||||
berkeley_rpt_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- hand_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- joint_pos
|
||||
- gripper
|
||||
fps: 30
|
||||
|
||||
kaist_nonprehensile_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 10
|
||||
|
||||
stanford_mask_vit_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
|
||||
tokyo_u_lsmo_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 10
|
||||
|
||||
dlr_sara_pour_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 10
|
||||
|
||||
dlr_sara_grid_clamp_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 10
|
||||
|
||||
dlr_edan_shared_control_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 5
|
||||
|
||||
asu_table_top_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 12.5
|
||||
|
||||
stanford_robocook_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image_1
|
||||
- image_2
|
||||
depth_obs_keys:
|
||||
- depth_1
|
||||
- depth_2
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 5
|
||||
|
||||
imperialcollege_sawyer_wrist_cam:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 10
|
||||
|
||||
iamlab_cmu_pickup_insert_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- joint_state
|
||||
- gripper_state
|
||||
fps: 20
|
||||
|
||||
uiuc_d3field:
|
||||
image_obs_keys:
|
||||
- image_1
|
||||
- image_2
|
||||
depth_obs_keys:
|
||||
- depth_1
|
||||
- depth_2
|
||||
state_obs_keys:
|
||||
- null
|
||||
fps: 1
|
||||
|
||||
utaustin_mutex:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 20
|
||||
|
||||
berkeley_fanuc_manipulation:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- joint_state
|
||||
- gripper_state
|
||||
fps: 10
|
||||
|
||||
cmu_playing_with_food:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- finger_vision_1
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 10
|
||||
|
||||
cmu_play_fusion:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 5
|
||||
|
||||
cmu_stretch:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- eef_state
|
||||
- gripper_state
|
||||
fps: 10
|
||||
|
||||
berkeley_gnm_recon:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
- position
|
||||
- yaw
|
||||
fps: 3
|
||||
|
||||
berkeley_gnm_cory_hall:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
- position
|
||||
- yaw
|
||||
fps: 5
|
||||
|
||||
berkeley_gnm_sac_son:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
- position
|
||||
- yaw
|
||||
fps: 10
|
||||
|
||||
droid:
|
||||
image_obs_keys:
|
||||
- exterior_image_1_left
|
||||
- exterior_image_2_left
|
||||
- wrist_image_left
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- proprio
|
||||
fps: 15
|
||||
|
||||
droid_100:
|
||||
image_obs_keys:
|
||||
- exterior_image_1_left
|
||||
- exterior_image_2_left
|
||||
- wrist_image_left
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- proprio
|
||||
fps: 15
|
||||
|
||||
fmb:
|
||||
image_obs_keys:
|
||||
- image_side_1
|
||||
- image_side_2
|
||||
- image_wrist_1
|
||||
- image_wrist_2
|
||||
depth_obs_keys:
|
||||
- image_side_1_depth
|
||||
- image_side_2_depth
|
||||
- image_wrist_1_depth
|
||||
- image_wrist_2_depth
|
||||
state_obs_keys:
|
||||
- proprio
|
||||
fps: 10
|
||||
|
||||
dobbe:
|
||||
image_obs_keys:
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- proprio
|
||||
fps: 3.75
|
||||
|
||||
usc_cloth_sim_converted_externally_to_rlds:
|
||||
image_obs_keys:
|
||||
- image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- null
|
||||
fps: 10
|
||||
|
||||
plex_robosuite:
|
||||
image_obs_keys:
|
||||
- image
|
||||
- wrist_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 20
|
||||
|
||||
conq_hose_manipulation:
|
||||
image_obs_keys:
|
||||
- frontleft_fisheye_image
|
||||
- frontright_fisheye_image
|
||||
- hand_color_image
|
||||
depth_obs_keys:
|
||||
- null
|
||||
state_obs_keys:
|
||||
- state
|
||||
fps: 30
|
||||
106
lerobot/common/datasets/push_dataset_to_hub/openx/data_utils.py
Normal file
106
lerobot/common/datasets/push_dataset_to_hub/openx/data_utils.py
Normal file
@@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the Licens e.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
NOTE(YL): Adapted from:
|
||||
Octo: https://github.com/octo-models/octo/blob/main/octo/data/utils/data_utils.py
|
||||
|
||||
data_utils.py
|
||||
|
||||
Additional utils for data processing.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def binarize_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
|
||||
"""
|
||||
Converts gripper actions from continuous to binary values (0 and 1).
|
||||
|
||||
We exploit that fact that most of the time, the gripper is fully open (near 1.0) or fully closed (near 0.0). As it
|
||||
transitions between the two, it sometimes passes through a few intermediate values. We relabel those intermediate
|
||||
values based on the state that is reached _after_ those intermediate values.
|
||||
|
||||
In the edge case that the trajectory ends with an intermediate value, we give up on binarizing and relabel that
|
||||
chunk of intermediate values as the last action in the trajectory.
|
||||
|
||||
The `scan_fn` implements the following logic:
|
||||
new_actions = np.empty_like(actions)
|
||||
carry = actions[-1]
|
||||
for i in reversed(range(actions.shape[0])):
|
||||
if in_between_mask[i]:
|
||||
carry = carry
|
||||
else:
|
||||
carry = float(open_mask[i])
|
||||
new_actions[i] = carry
|
||||
"""
|
||||
open_mask, closed_mask = actions > 0.95, actions < 0.05
|
||||
in_between_mask = tf.logical_not(tf.logical_or(open_mask, closed_mask))
|
||||
is_open_float = tf.cast(open_mask, tf.float32)
|
||||
|
||||
def scan_fn(carry, i):
|
||||
return tf.cond(in_between_mask[i], lambda: tf.cast(carry, tf.float32), lambda: is_open_float[i])
|
||||
|
||||
return tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), actions[-1], reverse=True)
|
||||
|
||||
|
||||
def invert_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
|
||||
return 1 - actions
|
||||
|
||||
|
||||
def rel2abs_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
|
||||
"""
|
||||
Converts relative gripper actions (+1 for closing, -1 for opening) to absolute actions (0 = closed; 1 = open).
|
||||
|
||||
Assumes that the first relative gripper is not redundant (i.e. close when already closed)!
|
||||
"""
|
||||
# Note =>> -1 for closing, 1 for opening, 0 for no change
|
||||
opening_mask, closing_mask = actions < -0.1, actions > 0.1
|
||||
thresholded_actions = tf.where(opening_mask, 1, tf.where(closing_mask, -1, 0))
|
||||
|
||||
def scan_fn(carry, i):
|
||||
return tf.cond(thresholded_actions[i] == 0, lambda: carry, lambda: thresholded_actions[i])
|
||||
|
||||
# If no relative grasp, assumes open for whole trajectory
|
||||
start = -1 * thresholded_actions[tf.argmax(thresholded_actions != 0, axis=0)]
|
||||
start = tf.cond(start == 0, lambda: 1, lambda: start)
|
||||
|
||||
# Note =>> -1 for closed, 1 for open
|
||||
new_actions = tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), start)
|
||||
new_actions = tf.cast(new_actions, tf.float32) / 2 + 0.5
|
||||
|
||||
return new_actions
|
||||
|
||||
|
||||
# === Bridge-V2 =>> Dataset-Specific Transform ===
|
||||
def relabel_bridge_actions(traj: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Relabels actions to use reached proprioceptive state; discards last timestep (no-action)."""
|
||||
movement_actions = traj["observation"]["state"][1:, :6] - traj["observation"]["state"][:-1, :6]
|
||||
traj_truncated = tf.nest.map_structure(lambda x: x[:-1], traj)
|
||||
traj_truncated["action"] = tf.concat([movement_actions, traj["action"][:-1, -1:]], axis=1)
|
||||
|
||||
return traj_truncated
|
||||
|
||||
|
||||
# === RLDS Dataset Initialization Utilities ===
|
||||
def pprint_data_mixture(dataset_kwargs_list: List[Dict[str, Any]], dataset_weights: List[int]) -> None:
|
||||
print("\n######################################################################################")
|
||||
print(f"# Loading the following {len(dataset_kwargs_list)} datasets (incl. sampling weight):{'': >24} #")
|
||||
for dataset_kwargs, weight in zip(dataset_kwargs_list, dataset_weights, strict=False):
|
||||
pad = 80 - len(dataset_kwargs["name"])
|
||||
print(f"# {dataset_kwargs['name']}: {weight:=>{pad}f} #")
|
||||
print("######################################################################################\n")
|
||||
200
lerobot/common/datasets/push_dataset_to_hub/openx/droid_utils.py
Normal file
200
lerobot/common/datasets/push_dataset_to_hub/openx/droid_utils.py
Normal file
@@ -0,0 +1,200 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
NOTE(YL): Adapted from:
|
||||
OpenVLA: https://github.com/openvla/openvla
|
||||
|
||||
Episode transforms for DROID dataset.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
import tensorflow as tf
|
||||
import tensorflow_graphics.geometry.transformation as tfg
|
||||
|
||||
|
||||
def rmat_to_euler(rot_mat):
|
||||
return tfg.euler.from_rotation_matrix(rot_mat)
|
||||
|
||||
|
||||
def euler_to_rmat(euler):
|
||||
return tfg.rotation_matrix_3d.from_euler(euler)
|
||||
|
||||
|
||||
def invert_rmat(rot_mat):
|
||||
return tfg.rotation_matrix_3d.inverse(rot_mat)
|
||||
|
||||
|
||||
def rotmat_to_rot6d(mat):
|
||||
"""
|
||||
Converts rotation matrix to R6 rotation representation (first two rows in rotation matrix).
|
||||
Args:
|
||||
mat: rotation matrix
|
||||
|
||||
Returns: 6d vector (first two rows of rotation matrix)
|
||||
|
||||
"""
|
||||
r6 = mat[..., :2, :]
|
||||
r6_0, r6_1 = r6[..., 0, :], r6[..., 1, :]
|
||||
r6_flat = tf.concat([r6_0, r6_1], axis=-1)
|
||||
return r6_flat
|
||||
|
||||
|
||||
def velocity_act_to_wrist_frame(velocity, wrist_in_robot_frame):
|
||||
"""
|
||||
Translates velocity actions (translation + rotation) from base frame of the robot to wrist frame.
|
||||
Args:
|
||||
velocity: 6d velocity action (3 x translation, 3 x rotation)
|
||||
wrist_in_robot_frame: 6d pose of the end-effector in robot base frame
|
||||
|
||||
Returns: 9d velocity action in robot wrist frame (3 x translation, 6 x rotation as R6)
|
||||
|
||||
"""
|
||||
r_frame = euler_to_rmat(wrist_in_robot_frame[:, 3:6])
|
||||
r_frame_inv = invert_rmat(r_frame)
|
||||
|
||||
# world to wrist: dT_pi = R^-1 dT_rbt
|
||||
vel_t = (r_frame_inv @ velocity[:, :3][..., None])[..., 0]
|
||||
|
||||
# world to wrist: dR_pi = R^-1 dR_rbt R
|
||||
dr_ = euler_to_rmat(velocity[:, 3:6])
|
||||
dr_ = r_frame_inv @ (dr_ @ r_frame)
|
||||
dr_r6 = rotmat_to_rot6d(dr_)
|
||||
return tf.concat([vel_t, dr_r6], axis=-1)
|
||||
|
||||
|
||||
def rand_swap_exterior_images(img1, img2):
|
||||
"""
|
||||
Randomly swaps the two exterior images (for training with single exterior input).
|
||||
"""
|
||||
return tf.cond(tf.random.uniform(shape=[]) > 0.5, lambda: (img1, img2), lambda: (img2, img1))
|
||||
|
||||
|
||||
def droid_baseact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
DROID dataset transformation for actions expressed in *base* frame of the robot.
|
||||
"""
|
||||
dt = trajectory["action_dict"]["cartesian_velocity"][:, :3]
|
||||
dr_ = trajectory["action_dict"]["cartesian_velocity"][:, 3:6]
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
dt,
|
||||
dr_,
|
||||
1 - trajectory["action_dict"]["gripper_position"],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["observation"]["exterior_image_1_left"], trajectory["observation"]["exterior_image_2_left"] = (
|
||||
rand_swap_exterior_images(
|
||||
trajectory["observation"]["exterior_image_1_left"],
|
||||
trajectory["observation"]["exterior_image_2_left"],
|
||||
)
|
||||
)
|
||||
trajectory["observation"]["proprio"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["cartesian_position"],
|
||||
trajectory["observation"]["gripper_position"],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def droid_wristact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
DROID dataset transformation for actions expressed in *wrist* frame of the robot.
|
||||
"""
|
||||
wrist_act = velocity_act_to_wrist_frame(
|
||||
trajectory["action_dict"]["cartesian_velocity"], trajectory["observation"]["cartesian_position"]
|
||||
)
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
wrist_act,
|
||||
trajectory["action_dict"]["gripper_position"],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["observation"]["exterior_image_1_left"], trajectory["observation"]["exterior_image_2_left"] = (
|
||||
rand_swap_exterior_images(
|
||||
trajectory["observation"]["exterior_image_1_left"],
|
||||
trajectory["observation"]["exterior_image_2_left"],
|
||||
)
|
||||
)
|
||||
trajectory["observation"]["proprio"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["cartesian_position"],
|
||||
trajectory["observation"]["gripper_position"],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def droid_finetuning_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
DROID dataset transformation for actions expressed in *base* frame of the robot.
|
||||
"""
|
||||
dt = trajectory["action_dict"]["cartesian_velocity"][:, :3]
|
||||
dr_ = trajectory["action_dict"]["cartesian_velocity"][:, 3:6]
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
dt,
|
||||
dr_,
|
||||
1 - trajectory["action_dict"]["gripper_position"],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["observation"]["proprio"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["cartesian_position"],
|
||||
trajectory["observation"]["gripper_position"],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def zero_action_filter(traj: Dict) -> bool:
|
||||
"""
|
||||
Filters transitions whose actions are all-0 (only relative actions, no gripper action).
|
||||
Note: this filter is applied *after* action normalization, so need to compare to "normalized 0".
|
||||
"""
|
||||
droid_q01 = tf.convert_to_tensor(
|
||||
[
|
||||
-0.7776297926902771,
|
||||
-0.5803514122962952,
|
||||
-0.5795090794563293,
|
||||
-0.6464047729969025,
|
||||
-0.7041108310222626,
|
||||
-0.8895104378461838,
|
||||
]
|
||||
)
|
||||
droid_q99 = tf.convert_to_tensor(
|
||||
[
|
||||
0.7597932070493698,
|
||||
0.5726242214441299,
|
||||
0.7351000607013702,
|
||||
0.6705610305070877,
|
||||
0.6464948207139969,
|
||||
0.8897542208433151,
|
||||
]
|
||||
)
|
||||
droid_norm_0_act = (
|
||||
2 * (tf.zeros_like(traj["action"][:, :6]) - droid_q01) / (droid_q99 - droid_q01 + 1e-8) - 1
|
||||
)
|
||||
|
||||
return tf.reduce_any(tf.math.abs(traj["action"][:, :6] - droid_norm_0_act) > 1e-5)
|
||||
859
lerobot/common/datasets/push_dataset_to_hub/openx/transforms.py
Normal file
859
lerobot/common/datasets/push_dataset_to_hub/openx/transforms.py
Normal file
@@ -0,0 +1,859 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
NOTE(YL): Adapted from:
|
||||
OpenVLA: https://github.com/openvla/openvla
|
||||
Octo: https://github.com/octo-models/octo
|
||||
|
||||
transforms.py
|
||||
|
||||
Defines a registry of per-dataset standardization transforms for each dataset in Open-X Embodiment.
|
||||
|
||||
Transforms adopt the following structure:
|
||||
Input: Dictionary of *batched* features (i.e., has leading time dimension)
|
||||
Output: Dictionary `step` =>> {
|
||||
"observation": {
|
||||
<image_keys, depth_image_keys>
|
||||
State (in chosen state representation)
|
||||
},
|
||||
"action": Action (in chosen action representation),
|
||||
"language_instruction": str
|
||||
}
|
||||
"""
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from lerobot.common.datasets.push_dataset_to_hub.openx.data_utils import (
|
||||
binarize_gripper_actions,
|
||||
invert_gripper_actions,
|
||||
rel2abs_gripper_actions,
|
||||
relabel_bridge_actions,
|
||||
)
|
||||
|
||||
|
||||
def droid_baseact_transform_fn():
|
||||
from lerobot.common.datasets.push_dataset_to_hub.openx.droid_utils import droid_baseact_transform
|
||||
|
||||
return droid_baseact_transform
|
||||
|
||||
|
||||
def bridge_openx_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Applies to version of Bridge V2 in Open X-Embodiment mixture.
|
||||
|
||||
Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it!
|
||||
"""
|
||||
for key in trajectory:
|
||||
if key == "traj_metadata":
|
||||
continue
|
||||
elif key in ["observation", "action"]:
|
||||
for key2 in trajectory[key]:
|
||||
trajectory[key][key2] = trajectory[key][key2][1:]
|
||||
else:
|
||||
trajectory[key] = trajectory[key][1:]
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
tf.cast(trajectory["action"]["open_gripper"][:, None], tf.float32),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
trajectory = relabel_bridge_actions(trajectory)
|
||||
trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
return trajectory
|
||||
|
||||
|
||||
def bridge_orig_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Applies to original version of Bridge V2 from the official project website.
|
||||
|
||||
Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it!
|
||||
"""
|
||||
for key in trajectory:
|
||||
if key == "traj_metadata":
|
||||
continue
|
||||
elif key == "observation":
|
||||
for key2 in trajectory[key]:
|
||||
trajectory[key][key2] = trajectory[key][key2][1:]
|
||||
else:
|
||||
trajectory[key] = trajectory[key][1:]
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
[
|
||||
trajectory["action"][:, :6],
|
||||
binarize_gripper_actions(trajectory["action"][:, -1])[:, None],
|
||||
],
|
||||
axis=1,
|
||||
)
|
||||
trajectory = relabel_bridge_actions(trajectory)
|
||||
trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
return trajectory
|
||||
|
||||
|
||||
def ppgm_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = tf.concat(
|
||||
[
|
||||
trajectory["action"][:, :6],
|
||||
binarize_gripper_actions(trajectory["action"][:, -1])[:, None],
|
||||
],
|
||||
axis=1,
|
||||
)
|
||||
trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"][:, :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"][:, -1:]
|
||||
return trajectory
|
||||
|
||||
|
||||
def rt1_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# make gripper action absolute action, +1 = open, 0 = close
|
||||
gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0]
|
||||
gripper_action = rel2abs_gripper_actions(gripper_action)
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
gripper_action[:, None],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def kuka_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# make gripper action absolute action, +1 = open, 0 = close
|
||||
gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0]
|
||||
gripper_action = rel2abs_gripper_actions(gripper_action)
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
gripper_action[:, None],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
# decode compressed state
|
||||
eef_value = tf.io.decode_compressed(
|
||||
trajectory["observation"]["clip_function_input/base_pose_tool_reached"],
|
||||
compression_type="ZLIB",
|
||||
)
|
||||
eef_value = tf.io.decode_raw(eef_value, tf.float32)
|
||||
trajectory["observation"]["clip_function_input/base_pose_tool_reached"] = tf.reshape(eef_value, (-1, 7))
|
||||
gripper_value = tf.io.decode_compressed(
|
||||
trajectory["observation"]["gripper_closed"], compression_type="ZLIB"
|
||||
)
|
||||
gripper_value = tf.io.decode_raw(gripper_value, tf.float32)
|
||||
trajectory["observation"]["gripper_closed"] = tf.reshape(gripper_value, (-1, 1))
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def taco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["state_eef"] = trajectory["observation"]["robot_obs"][:, :6]
|
||||
trajectory["observation"]["state_gripper"] = trajectory["observation"]["robot_obs"][:, 7:8]
|
||||
trajectory["action"] = trajectory["action"]["rel_actions_world"]
|
||||
|
||||
# invert gripper action + clip, +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :6],
|
||||
tf.clip_by_value(trajectory["action"][:, -1:], 0, 1),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def jaco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["state_eef"] = trajectory["observation"]["end_effector_cartesian_pos"][:, :6]
|
||||
trajectory["observation"]["state_gripper"] = trajectory["observation"]["end_effector_cartesian_pos"][
|
||||
:, -1:
|
||||
]
|
||||
|
||||
# make gripper action absolute action, +1 = open, 0 = close
|
||||
gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0]
|
||||
gripper_action = rel2abs_gripper_actions(gripper_action)
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
tf.zeros_like(trajectory["action"]["world_vector"]),
|
||||
gripper_action[:, None],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def berkeley_cable_routing_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
tf.zeros_like(trajectory["action"]["world_vector"][:, :1]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def roboturk_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# invert absolute gripper action, +1 = open, 0 = close
|
||||
gripper_action = invert_gripper_actions(
|
||||
tf.clip_by_value(trajectory["action"]["gripper_closedness_action"], 0, 1)
|
||||
)
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
gripper_action,
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
trajectory["language_embedding"] = trajectory["observation"]["natural_language_embedding"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def nyu_door_opening_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# make gripper action absolute action, +1 = open, 0 = close
|
||||
gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0]
|
||||
gripper_action = rel2abs_gripper_actions(gripper_action)
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
gripper_action[:, None],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def viola_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# make gripper action, +1 = open, 0 = close
|
||||
gripper_action = trajectory["action"]["gripper_closedness_action"][:, None]
|
||||
gripper_action = tf.clip_by_value(gripper_action, 0, 1)
|
||||
gripper_action = invert_gripper_actions(gripper_action)
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
gripper_action,
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def berkeley_autolab_ur5_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["state"] = trajectory["observation"]["robot_state"][:, 6:14]
|
||||
|
||||
# make gripper action absolute action, +1 = open, 0 = close
|
||||
gripper_action = trajectory["action"]["gripper_closedness_action"]
|
||||
gripper_action = rel2abs_gripper_actions(gripper_action)
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
gripper_action[:, None],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def toto_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
tf.cast(trajectory["action"]["open_gripper"][:, None], tf.float32),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def language_table_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# default to "open" gripper
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"],
|
||||
tf.zeros_like(trajectory["action"]),
|
||||
tf.zeros_like(trajectory["action"]),
|
||||
tf.ones_like(trajectory["action"][:, :1]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
|
||||
# decode language instruction
|
||||
instruction_bytes = trajectory["observation"]["instruction"]
|
||||
instruction_encoded = tf.strings.unicode_encode(instruction_bytes, output_encoding="UTF-8")
|
||||
# Remove trailing padding --> convert RaggedTensor to regular Tensor.
|
||||
trajectory["language_instruction"] = tf.strings.split(instruction_encoded, "\x00")[:, :1].to_tensor()[
|
||||
:, 0
|
||||
]
|
||||
return trajectory
|
||||
|
||||
|
||||
def pusht_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["world_vector"],
|
||||
trajectory["action"]["rotation_delta"],
|
||||
trajectory["action"]["gripper_closedness_action"][:, None],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def stanford_kuka_multimodal_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["depth_image"] = trajectory["observation"]["depth_image"][..., 0]
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :3],
|
||||
tf.zeros_like(trajectory["action"][:, :3]),
|
||||
trajectory["action"][:, -1:],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def nyu_rot_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][..., :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][..., -1:]
|
||||
trajectory["action"] = trajectory["action"][..., :7]
|
||||
return trajectory
|
||||
|
||||
|
||||
def stanford_hydra_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# invert gripper action, +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :6],
|
||||
invert_gripper_actions(trajectory["action"][:, -1:]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
|
||||
trajectory["observation"]["eef_state"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["state"][:, :3],
|
||||
trajectory["observation"]["state"][:, 7:10],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -3:-2]
|
||||
return trajectory
|
||||
|
||||
|
||||
def austin_buds_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# invert gripper action + clip, +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :6],
|
||||
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
|
||||
trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :8]
|
||||
return trajectory
|
||||
|
||||
|
||||
def nyu_franka_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["depth"] = tf.cast(trajectory["observation"]["depth"][..., 0], tf.float32)
|
||||
trajectory["observation"]["depth_additional_view"] = tf.cast(
|
||||
trajectory["observation"]["depth_additional_view"][..., 0], tf.float32
|
||||
)
|
||||
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, -6:]
|
||||
|
||||
# clip gripper action, +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, -8:-2],
|
||||
tf.clip_by_value(trajectory["action"][:, -2:-1], 0, 1),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def maniskill_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][..., 7:8]
|
||||
return trajectory
|
||||
|
||||
|
||||
def furniture_bench_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
import tensorflow_graphics.geometry.transformation as tft
|
||||
|
||||
trajectory["observation"]["state"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["state"][:, :7],
|
||||
trajectory["observation"]["state"][:, -1:],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
|
||||
# invert gripper action + clip, +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :3],
|
||||
tft.euler.from_quaternion(trajectory["action"][:, 3:7]),
|
||||
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def cmu_franka_exploration_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = trajectory["action"][..., :-1]
|
||||
return trajectory
|
||||
|
||||
|
||||
def ucsd_kitchen_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :7]
|
||||
trajectory["action"] = trajectory["action"][..., :-1]
|
||||
return trajectory
|
||||
|
||||
|
||||
def ucsd_pick_place_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :3],
|
||||
tf.zeros_like(trajectory["action"][:, :3]),
|
||||
trajectory["action"][:, -1:],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def austin_sailor_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# invert gripper action + clip, +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :6],
|
||||
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def austin_sirius_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# invert gripper action + clip, +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :6],
|
||||
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def bc_z_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"]["future/xyz_residual"][:, :3],
|
||||
trajectory["action"]["future/axis_angle_residual"][:, :3],
|
||||
invert_gripper_actions(tf.cast(trajectory["action"]["future/target_close"][:, :1], tf.float32)),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def tokyo_pr2_opening_fridge_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
trajectory["action"] = trajectory["action"][..., :-1]
|
||||
return trajectory
|
||||
|
||||
|
||||
def tokyo_pr2_tabletop_manipulation_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
trajectory["action"] = trajectory["action"][..., :-1]
|
||||
return trajectory
|
||||
|
||||
|
||||
def utokyo_xarm_bimanual_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = trajectory["action"][..., -7:]
|
||||
return trajectory
|
||||
|
||||
|
||||
def robo_net_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["state"][:, :4],
|
||||
tf.zeros_like(trajectory["observation"]["state"][:, :2]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :4],
|
||||
tf.zeros_like(trajectory["action"][:, :2]),
|
||||
trajectory["action"][:, -1:],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def berkeley_mvp_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
trajectory["observation"]["state"] = tf.concat((
|
||||
tf.cast(trajectory["observation"]["gripper"][:, None], tf.float32),
|
||||
trajectory["observation"]["pose"],
|
||||
trajectory["observation"]["joint_pos"],),
|
||||
axis=-1,)
|
||||
"""
|
||||
trajectory["observation"]["gripper"] = tf.cast(trajectory["observation"]["gripper"][:, None], tf.float32)
|
||||
return trajectory
|
||||
|
||||
|
||||
def berkeley_rpt_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["gripper"] = tf.cast(trajectory["observation"]["gripper"][:, None], tf.float32)
|
||||
return trajectory
|
||||
|
||||
|
||||
def kaist_nonprehensible_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["state"] = trajectory["observation"]["state"][:, -7:]
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :6],
|
||||
tf.zeros_like(trajectory["action"][:, :1]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def stanford_mask_vit_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["end_effector_pose"][:, :4],
|
||||
tf.zeros_like(trajectory["observation"]["end_effector_pose"][:, :2]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["end_effector_pose"][:, -1:]
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :4],
|
||||
tf.zeros_like(trajectory["action"][:, :2]),
|
||||
trajectory["action"][:, -1:],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def tokyo_lsmo_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
return trajectory
|
||||
|
||||
|
||||
def dlr_sara_grid_clamp_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :6]
|
||||
return trajectory
|
||||
|
||||
|
||||
def dlr_edan_shared_control_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# invert gripper action, +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :6],
|
||||
invert_gripper_actions(trajectory["action"][:, -1:]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def asu_table_top_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = trajectory["ground_truth_states"]["EE"]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
return trajectory
|
||||
|
||||
|
||||
def robocook_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
return trajectory
|
||||
|
||||
|
||||
def imperial_wristcam_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = trajectory["action"][..., :-1]
|
||||
return trajectory
|
||||
|
||||
|
||||
def iamlab_pick_insert_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
import tensorflow_graphics.geometry.transformation as tft
|
||||
|
||||
trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :7]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, 7:8]
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :3],
|
||||
tft.euler.from_quaternion(trajectory["action"][:, 3:7]),
|
||||
trajectory["action"][:, 7:8],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def uiuc_d3field_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"],
|
||||
tf.zeros_like(trajectory["action"]),
|
||||
tf.zeros_like(trajectory["action"][:, :1]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def utaustin_mutex_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :8]
|
||||
|
||||
# invert gripper action + clip, +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :6],
|
||||
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def berkeley_fanuc_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :6]
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, 6:7]
|
||||
|
||||
# dataset does not store gripper actions, so use gripper state info, invert so +1 = open, 0 = close
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"],
|
||||
invert_gripper_actions(trajectory["observation"]["gripper_state"]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def cmu_playing_with_food_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
import tensorflow_graphics.geometry.transformation as tft
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :3],
|
||||
tft.euler.from_quaternion(trajectory["action"][:, 3:7]),
|
||||
trajectory["action"][:, -1:],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def playfusion_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :3],
|
||||
trajectory["action"][:, -4:],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def cmu_stretch_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["eef_state"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["state"][:, :3],
|
||||
tf.zeros_like(trajectory["observation"]["state"][:, :3]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
|
||||
trajectory["action"] = trajectory["action"][..., :-1]
|
||||
return trajectory
|
||||
|
||||
|
||||
def gnm_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
trajectory["observation"]["state"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["position"],
|
||||
tf.zeros_like(trajectory["observation"]["state"][:, :3]),
|
||||
trajectory["observation"]["yaw"],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"],
|
||||
tf.zeros_like(trajectory["action"]),
|
||||
tf.zeros_like(trajectory["action"]),
|
||||
tf.zeros_like(trajectory["action"][:, :1]),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def fmb_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# every input feature is batched, ie has leading batch dimension
|
||||
trajectory["observation"]["proprio"] = tf.concat(
|
||||
(
|
||||
trajectory["observation"]["eef_pose"],
|
||||
trajectory["observation"]["state_gripper_pose"][..., None],
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def dobbe_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# every input feature is batched, ie has leading batch dimension
|
||||
trajectory["observation"]["proprio"] = trajectory["observation"]["state"]
|
||||
return trajectory
|
||||
|
||||
|
||||
def robo_set_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# gripper action is in -1...1 --> clip to 0...1, flip
|
||||
gripper_action = trajectory["action"][:, -1:]
|
||||
gripper_action = invert_gripper_actions(tf.clip_by_value(gripper_action, 0, 1))
|
||||
|
||||
trajectory["action"] = tf.concat(
|
||||
(
|
||||
trajectory["action"][:, :7],
|
||||
gripper_action,
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
return trajectory
|
||||
|
||||
|
||||
def identity_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return trajectory
|
||||
|
||||
|
||||
# === Registry ===
|
||||
OPENX_STANDARDIZATION_TRANSFORMS = {
|
||||
"bridge_openx": bridge_openx_dataset_transform,
|
||||
"bridge_orig": bridge_orig_dataset_transform,
|
||||
"bridge_dataset": bridge_orig_dataset_transform,
|
||||
"ppgm": ppgm_dataset_transform,
|
||||
"ppgm_static": ppgm_dataset_transform,
|
||||
"ppgm_wrist": ppgm_dataset_transform,
|
||||
"fractal20220817_data": rt1_dataset_transform,
|
||||
"kuka": kuka_dataset_transform,
|
||||
"taco_play": taco_play_dataset_transform,
|
||||
"jaco_play": jaco_play_dataset_transform,
|
||||
"berkeley_cable_routing": berkeley_cable_routing_dataset_transform,
|
||||
"roboturk": roboturk_dataset_transform,
|
||||
"nyu_door_opening_surprising_effectiveness": nyu_door_opening_dataset_transform,
|
||||
"viola": viola_dataset_transform,
|
||||
"berkeley_autolab_ur5": berkeley_autolab_ur5_dataset_transform,
|
||||
"toto": toto_dataset_transform,
|
||||
"language_table": language_table_dataset_transform,
|
||||
"columbia_cairlab_pusht_real": pusht_dataset_transform,
|
||||
"stanford_kuka_multimodal_dataset_converted_externally_to_rlds": stanford_kuka_multimodal_dataset_transform,
|
||||
"nyu_rot_dataset_converted_externally_to_rlds": nyu_rot_dataset_transform,
|
||||
"stanford_hydra_dataset_converted_externally_to_rlds": stanford_hydra_dataset_transform,
|
||||
"austin_buds_dataset_converted_externally_to_rlds": austin_buds_dataset_transform,
|
||||
"nyu_franka_play_dataset_converted_externally_to_rlds": nyu_franka_play_dataset_transform,
|
||||
"maniskill_dataset_converted_externally_to_rlds": maniskill_dataset_transform,
|
||||
"furniture_bench_dataset_converted_externally_to_rlds": furniture_bench_dataset_transform,
|
||||
"cmu_franka_exploration_dataset_converted_externally_to_rlds": cmu_franka_exploration_dataset_transform,
|
||||
"ucsd_kitchen_dataset_converted_externally_to_rlds": ucsd_kitchen_dataset_transform,
|
||||
"ucsd_pick_and_place_dataset_converted_externally_to_rlds": ucsd_pick_place_dataset_transform,
|
||||
"austin_sailor_dataset_converted_externally_to_rlds": austin_sailor_dataset_transform,
|
||||
"austin_sirius_dataset_converted_externally_to_rlds": austin_sirius_dataset_transform,
|
||||
"bc_z": bc_z_dataset_transform,
|
||||
"utokyo_pr2_opening_fridge_converted_externally_to_rlds": tokyo_pr2_opening_fridge_dataset_transform,
|
||||
"utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": tokyo_pr2_tabletop_manipulation_dataset_transform,
|
||||
"utokyo_xarm_pick_and_place_converted_externally_to_rlds": identity_transform,
|
||||
"utokyo_xarm_bimanual_converted_externally_to_rlds": utokyo_xarm_bimanual_dataset_transform,
|
||||
"robo_net": robo_net_dataset_transform,
|
||||
"berkeley_mvp_converted_externally_to_rlds": berkeley_mvp_dataset_transform,
|
||||
"berkeley_rpt_converted_externally_to_rlds": berkeley_rpt_dataset_transform,
|
||||
"kaist_nonprehensile_converted_externally_to_rlds": kaist_nonprehensible_dataset_transform,
|
||||
"stanford_mask_vit_converted_externally_to_rlds": stanford_mask_vit_dataset_transform,
|
||||
"tokyo_u_lsmo_converted_externally_to_rlds": tokyo_lsmo_dataset_transform,
|
||||
"dlr_sara_pour_converted_externally_to_rlds": identity_transform,
|
||||
"dlr_sara_grid_clamp_converted_externally_to_rlds": dlr_sara_grid_clamp_dataset_transform,
|
||||
"dlr_edan_shared_control_converted_externally_to_rlds": dlr_edan_shared_control_dataset_transform,
|
||||
"asu_table_top_converted_externally_to_rlds": asu_table_top_dataset_transform,
|
||||
"stanford_robocook_converted_externally_to_rlds": robocook_dataset_transform,
|
||||
"imperialcollege_sawyer_wrist_cam": imperial_wristcam_dataset_transform,
|
||||
"iamlab_cmu_pickup_insert_converted_externally_to_rlds": iamlab_pick_insert_dataset_transform,
|
||||
"uiuc_d3field": uiuc_d3field_dataset_transform,
|
||||
"utaustin_mutex": utaustin_mutex_dataset_transform,
|
||||
"berkeley_fanuc_manipulation": berkeley_fanuc_dataset_transform,
|
||||
"cmu_playing_with_food": cmu_playing_with_food_dataset_transform,
|
||||
"cmu_play_fusion": playfusion_dataset_transform,
|
||||
"cmu_stretch": cmu_stretch_dataset_transform,
|
||||
"berkeley_gnm_recon": gnm_dataset_transform,
|
||||
"berkeley_gnm_cory_hall": gnm_dataset_transform,
|
||||
"berkeley_gnm_sac_son": gnm_dataset_transform,
|
||||
"droid": droid_baseact_transform_fn(),
|
||||
"droid_100": droid_baseact_transform_fn(), # first 100 episodes of droid
|
||||
"fmb": fmb_transform,
|
||||
"dobbe": dobbe_dataset_transform,
|
||||
"robo_set": robo_set_dataset_transform,
|
||||
"usc_cloth_sim_converted_externally_to_rlds": identity_transform,
|
||||
"plex_robosuite": identity_transform,
|
||||
"conq_hose_manipulation": identity_transform,
|
||||
"io_ai_tech": identity_transform,
|
||||
"spoc": identity_transform,
|
||||
}
|
||||
@@ -14,16 +14,13 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
For all datasets in the RLDS format.
|
||||
For https://github.com/google-deepmind/open_x_embodiment (OPENX) datasets.
|
||||
|
||||
NOTE: You need to install tensorflow and tensorflow_datsets before running this script.
|
||||
|
||||
Example:
|
||||
python lerobot/scripts/push_dataset_to_hub.py \
|
||||
--raw-dir /path/to/data/bridge_dataset/1.0.0/ \
|
||||
--repo-id your_hub/sampled_bridge_data_v2 \
|
||||
--raw-format rlds \
|
||||
--raw-dir /hdd/tensorflow_datasets/bridge_dataset/1.0.0/ \
|
||||
--repo-id youliangtan/sampled_bridge_data_v2 \
|
||||
--raw-format openx_rlds.bridge_orig \
|
||||
--episodes 3 4 5 8 9
|
||||
|
||||
Exact dataset fps defined in openx/config.py, obtained from:
|
||||
@@ -38,10 +35,12 @@ import tensorflow as tf
|
||||
import tensorflow_datasets as tfds
|
||||
import torch
|
||||
import tqdm
|
||||
import yaml
|
||||
from datasets import Dataset, Features, Image, Sequence, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
|
||||
from lerobot.common.datasets.push_dataset_to_hub.openx.transforms import OPENX_STANDARDIZATION_TRANSFORMS
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import (
|
||||
calculate_episode_data_index,
|
||||
concatenate_episodes,
|
||||
@@ -53,6 +52,11 @@ from lerobot.common.datasets.utils import (
|
||||
)
|
||||
from lerobot.common.datasets.video_utils import VideoFrame, encode_video_frames
|
||||
|
||||
with open("lerobot/common/datasets/push_dataset_to_hub/openx/configs.yaml") as f:
|
||||
_openx_list = yaml.safe_load(f)
|
||||
|
||||
OPENX_DATASET_CONFIGS = _openx_list["OPENX_DATASET_CONFIGS"]
|
||||
|
||||
np.set_printoptions(precision=2)
|
||||
|
||||
|
||||
@@ -104,6 +108,7 @@ def load_from_raw(
|
||||
video: bool,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
openx_dataset_name: str | None = None,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
@@ -131,17 +136,16 @@ def load_from_raw(
|
||||
# we will apply the standardization transform if the dataset_name is provided
|
||||
# if the dataset name is not provided and the goal is to convert any rlds formatted dataset
|
||||
# search for 'image' keys in the observations
|
||||
image_keys = []
|
||||
state_keys = []
|
||||
observation_info = dataset_info.features["steps"]["observation"]
|
||||
for key in observation_info:
|
||||
# check whether the key is for an image or a vector observation
|
||||
if len(observation_info[key].shape) == 3:
|
||||
# only adding uint8 images discards depth images
|
||||
if observation_info[key].dtype == tf.uint8:
|
||||
image_keys.append(key)
|
||||
else:
|
||||
state_keys.append(key)
|
||||
if openx_dataset_name is not None:
|
||||
print(" - applying standardization transform for dataset: ", openx_dataset_name)
|
||||
assert openx_dataset_name in OPENX_STANDARDIZATION_TRANSFORMS
|
||||
transform_fn = OPENX_STANDARDIZATION_TRANSFORMS[openx_dataset_name]
|
||||
dataset = dataset.map(transform_fn)
|
||||
|
||||
image_keys = OPENX_DATASET_CONFIGS[openx_dataset_name]["image_obs_keys"]
|
||||
else:
|
||||
obs_keys = dataset_info.features["steps"]["observation"].keys()
|
||||
image_keys = [key for key in obs_keys if "image" in key]
|
||||
|
||||
lang_key = "language_instruction" if "language_instruction" in dataset.element_spec else None
|
||||
|
||||
@@ -189,31 +193,50 @@ def load_from_raw(
|
||||
|
||||
num_frames = episode["action"].shape[0]
|
||||
|
||||
ep_dict = {}
|
||||
for key in state_keys:
|
||||
ep_dict[f"observation.{key}"] = tf_to_torch(episode["observation"][key])
|
||||
###########################################################
|
||||
# Handle the episodic data
|
||||
|
||||
ep_dict["action"] = tf_to_torch(episode["action"])
|
||||
ep_dict["next.reward"] = tf_to_torch(episode["reward"]).float()
|
||||
ep_dict["next.done"] = tf_to_torch(episode["is_last"])
|
||||
ep_dict["is_terminal"] = tf_to_torch(episode["is_terminal"])
|
||||
ep_dict["is_first"] = tf_to_torch(episode["is_first"])
|
||||
ep_dict["discount"] = tf_to_torch(episode["discount"])
|
||||
# last step of demonstration is considered done
|
||||
done = torch.zeros(num_frames, dtype=torch.bool)
|
||||
done[-1] = True
|
||||
ep_dict = {}
|
||||
langs = [] # TODO: might be located in "observation"
|
||||
|
||||
image_array_dict = {key: [] for key in image_keys}
|
||||
|
||||
# We will create the state observation tensor by stacking the state
|
||||
# obs keys defined in the openx/configs.py
|
||||
if openx_dataset_name is not None:
|
||||
state_obs_keys = OPENX_DATASET_CONFIGS[openx_dataset_name]["state_obs_keys"]
|
||||
# stack the state observations, if is None, pad with zeros
|
||||
states = []
|
||||
for key in state_obs_keys:
|
||||
if key in episode["observation"]:
|
||||
states.append(tf_to_torch(episode["observation"][key]))
|
||||
else:
|
||||
states.append(torch.zeros(num_frames, 1)) # pad with zeros
|
||||
states = torch.cat(states, dim=1)
|
||||
# assert states.shape == (num_frames, 8), f"states shape: {states.shape}"
|
||||
else:
|
||||
states = tf_to_torch(episode["observation"]["state"])
|
||||
|
||||
actions = tf_to_torch(episode["action"])
|
||||
rewards = tf_to_torch(episode["reward"]).float()
|
||||
|
||||
# If lang_key is present, convert the entire tensor at once
|
||||
if lang_key is not None:
|
||||
ep_dict["language_instruction"] = [x.numpy().decode("utf-8") for x in episode[lang_key]]
|
||||
|
||||
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
|
||||
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames)
|
||||
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
|
||||
|
||||
image_array_dict = {key: [] for key in image_keys}
|
||||
langs = [str(x) for x in episode[lang_key]]
|
||||
|
||||
for im_key in image_keys:
|
||||
imgs = episode["observation"][im_key]
|
||||
image_array_dict[im_key] = [tf_img_convert(img) for img in imgs]
|
||||
|
||||
# simple assertions
|
||||
for item in [states, actions, rewards, done]:
|
||||
assert len(item) == num_frames
|
||||
|
||||
###########################################################
|
||||
|
||||
# loop through all cameras
|
||||
for im_key in image_keys:
|
||||
img_key = f"observation.images.{im_key}"
|
||||
@@ -239,6 +262,17 @@ def load_from_raw(
|
||||
else:
|
||||
ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array]
|
||||
|
||||
if lang_key is not None:
|
||||
ep_dict["language_instruction"] = langs
|
||||
|
||||
ep_dict["observation.state"] = states
|
||||
ep_dict["action"] = actions
|
||||
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
|
||||
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames)
|
||||
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
|
||||
ep_dict["next.reward"] = rewards
|
||||
ep_dict["next.done"] = done
|
||||
|
||||
path_ep_dict = tmp_ep_dicts_dir.joinpath(
|
||||
"ep_dict_" + "0" * (10 - len(str(ep_idx))) + str(ep_idx) + ".pt"
|
||||
)
|
||||
@@ -256,28 +290,30 @@ def load_from_raw(
|
||||
def to_hf_dataset(data_dict, video) -> Dataset:
|
||||
features = {}
|
||||
|
||||
for key in data_dict:
|
||||
# check if vector state obs
|
||||
if key.startswith("observation.") and "observation.images." not in key:
|
||||
features[key] = Sequence(length=data_dict[key].shape[1], feature=Value(dtype="float32", id=None))
|
||||
# check if image obs
|
||||
elif "observation.images." in key:
|
||||
if video:
|
||||
features[key] = VideoFrame()
|
||||
else:
|
||||
features[key] = Image()
|
||||
keys = [key for key in data_dict if "observation.images." in key]
|
||||
for key in keys:
|
||||
if video:
|
||||
features[key] = VideoFrame()
|
||||
else:
|
||||
features[key] = Image()
|
||||
|
||||
features["observation.state"] = Sequence(
|
||||
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
|
||||
)
|
||||
if "observation.velocity" in data_dict:
|
||||
features["observation.velocity"] = Sequence(
|
||||
length=data_dict["observation.velocity"].shape[1], feature=Value(dtype="float32", id=None)
|
||||
)
|
||||
if "observation.effort" in data_dict:
|
||||
features["observation.effort"] = Sequence(
|
||||
length=data_dict["observation.effort"].shape[1], feature=Value(dtype="float32", id=None)
|
||||
)
|
||||
if "language_instruction" in data_dict:
|
||||
features["language_instruction"] = Value(dtype="string", id=None)
|
||||
|
||||
features["action"] = Sequence(
|
||||
length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)
|
||||
)
|
||||
|
||||
features["is_terminal"] = Value(dtype="bool", id=None)
|
||||
features["is_first"] = Value(dtype="bool", id=None)
|
||||
features["discount"] = Value(dtype="float32", id=None)
|
||||
|
||||
features["episode_index"] = Value(dtype="int64", id=None)
|
||||
features["frame_index"] = Value(dtype="int64", id=None)
|
||||
features["timestamp"] = Value(dtype="float32", id=None)
|
||||
@@ -297,8 +333,19 @@ def from_raw_to_lerobot_format(
|
||||
video: bool = True,
|
||||
episodes: list[int] | None = None,
|
||||
encoding: dict | None = None,
|
||||
openx_dataset_name: str | None = None,
|
||||
):
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding)
|
||||
"""This is a test impl for rlds conversion"""
|
||||
if openx_dataset_name is None:
|
||||
# set a default rlds frame rate if the dataset is not from openx
|
||||
fps = 30
|
||||
elif "fps" not in OPENX_DATASET_CONFIGS[openx_dataset_name]:
|
||||
raise ValueError(
|
||||
"fps for this dataset is not specified in openx/configs.py yet," "means it is not yet tested"
|
||||
)
|
||||
fps = OPENX_DATASET_CONFIGS[openx_dataset_name]["fps"]
|
||||
|
||||
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding, openx_dataset_name)
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import importlib.resources
|
||||
import json
|
||||
import logging
|
||||
import textwrap
|
||||
@@ -477,8 +476,6 @@ def create_lerobot_dataset_card(
|
||||
Note: If specified, license must be one of https://huggingface.co/docs/hub/repositories-licenses.
|
||||
"""
|
||||
card_tags = ["LeRobot"]
|
||||
card_template_path = importlib.resources.path("lerobot.common.datasets", "card_template.md")
|
||||
|
||||
if tags:
|
||||
card_tags += tags
|
||||
if dataset_info:
|
||||
@@ -496,9 +493,8 @@ def create_lerobot_dataset_card(
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
return DatasetCard.from_template(
|
||||
card_data=card_data,
|
||||
template_path=str(card_template_path),
|
||||
template_path="./lerobot/common/datasets/card_template.md",
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@@ -25,7 +25,6 @@ from glob import glob
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import wandb
|
||||
from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
|
||||
from omegaconf import DictConfig, OmegaConf
|
||||
from termcolor import colored
|
||||
@@ -108,6 +107,8 @@ class Logger:
|
||||
self._wandb = None
|
||||
else:
|
||||
os.environ["WANDB_SILENT"] = "true"
|
||||
import wandb
|
||||
|
||||
wandb_run_id = None
|
||||
if cfg.resume:
|
||||
wandb_run_id = get_wandb_run_id_from_filesystem(self.checkpoints_dir)
|
||||
@@ -231,7 +232,7 @@ class Logger:
|
||||
# TODO(alexander-soare): Add local text log.
|
||||
if self._wandb is not None:
|
||||
for k, v in d.items():
|
||||
if not isinstance(v, (int, float, str, wandb.Table)):
|
||||
if not isinstance(v, (int, float, str)):
|
||||
logging.warning(
|
||||
f'WandB logging of key "{k}" was ignored as its type is not handled by this wrapper.'
|
||||
)
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
from dataclasses import asdict, dataclass
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
@dataclass
|
||||
class ClassifierConfig:
|
||||
"""Configuration for the Classifier model."""
|
||||
|
||||
num_classes: int = 2
|
||||
hidden_dim: int = 256
|
||||
dropout_rate: float = 0.1
|
||||
model_name: str = "microsoft/resnet-50"
|
||||
device: str = "cuda" if torch.cuda.is_available() else "mps"
|
||||
model_type: str = "cnn" # "transformer" or "cnn"
|
||||
|
||||
def save_pretrained(self, save_dir):
|
||||
"""Save config to json file."""
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
|
||||
# Convert to dict and save as JSON
|
||||
config_dict = asdict(self)
|
||||
with open(os.path.join(save_dir, "config.json"), "w") as f:
|
||||
json.dump(config_dict, f, indent=2)
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(cls, pretrained_model_name_or_path):
|
||||
"""Load config from json file."""
|
||||
config_file = os.path.join(pretrained_model_name_or_path, "config.json")
|
||||
|
||||
with open(config_file) as f:
|
||||
config_dict = json.load(f)
|
||||
|
||||
return cls(**config_dict)
|
||||
@@ -1,134 +0,0 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from huggingface_hub import PyTorchModelHubMixin
|
||||
from torch import Tensor, nn
|
||||
from transformers import AutoImageProcessor, AutoModel
|
||||
|
||||
from .configuration_classifier import ClassifierConfig
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ClassifierOutput:
|
||||
"""Wrapper for classifier outputs with additional metadata."""
|
||||
|
||||
def __init__(
|
||||
self, logits: Tensor, probabilities: Optional[Tensor] = None, hidden_states: Optional[Tensor] = None
|
||||
):
|
||||
self.logits = logits
|
||||
self.probabilities = probabilities
|
||||
self.hidden_states = hidden_states
|
||||
|
||||
|
||||
class Classifier(
|
||||
nn.Module,
|
||||
PyTorchModelHubMixin,
|
||||
# Add Hub metadata
|
||||
library_name="lerobot",
|
||||
repo_url="https://github.com/huggingface/lerobot",
|
||||
tags=["robotics", "vision-classifier"],
|
||||
):
|
||||
"""Image classifier built on top of a pre-trained encoder."""
|
||||
|
||||
# Add name attribute for factory
|
||||
name = "classifier"
|
||||
|
||||
def __init__(self, config: ClassifierConfig):
|
||||
super().__init__()
|
||||
self.config = config
|
||||
self.processor = AutoImageProcessor.from_pretrained(self.config.model_name, trust_remote_code=True)
|
||||
encoder = AutoModel.from_pretrained(self.config.model_name, trust_remote_code=True)
|
||||
# Extract vision model if we're given a multimodal model
|
||||
if hasattr(encoder, "vision_model"):
|
||||
logging.info("Multimodal model detected - using vision encoder only")
|
||||
self.encoder = encoder.vision_model
|
||||
self.vision_config = encoder.config.vision_config
|
||||
else:
|
||||
self.encoder = encoder
|
||||
self.vision_config = getattr(encoder, "config", None)
|
||||
|
||||
# Model type from config
|
||||
self.is_cnn = self.config.model_type == "cnn"
|
||||
|
||||
# For CNNs, initialize backbone
|
||||
if self.is_cnn:
|
||||
self._setup_cnn_backbone()
|
||||
|
||||
self._freeze_encoder()
|
||||
self._build_classifier_head()
|
||||
|
||||
def _setup_cnn_backbone(self):
|
||||
"""Set up CNN encoder"""
|
||||
if hasattr(self.encoder, "fc"):
|
||||
self.feature_dim = self.encoder.fc.in_features
|
||||
self.encoder = nn.Sequential(*list(self.encoder.children())[:-1])
|
||||
elif hasattr(self.encoder.config, "hidden_sizes"):
|
||||
self.feature_dim = self.encoder.config.hidden_sizes[-1] # Last channel dimension
|
||||
else:
|
||||
raise ValueError("Unsupported CNN architecture")
|
||||
|
||||
def _freeze_encoder(self) -> None:
|
||||
"""Freeze the encoder parameters."""
|
||||
for param in self.encoder.parameters():
|
||||
param.requires_grad = False
|
||||
|
||||
def _build_classifier_head(self) -> None:
|
||||
"""Initialize the classifier head architecture."""
|
||||
# Get input dimension based on model type
|
||||
if self.is_cnn:
|
||||
input_dim = self.feature_dim
|
||||
else: # Transformer models
|
||||
if hasattr(self.encoder.config, "hidden_size"):
|
||||
input_dim = self.encoder.config.hidden_size
|
||||
else:
|
||||
raise ValueError("Unsupported transformer architecture since hidden_size is not found")
|
||||
|
||||
self.classifier_head = nn.Sequential(
|
||||
nn.Linear(input_dim, self.config.hidden_dim),
|
||||
nn.Dropout(self.config.dropout_rate),
|
||||
nn.LayerNorm(self.config.hidden_dim),
|
||||
nn.ReLU(),
|
||||
nn.Linear(self.config.hidden_dim, 1 if self.config.num_classes == 2 else self.config.num_classes),
|
||||
)
|
||||
|
||||
def _get_encoder_output(self, x: torch.Tensor) -> torch.Tensor:
|
||||
"""Extract the appropriate output from the encoder."""
|
||||
# Process images with the processor (handles resizing and normalization)
|
||||
processed = self.processor(
|
||||
images=x, # LeRobotDataset already provides proper tensor format
|
||||
return_tensors="pt",
|
||||
)
|
||||
processed = processed["pixel_values"].to(x.device)
|
||||
|
||||
with torch.no_grad():
|
||||
if self.is_cnn:
|
||||
# The HF ResNet applies pooling internally
|
||||
outputs = self.encoder(processed)
|
||||
# Get pooled output directly
|
||||
features = outputs.pooler_output
|
||||
|
||||
if features.dim() > 2:
|
||||
features = features.squeeze(-1).squeeze(-1)
|
||||
return features
|
||||
else: # Transformer models
|
||||
outputs = self.encoder(processed)
|
||||
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
||||
return outputs.pooler_output
|
||||
return outputs.last_hidden_state[:, 0, :]
|
||||
|
||||
def forward(self, x: torch.Tensor) -> ClassifierOutput:
|
||||
"""Forward pass of the classifier."""
|
||||
# For training, we expect input to be a tensor directly from LeRobotDataset
|
||||
encoder_output = self._get_encoder_output(x)
|
||||
logits = self.classifier_head(encoder_output)
|
||||
|
||||
if self.config.num_classes == 2:
|
||||
logits = logits.squeeze(-1)
|
||||
probabilities = torch.sigmoid(logits)
|
||||
else:
|
||||
probabilities = torch.softmax(logits, dim=-1)
|
||||
|
||||
return ClassifierOutput(logits=logits, probabilities=probabilities, hidden_states=encoder_output)
|
||||
@@ -15,7 +15,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -15,10 +15,11 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F # noqa: N812
|
||||
from huggingface_hub import PyTorchModelHubMixin
|
||||
|
||||
|
||||
class HILSerlPolicy(
|
||||
nn.Module,
|
||||
PyTorchModelHubMixin,
|
||||
|
||||
@@ -15,25 +15,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
|
||||
class SACConfig:
|
||||
discount = 0.99
|
||||
temperature_init = 1.0
|
||||
num_critics = 2
|
||||
critic_lr = 3e-4
|
||||
actor_lr = 3e-4
|
||||
critic_network_kwargs = {
|
||||
"hidden_dims": [256, 256],
|
||||
"activate_final": True,
|
||||
}
|
||||
actor_network_kwargs = {
|
||||
"hidden_dims": [256, 256],
|
||||
"activate_final": True,
|
||||
}
|
||||
policy_kwargs = {
|
||||
"tanh_squash_distribution": True,
|
||||
"std_parameterization": "uniform",
|
||||
}
|
||||
discount = 0.99
|
||||
@@ -15,11 +15,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# TODO: (1) better device management
|
||||
|
||||
from collections import deque
|
||||
from copy import deepcopy
|
||||
from functools import partial
|
||||
|
||||
import einops
|
||||
|
||||
@@ -31,10 +27,6 @@ from torch import Tensor
|
||||
from huggingface_hub import PyTorchModelHubMixin
|
||||
from lerobot.common.policies.normalize import Normalize, Unnormalize
|
||||
from lerobot.common.policies.sac.configuration_sac import SACConfig
|
||||
import numpy as np
|
||||
from typing import Callable, Optional, Tuple, Sequence
|
||||
|
||||
|
||||
|
||||
class SACPolicy(
|
||||
nn.Module,
|
||||
@@ -66,27 +58,12 @@ class SACPolicy(
|
||||
self.unnormalize_outputs = Unnormalize(
|
||||
config.output_shapes, config.output_normalization_modes, dataset_stats
|
||||
)
|
||||
encoder = SACObservationEncoder(config)
|
||||
# Define networks
|
||||
critic_nets = []
|
||||
for _ in range(config.num_critics):
|
||||
critic_net = Critic(
|
||||
encoder=encoder,
|
||||
network=MLP(**config.critic_network_kwargs)
|
||||
)
|
||||
critic_nets.append(critic_net)
|
||||
|
||||
self.critic_ensemble = create_critic_ensemble(critic_nets, config.num_critics)
|
||||
self.critic_target = deepcopy(self.critic_ensemble)
|
||||
self.critic_ensemble = ...
|
||||
self.critic_target = ...
|
||||
self.actor_network = ...
|
||||
|
||||
self.actor_network = Policy(
|
||||
encoder=encoder,
|
||||
network=MLP(**config.actor_network_kwargs),
|
||||
action_dim=config.output_shapes["action"][0],
|
||||
**config.policy_kwargs
|
||||
)
|
||||
|
||||
self.temperature = LagrangeMultiplier(init_value=config.temperature_init)
|
||||
self.temperature = ...
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
@@ -112,18 +89,12 @@ class SACPolicy(
|
||||
|
||||
Returns a dictionary with loss as a tensor, and other information as native floats.
|
||||
"""
|
||||
batch = self.normalize_inputs(batch)
|
||||
# batch shape is (b, 2, ...) where index 1 returns the current observation and
|
||||
# the next observation for caluculating the right td index.
|
||||
actions = batch["action"][:, 0]
|
||||
rewards = batch["next.reward"][:, 0]
|
||||
observations = {}
|
||||
next_observations = {}
|
||||
for k in batch:
|
||||
if k.startswith("observation."):
|
||||
observations[k] = batch[k][:, 0]
|
||||
next_observations[k] = batch[k][:, 1]
|
||||
|
||||
observation_batch =
|
||||
next_obaservation_batch =
|
||||
action_batch =
|
||||
reward_batch =
|
||||
dones_batch =
|
||||
|
||||
# perform image augmentation
|
||||
|
||||
# reward bias
|
||||
@@ -133,51 +104,34 @@ class SACPolicy(
|
||||
|
||||
# calculate critics loss
|
||||
# 1- compute actions from policy
|
||||
action_preds, log_probs = self.actor_network(observations)
|
||||
next_actions = ..
|
||||
# 2- compute q targets
|
||||
q_targets = self.target_qs(next_observations, action_preds)
|
||||
q_targets = self.target_qs(next_obaservation_batch, next_actions)
|
||||
|
||||
# critics subsample size
|
||||
min_q = q_targets.min(dim=0)
|
||||
|
||||
# backup entropy
|
||||
td_target = rewards + self.discount * min_q
|
||||
td_target = reward_batch + self.discount * min_q
|
||||
|
||||
# 3- compute predicted qs
|
||||
q_preds = self.critic_ensemble(observations, actions)
|
||||
q_preds = self.critic_ensemble(observation_batch, action_batch)
|
||||
|
||||
# 4- Calculate loss
|
||||
# Compute state-action value loss (TD loss) for all of the Q functions in the ensemble.
|
||||
critics_loss = (
|
||||
F.mse_loss(
|
||||
q_preds,
|
||||
einops.repeat(td_target, "t b -> e t b", e=q_preds.shape[0]),
|
||||
reduction="none",
|
||||
).sum(0) # sum over ensemble
|
||||
# `q_preds_ensemble` depends on the first observation and the actions.
|
||||
* ~batch["observation.state_is_pad"][0]
|
||||
* ~batch["action_is_pad"]
|
||||
# q_targets depends on the reward and the next observations.
|
||||
* ~batch["next.reward_is_pad"]
|
||||
* ~batch["observation.state_is_pad"][1:]
|
||||
).sum(0).mean()
|
||||
|
||||
critics_loss = F.mse_loss(q_preds,
|
||||
einops.repeat(td_target, "b -> e b", e=q_preds.shape[0])) # dones masks
|
||||
|
||||
# calculate actors loss
|
||||
# 1- temperature
|
||||
temperature = self.temperature()
|
||||
|
||||
# 2- get actions (batch_size, action_dim) and log probs (batch_size,)
|
||||
actions, log_probs = self.actor_network(observations) \
|
||||
actions, log_probs = self.actor_network(observation_batch)
|
||||
|
||||
# 3- get q-value predictions
|
||||
with torch.no_grad():
|
||||
q_preds = self.critic_ensemble(observations, actions, return_type="mean")
|
||||
actor_loss = (
|
||||
-(q_preds - temperature * log_probs).mean()
|
||||
* ~batch["observation.state_is_pad"][0]
|
||||
* ~batch["action_is_pad"]
|
||||
).mean()
|
||||
|
||||
q_preds = self.critic_ensemble(observation_batch, actions, return_type="mean")
|
||||
actor_loss = -(q_preds - temperature * log_probs).mean()
|
||||
|
||||
# calculate temperature loss
|
||||
# 1- calculate entropy
|
||||
@@ -187,8 +141,8 @@ class SACPolicy(
|
||||
loss = critics_loss + actor_loss + temperature_loss
|
||||
|
||||
return {
|
||||
"critics_loss": critics_loss.item(),
|
||||
"actor_loss": actor_loss.item(),
|
||||
"Q_value_loss": critics_loss.item(),
|
||||
"pi_loss": actor_loss.item(),
|
||||
"temperature_loss": temperature_loss.item(),
|
||||
"temperature": temperature.item(),
|
||||
"entropy": entropy.item(),
|
||||
@@ -200,484 +154,3 @@ class SACPolicy(
|
||||
self.critic_target.lerp_(self.critic_ensemble, self.config.critic_target_update_weight)
|
||||
#for target_param, param in zip(self.critic_target.parameters(), self.critic_ensemble.parameters()):
|
||||
# target_param.data.copy_(target_param.data * (1.0 - self.config.critic_target_update_weight) + param.data * self.critic_target_update_weight)
|
||||
|
||||
|
||||
class MLP(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
config: SACConfig,
|
||||
activations: Callable[[torch.Tensor], torch.Tensor] | str = nn.SiLU(),
|
||||
activate_final: bool = False,
|
||||
dropout_rate: Optional[float] = None,
|
||||
):
|
||||
super().__init__()
|
||||
self.activate_final = config.activate_final
|
||||
layers = []
|
||||
|
||||
for i, size in enumerate(config.network_hidden_dims):
|
||||
layers.append(nn.Linear(config.network_hidden_dims[i-1] if i > 0 else config.network_hidden_dims[0], size))
|
||||
|
||||
if i + 1 < len(config.network_hidden_dims) or activate_final:
|
||||
if dropout_rate is not None and dropout_rate > 0:
|
||||
layers.append(nn.Dropout(p=dropout_rate))
|
||||
layers.append(nn.LayerNorm(size))
|
||||
layers.append(activations if isinstance(activations, nn.Module) else getattr(nn, activations)())
|
||||
|
||||
self.net = nn.Sequential(*layers)
|
||||
|
||||
def forward(self, x: torch.Tensor, train: bool = False) -> torch.Tensor:
|
||||
# in training mode or not. TODO: find better way to do this
|
||||
self.train(train)
|
||||
return self.net(x)
|
||||
|
||||
|
||||
class Critic(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
encoder: Optional[nn.Module],
|
||||
network: nn.Module,
|
||||
init_final: Optional[float] = None,
|
||||
activate_final: bool = False,
|
||||
device: str = "cuda"
|
||||
):
|
||||
super().__init__()
|
||||
self.device = torch.device(device)
|
||||
self.encoder = encoder
|
||||
self.network = network
|
||||
self.init_final = init_final
|
||||
self.activate_final = activate_final
|
||||
|
||||
# Output layer
|
||||
if init_final is not None:
|
||||
if self.activate_final:
|
||||
self.output_layer = nn.Linear(network.net[-3].out_features, 1)
|
||||
else:
|
||||
self.output_layer = nn.Linear(network.net[-2].out_features, 1)
|
||||
nn.init.uniform_(self.output_layer.weight, -init_final, init_final)
|
||||
nn.init.uniform_(self.output_layer.bias, -init_final, init_final)
|
||||
else:
|
||||
if self.activate_final:
|
||||
self.output_layer = nn.Linear(network.net[-3].out_features, 1)
|
||||
else:
|
||||
self.output_layer = nn.Linear(network.net[-2].out_features, 1)
|
||||
orthogonal_init()(self.output_layer.weight)
|
||||
|
||||
self.to(self.device)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
observations: torch.Tensor,
|
||||
actions: torch.Tensor,
|
||||
train: bool = False
|
||||
) -> torch.Tensor:
|
||||
self.train(train)
|
||||
|
||||
observations = observations.to(self.device)
|
||||
actions = actions.to(self.device)
|
||||
|
||||
if self.encoder is not None:
|
||||
obs_enc = self.encoder(observations)
|
||||
else:
|
||||
obs_enc = observations
|
||||
|
||||
inputs = torch.cat([obs_enc, actions], dim=-1)
|
||||
x = self.network(inputs)
|
||||
value = self.output_layer(x)
|
||||
return value.squeeze(-1)
|
||||
|
||||
def q_value_ensemble(
|
||||
self,
|
||||
observations: torch.Tensor,
|
||||
actions: torch.Tensor,
|
||||
train: bool = False
|
||||
) -> torch.Tensor:
|
||||
observations = observations.to(self.device)
|
||||
actions = actions.to(self.device)
|
||||
|
||||
if len(actions.shape) == 3: # [batch_size, num_actions, action_dim]
|
||||
batch_size, num_actions = actions.shape[:2]
|
||||
obs_expanded = observations.unsqueeze(1).expand(-1, num_actions, -1)
|
||||
obs_flat = obs_expanded.reshape(-1, observations.shape[-1])
|
||||
actions_flat = actions.reshape(-1, actions.shape[-1])
|
||||
q_values = self(obs_flat, actions_flat, train)
|
||||
return q_values.reshape(batch_size, num_actions)
|
||||
else:
|
||||
return self(observations, actions, train)
|
||||
|
||||
|
||||
class Policy(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
encoder: Optional[nn.Module],
|
||||
network: nn.Module,
|
||||
action_dim: int,
|
||||
std_parameterization: str = "exp",
|
||||
std_min: float = 1e-5,
|
||||
std_max: float = 10.0,
|
||||
tanh_squash_distribution: bool = False,
|
||||
fixed_std: Optional[torch.Tensor] = None,
|
||||
init_final: Optional[float] = None,
|
||||
activate_final: bool = False,
|
||||
device: str = "cuda"
|
||||
):
|
||||
super().__init__()
|
||||
self.device = torch.device(device)
|
||||
self.encoder = encoder
|
||||
self.network = network
|
||||
self.action_dim = action_dim
|
||||
self.std_parameterization = std_parameterization
|
||||
self.std_min = std_min
|
||||
self.std_max = std_max
|
||||
self.tanh_squash_distribution = tanh_squash_distribution
|
||||
self.fixed_std = fixed_std.to(self.device) if fixed_std is not None else None
|
||||
self.activate_final = activate_final
|
||||
|
||||
# Mean layer
|
||||
if self.activate_final:
|
||||
self.mean_layer = nn.Linear(network.net[-3].out_features, action_dim)
|
||||
else:
|
||||
self.mean_layer = nn.Linear(network.net[-2].out_features, action_dim)
|
||||
if init_final is not None:
|
||||
nn.init.uniform_(self.mean_layer.weight, -init_final, init_final)
|
||||
nn.init.uniform_(self.mean_layer.bias, -init_final, init_final)
|
||||
else:
|
||||
orthogonal_init()(self.mean_layer.weight)
|
||||
|
||||
# Standard deviation layer or parameter
|
||||
if fixed_std is None:
|
||||
if std_parameterization == "uniform":
|
||||
self.log_stds = nn.Parameter(torch.zeros(action_dim, device=self.device))
|
||||
else:
|
||||
if self.activate_final:
|
||||
self.std_layer = nn.Linear(network.net[-3].out_features, action_dim)
|
||||
else:
|
||||
self.std_layer = nn.Linear(network.net[-2].out_features, action_dim)
|
||||
if init_final is not None:
|
||||
nn.init.uniform_(self.std_layer.weight, -init_final, init_final)
|
||||
nn.init.uniform_(self.std_layer.bias, -init_final, init_final)
|
||||
else:
|
||||
orthogonal_init()(self.std_layer.weight)
|
||||
|
||||
self.to(self.device)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
observations: torch.Tensor,
|
||||
temperature: float = 1.0,
|
||||
train: bool = False,
|
||||
non_squash_distribution: bool = False
|
||||
) -> torch.distributions.Distribution:
|
||||
self.train(train)
|
||||
|
||||
# Encode observations if encoder exists
|
||||
if self.encoder is not None:
|
||||
with torch.set_grad_enabled(train):
|
||||
obs_enc = self.encoder(observations, train=train)
|
||||
else:
|
||||
obs_enc = observations
|
||||
# Get network outputs
|
||||
outputs = self.network(obs_enc)
|
||||
means = self.mean_layer(outputs)
|
||||
|
||||
# Compute standard deviations
|
||||
if self.fixed_std is None:
|
||||
if self.std_parameterization == "exp":
|
||||
log_stds = self.std_layer(outputs)
|
||||
stds = torch.exp(log_stds)
|
||||
elif self.std_parameterization == "softplus":
|
||||
stds = torch.nn.functional.softplus(self.std_layer(outputs))
|
||||
elif self.std_parameterization == "uniform":
|
||||
stds = torch.exp(self.log_stds).expand_as(means)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Invalid std_parameterization: {self.std_parameterization}"
|
||||
)
|
||||
else:
|
||||
assert self.std_parameterization == "fixed"
|
||||
stds = self.fixed_std.expand_as(means)
|
||||
|
||||
# Clip standard deviations and scale with temperature
|
||||
temperature = torch.tensor(temperature, device=self.device)
|
||||
stds = torch.clamp(stds, self.std_min, self.std_max) * torch.sqrt(temperature)
|
||||
|
||||
# Create distribution
|
||||
if self.tanh_squash_distribution and not non_squash_distribution:
|
||||
distribution = TanhMultivariateNormalDiag(
|
||||
loc=means,
|
||||
scale_diag=stds,
|
||||
)
|
||||
else:
|
||||
distribution = torch.distributions.Normal(
|
||||
loc=means,
|
||||
scale=stds,
|
||||
)
|
||||
|
||||
return distribution
|
||||
|
||||
def get_features(self, observations: torch.Tensor) -> torch.Tensor:
|
||||
"""Get encoded features from observations"""
|
||||
observations = observations.to(self.device)
|
||||
if self.encoder is not None:
|
||||
with torch.no_grad():
|
||||
return self.encoder(observations, train=False)
|
||||
return observations
|
||||
|
||||
|
||||
class SACObservationEncoder(nn.Module):
|
||||
"""Encode image and/or state vector observations.
|
||||
TODO(ke-wang): The original work allows for (1) stacking multiple history frames and (2) using pretrained resnet encoders.
|
||||
"""
|
||||
|
||||
def __init__(self, config: SACConfig):
|
||||
"""
|
||||
Creates encoders for pixel and/or state modalities.
|
||||
"""
|
||||
super().__init__()
|
||||
self.config = config
|
||||
|
||||
if "observation.image" in config.input_shapes:
|
||||
self.image_enc_layers = nn.Sequential(
|
||||
nn.Conv2d(
|
||||
config.input_shapes["observation.image"][0], config.image_encoder_hidden_dim, 7, stride=2
|
||||
),
|
||||
nn.ReLU(),
|
||||
nn.Conv2d(config.image_encoder_hidden_dim, config.image_encoder_hidden_dim, 5, stride=2),
|
||||
nn.ReLU(),
|
||||
nn.Conv2d(config.image_encoder_hidden_dim, config.image_encoder_hidden_dim, 3, stride=2),
|
||||
nn.ReLU(),
|
||||
nn.Conv2d(config.image_encoder_hidden_dim, config.image_encoder_hidden_dim, 3, stride=2),
|
||||
nn.ReLU(),
|
||||
)
|
||||
dummy_batch = torch.zeros(1, *config.input_shapes["observation.image"])
|
||||
with torch.inference_mode():
|
||||
out_shape = self.image_enc_layers(dummy_batch).shape[1:]
|
||||
self.image_enc_layers.extend(
|
||||
nn.Sequential(
|
||||
nn.Flatten(),
|
||||
nn.Linear(np.prod(out_shape), config.latent_dim),
|
||||
nn.LayerNorm(config.latent_dim),
|
||||
nn.Tanh(),
|
||||
)
|
||||
)
|
||||
if "observation.state" in config.input_shapes:
|
||||
self.state_enc_layers = nn.Sequential(
|
||||
nn.Linear(config.input_shapes["observation.state"][0], config.state_encoder_hidden_dim),
|
||||
nn.ELU(),
|
||||
nn.Linear(config.state_encoder_hidden_dim, config.latent_dim),
|
||||
nn.LayerNorm(config.latent_dim),
|
||||
nn.Tanh(),
|
||||
)
|
||||
if "observation.environment_state" in config.input_shapes:
|
||||
self.env_state_enc_layers = nn.Sequential(
|
||||
nn.Linear(
|
||||
config.input_shapes["observation.environment_state"][0], config.state_encoder_hidden_dim
|
||||
),
|
||||
nn.ELU(),
|
||||
nn.Linear(config.state_encoder_hidden_dim, config.latent_dim),
|
||||
nn.LayerNorm(config.latent_dim),
|
||||
nn.Tanh(),
|
||||
)
|
||||
|
||||
def forward(self, obs_dict: dict[str, Tensor]) -> Tensor:
|
||||
"""Encode the image and/or state vector.
|
||||
|
||||
Each modality is encoded into a feature vector of size (latent_dim,) and then a uniform mean is taken
|
||||
over all features.
|
||||
"""
|
||||
feat = []
|
||||
# Concatenate all images along the channel dimension.
|
||||
image_keys = [k for k in self.config.input_shapes if k.startswith("observation.image")]
|
||||
for image_key in image_keys:
|
||||
feat.append(flatten_forward_unflatten(self.image_enc_layers, obs_dict[image_key]))
|
||||
if "observation.environment_state" in self.config.input_shapes:
|
||||
feat.append(self.env_state_enc_layers(obs_dict["observation.environment_state"]))
|
||||
if "observation.state" in self.config.input_shapes:
|
||||
feat.append(self.state_enc_layers(obs_dict["observation.state"]))
|
||||
return torch.stack(feat, dim=0).mean(0)
|
||||
|
||||
|
||||
class LagrangeMultiplier(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
init_value: float = 1.0,
|
||||
constraint_shape: Sequence[int] = (),
|
||||
device: str = "cuda"
|
||||
):
|
||||
super().__init__()
|
||||
self.device = torch.device(device)
|
||||
init_value = torch.log(torch.exp(torch.tensor(init_value, device=self.device)) - 1)
|
||||
|
||||
# Initialize the Lagrange multiplier as a parameter
|
||||
self.lagrange = nn.Parameter(
|
||||
torch.full(constraint_shape, init_value, dtype=torch.float32, device=self.device)
|
||||
)
|
||||
|
||||
self.to(self.device)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
lhs: Optional[torch.Tensor] = None,
|
||||
rhs: Optional[torch.Tensor] = None
|
||||
) -> torch.Tensor:
|
||||
# Get the multiplier value based on parameterization
|
||||
multiplier = torch.nn.functional.softplus(self.lagrange)
|
||||
|
||||
# Return the raw multiplier if no constraint values provided
|
||||
if lhs is None:
|
||||
return multiplier
|
||||
|
||||
# Move inputs to device
|
||||
lhs = lhs.to(self.device)
|
||||
if rhs is not None:
|
||||
rhs = rhs.to(self.device)
|
||||
|
||||
# Use the multiplier to compute the Lagrange penalty
|
||||
if rhs is None:
|
||||
rhs = torch.zeros_like(lhs, device=self.device)
|
||||
|
||||
diff = lhs - rhs
|
||||
|
||||
assert diff.shape == multiplier.shape, f"Shape mismatch: {diff.shape} vs {multiplier.shape}"
|
||||
|
||||
return multiplier * diff
|
||||
|
||||
|
||||
# The TanhMultivariateNormalDiag is a probability distribution that represents a transformed normal (Gaussian) distribution where:
|
||||
# 1. The base distribution is a diagonal multivariate normal distribution
|
||||
# 2. The samples from this normal distribution are transformed through a tanh function, which squashes the values to be between -1 and 1
|
||||
# 3. Optionally, the values can be further transformed to fit within arbitrary bounds [low, high] using an affine transformation
|
||||
# This type of distribution is commonly used in reinforcement learning, particularly for continuous action spaces
|
||||
class TanhMultivariateNormalDiag(torch.distributions.TransformedDistribution):
|
||||
def __init__(
|
||||
self,
|
||||
loc: torch.Tensor,
|
||||
scale_diag: torch.Tensor,
|
||||
low: Optional[torch.Tensor] = None,
|
||||
high: Optional[torch.Tensor] = None,
|
||||
):
|
||||
# Create base normal distribution
|
||||
base_distribution = torch.distributions.Normal(loc=loc, scale=scale_diag)
|
||||
|
||||
# Create list of transforms
|
||||
transforms = []
|
||||
|
||||
# Add tanh transform
|
||||
transforms.append(torch.distributions.transforms.TanhTransform())
|
||||
|
||||
# Add rescaling transform if bounds are provided
|
||||
if low is not None and high is not None:
|
||||
transforms.append(
|
||||
torch.distributions.transforms.AffineTransform(
|
||||
loc=(high + low) / 2,
|
||||
scale=(high - low) / 2
|
||||
)
|
||||
)
|
||||
|
||||
# Initialize parent class
|
||||
super().__init__(
|
||||
base_distribution=base_distribution,
|
||||
transforms=transforms
|
||||
)
|
||||
|
||||
# Store parameters
|
||||
self.loc = loc
|
||||
self.scale_diag = scale_diag
|
||||
self.low = low
|
||||
self.high = high
|
||||
|
||||
def mode(self) -> torch.Tensor:
|
||||
"""Get the mode of the transformed distribution"""
|
||||
# The mode of a normal distribution is its mean
|
||||
mode = self.loc
|
||||
|
||||
# Apply transforms
|
||||
for transform in self.transforms:
|
||||
mode = transform(mode)
|
||||
|
||||
return mode
|
||||
|
||||
def rsample(self, sample_shape=torch.Size()) -> torch.Tensor:
|
||||
"""
|
||||
Reparameterized sample from the distribution
|
||||
"""
|
||||
# Sample from base distribution
|
||||
x = self.base_dist.rsample(sample_shape)
|
||||
|
||||
# Apply transforms
|
||||
for transform in self.transforms:
|
||||
x = transform(x)
|
||||
|
||||
return x
|
||||
|
||||
def log_prob(self, value: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Compute log probability of a value
|
||||
Includes the log det jacobian for the transforms
|
||||
"""
|
||||
# Initialize log prob
|
||||
log_prob = torch.zeros_like(value[..., 0])
|
||||
|
||||
# Inverse transforms to get back to normal distribution
|
||||
q = value
|
||||
for transform in reversed(self.transforms):
|
||||
q = transform.inv(q)
|
||||
log_prob = log_prob - transform.log_abs_det_jacobian(q, transform(q))
|
||||
|
||||
# Add base distribution log prob
|
||||
log_prob = log_prob + self.base_dist.log_prob(q).sum(-1)
|
||||
|
||||
return log_prob
|
||||
|
||||
def sample_and_log_prob(self, sample_shape=torch.Size()) -> Tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Sample from the distribution and compute log probability
|
||||
"""
|
||||
x = self.rsample(sample_shape)
|
||||
log_prob = self.log_prob(x)
|
||||
return x, log_prob
|
||||
|
||||
def entropy(self) -> torch.Tensor:
|
||||
"""
|
||||
Compute entropy of the distribution
|
||||
"""
|
||||
# Start with base distribution entropy
|
||||
entropy = self.base_dist.entropy().sum(-1)
|
||||
|
||||
# Add log det jacobian for each transform
|
||||
x = self.rsample()
|
||||
for transform in self.transforms:
|
||||
entropy = entropy + transform.log_abs_det_jacobian(x, transform(x))
|
||||
x = transform(x)
|
||||
|
||||
return entropy
|
||||
|
||||
|
||||
def create_critic_ensemble(critic_class, num_critics: int, device: str = "cuda") -> nn.ModuleList:
|
||||
"""Creates an ensemble of critic networks"""
|
||||
critics = nn.ModuleList([critic_class() for _ in range(num_critics)])
|
||||
return critics.to(device)
|
||||
|
||||
|
||||
def orthogonal_init():
|
||||
return lambda x: torch.nn.init.orthogonal_(x, gain=1.0)
|
||||
|
||||
|
||||
# borrowed from tdmpc
|
||||
def flatten_forward_unflatten(fn: Callable[[Tensor], Tensor], image_tensor: Tensor) -> Tensor:
|
||||
"""Helper to temporarily flatten extra dims at the start of the image tensor.
|
||||
|
||||
Args:
|
||||
fn: Callable that the image tensor will be passed to. It should accept (B, C, H, W) and return
|
||||
(B, *), where * is any number of dimensions.
|
||||
image_tensor: An image tensor of shape (**, C, H, W), where ** is any number of dimensions and
|
||||
can be more than 1 dimensions, generally different from *.
|
||||
Returns:
|
||||
A return value from the callable reshaped to (**, *).
|
||||
"""
|
||||
if image_tensor.ndim == 4:
|
||||
return fn(image_tensor)
|
||||
start_dims = image_tensor.shape[:-3]
|
||||
inp = torch.flatten(image_tensor, end_dim=-4)
|
||||
flat_out = fn(inp)
|
||||
return torch.reshape(flat_out, (*start_dims, *flat_out.shape[1:]))
|
||||
|
||||
|
||||
@@ -120,22 +120,14 @@ def predict_action(observation, policy, device, use_amp):
|
||||
return action
|
||||
|
||||
|
||||
def init_keyboard_listener(assign_rewards=False):
|
||||
"""
|
||||
Initializes a keyboard listener to enable early termination of an episode
|
||||
or environment reset by pressing the right arrow key ('->'). This may require
|
||||
sudo permissions to allow the terminal to monitor keyboard events.
|
||||
|
||||
Args:
|
||||
assign_rewards (bool): If True, allows annotating the collected trajectory
|
||||
with a binary reward at the end of the episode to indicate success.
|
||||
"""
|
||||
def init_keyboard_listener():
|
||||
# Allow to exit early while recording an episode or resetting the environment,
|
||||
# by tapping the right arrow key '->'. This might require a sudo permission
|
||||
# to allow your terminal to monitor keyboard events.
|
||||
events = {}
|
||||
events["exit_early"] = False
|
||||
events["rerecord_episode"] = False
|
||||
events["stop_recording"] = False
|
||||
if assign_rewards:
|
||||
events["next.reward"] = 0
|
||||
|
||||
if is_headless():
|
||||
logging.warning(
|
||||
@@ -160,13 +152,6 @@ def init_keyboard_listener(assign_rewards=False):
|
||||
print("Escape key pressed. Stopping data recording...")
|
||||
events["stop_recording"] = True
|
||||
events["exit_early"] = True
|
||||
elif assign_rewards and key == keyboard.Key.space:
|
||||
events["next.reward"] = 1 if events["next.reward"] == 0 else 0
|
||||
print(
|
||||
"Space key pressed. Assigning new reward to the subsequent frames. New reward:",
|
||||
events["next.reward"],
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error handling key press: {e}")
|
||||
|
||||
@@ -199,7 +184,7 @@ def init_policy(pretrained_policy_name_or_path, policy_overrides):
|
||||
def warmup_record(
|
||||
robot,
|
||||
events,
|
||||
enable_teleoperation,
|
||||
enable_teloperation,
|
||||
warmup_time_s,
|
||||
display_cameras,
|
||||
fps,
|
||||
@@ -210,7 +195,7 @@ def warmup_record(
|
||||
display_cameras=display_cameras,
|
||||
events=events,
|
||||
fps=fps,
|
||||
teleoperate=enable_teleoperation,
|
||||
teleoperate=enable_teloperation,
|
||||
)
|
||||
|
||||
|
||||
@@ -287,8 +272,6 @@ def control_loop(
|
||||
|
||||
if dataset is not None:
|
||||
frame = {**observation, **action}
|
||||
if "next.reward" in events:
|
||||
frame["next.reward"] = events["next.reward"]
|
||||
dataset.add_frame(frame)
|
||||
|
||||
if display_cameras and not is_headless():
|
||||
@@ -318,8 +301,6 @@ def reset_environment(robot, events, reset_time_s):
|
||||
|
||||
timestamp = 0
|
||||
start_vencod_t = time.perf_counter()
|
||||
if "next.reward" in events:
|
||||
events["next.reward"] = 0
|
||||
|
||||
# Wait if necessary
|
||||
with tqdm.tqdm(total=reset_time_s, desc="Waiting") as pbar:
|
||||
@@ -372,7 +353,7 @@ def sanity_check_dataset_robot_compatibility(
|
||||
|
||||
mismatches = []
|
||||
for field, dataset_value, present_value in fields:
|
||||
diff = DeepDiff(dataset_value, present_value, exclude_regex_paths=[r".*\['info'\]$"])
|
||||
diff = DeepDiff(dataset_value, present_value)
|
||||
if diff:
|
||||
mismatches.append(f"{field}: expected {present_value}, got {dataset_value}")
|
||||
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
# @package _global_
|
||||
|
||||
defaults:
|
||||
- _self_
|
||||
|
||||
seed: 13
|
||||
dataset_repo_id: "dataset_repo_id"
|
||||
train_split_proportion: 0.8
|
||||
|
||||
# Required by logger
|
||||
env:
|
||||
name: "classifier"
|
||||
task: "binary_classification"
|
||||
|
||||
|
||||
training:
|
||||
num_epochs: 5
|
||||
batch_size: 16
|
||||
learning_rate: 1e-4
|
||||
num_workers: 4
|
||||
grad_clip_norm: 10
|
||||
use_amp: true
|
||||
log_freq: 1
|
||||
eval_freq: 1 # How often to run validation (in epochs)
|
||||
save_freq: 1 # How often to save checkpoints (in epochs)
|
||||
save_checkpoint: true
|
||||
image_key: "observation.images.phone"
|
||||
label_key: "next.reward"
|
||||
|
||||
eval:
|
||||
batch_size: 16
|
||||
num_samples_to_log: 30 # Number of validation samples to log in the table
|
||||
|
||||
policy:
|
||||
name: "hilserl/classifier"
|
||||
model_name: "facebook/convnext-base-224"
|
||||
model_type: "cnn"
|
||||
|
||||
wandb:
|
||||
enable: false
|
||||
project: "classifier-training"
|
||||
entity: "wandb_entity"
|
||||
job_name: "classifier_training_0"
|
||||
disable_artifact: false
|
||||
|
||||
device: "mps"
|
||||
resume: false
|
||||
output_dir: "output"
|
||||
@@ -10,7 +10,7 @@ max_relative_target: null
|
||||
leader_arms:
|
||||
main:
|
||||
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
|
||||
port: /dev/tty.usbmodem58760430441
|
||||
port: /dev/tty.usbmodem575E0031751
|
||||
motors:
|
||||
# name: (index, model)
|
||||
shoulder_pan: [1, "xl330-m077"]
|
||||
@@ -23,7 +23,7 @@ leader_arms:
|
||||
follower_arms:
|
||||
main:
|
||||
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
|
||||
port: /dev/tty.usbmodem585A0083391
|
||||
port: /dev/tty.usbmodem575E0032081
|
||||
motors:
|
||||
# name: (index, model)
|
||||
shoulder_pan: [1, "xl430-w250"]
|
||||
|
||||
@@ -18,7 +18,7 @@ max_relative_target: null
|
||||
leader_arms:
|
||||
main:
|
||||
_target_: lerobot.common.robot_devices.motors.feetech.FeetechMotorsBus
|
||||
port: /dev/tty.usbmodem58760433331
|
||||
port: /dev/tty.usbmodem585A0077581
|
||||
motors:
|
||||
# name: (index, model)
|
||||
shoulder_pan: [1, "sts3215"]
|
||||
|
||||
@@ -29,6 +29,7 @@ python lerobot/scripts/control_robot.py teleoperate \
|
||||
```bash
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--fps 30 \
|
||||
--root tmp/data \
|
||||
--repo-id $USER/koch_test \
|
||||
--num-episodes 1 \
|
||||
--run-compute-stats 0
|
||||
@@ -37,6 +38,7 @@ python lerobot/scripts/control_robot.py record \
|
||||
- Visualize dataset:
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset.py \
|
||||
--root tmp/data \
|
||||
--repo-id $USER/koch_test \
|
||||
--episode-index 0
|
||||
```
|
||||
@@ -45,6 +47,7 @@ python lerobot/scripts/visualize_dataset.py \
|
||||
```bash
|
||||
python lerobot/scripts/control_robot.py replay \
|
||||
--fps 30 \
|
||||
--root tmp/data \
|
||||
--repo-id $USER/koch_test \
|
||||
--episode 0
|
||||
```
|
||||
@@ -54,6 +57,7 @@ python lerobot/scripts/control_robot.py replay \
|
||||
```bash
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id $USER/koch_pick_place_lego \
|
||||
--num-episodes 50 \
|
||||
--warmup-time-s 2 \
|
||||
@@ -68,12 +72,12 @@ python lerobot/scripts/control_robot.py record \
|
||||
- Tap escape key 'esc' to stop the data recording.
|
||||
This might require a sudo permission to allow your terminal to monitor keyboard events.
|
||||
|
||||
**NOTE**: You can resume/continue data recording by running the same data recording command and adding `--resume 1`.
|
||||
If the dataset you want to extend is not on the hub, you also need to add `--local-files-only 1`.
|
||||
**NOTE**: You can resume/continue data recording by running the same data recording command twice.
|
||||
To avoid resuming by deleting the dataset, use `--force-override 1`.
|
||||
|
||||
- Train on this dataset with the ACT policy:
|
||||
```bash
|
||||
python lerobot/scripts/train.py \
|
||||
DATA_DIR=data python lerobot/scripts/train.py \
|
||||
policy=act_koch_real \
|
||||
env=koch_real \
|
||||
dataset_repo_id=$USER/koch_pick_place_lego \
|
||||
@@ -84,6 +88,7 @@ python lerobot/scripts/train.py \
|
||||
```bash
|
||||
python lerobot/scripts/control_robot.py record \
|
||||
--fps 30 \
|
||||
--root data \
|
||||
--repo-id $USER/eval_act_koch_real \
|
||||
--num-episodes 10 \
|
||||
--warmup-time-s 2 \
|
||||
@@ -186,12 +191,11 @@ def teleoperate(
|
||||
@safe_disconnect
|
||||
def record(
|
||||
robot: Robot,
|
||||
root: Path,
|
||||
root: str,
|
||||
repo_id: str,
|
||||
single_task: str,
|
||||
pretrained_policy_name_or_path: str | None = None,
|
||||
policy_overrides: List[str] | None = None,
|
||||
assign_rewards: bool = False,
|
||||
fps: int | None = None,
|
||||
warmup_time_s: int | float = 2,
|
||||
episode_time_s: int | float = 10,
|
||||
@@ -200,7 +204,6 @@ def record(
|
||||
video: bool = True,
|
||||
run_compute_stats: bool = True,
|
||||
push_to_hub: bool = True,
|
||||
tags: list[str] | None = None,
|
||||
num_image_writer_processes: int = 0,
|
||||
num_image_writer_threads_per_camera: int = 4,
|
||||
display_cameras: bool = True,
|
||||
@@ -215,9 +218,6 @@ def record(
|
||||
policy = None
|
||||
device = None
|
||||
use_amp = None
|
||||
extra_features = (
|
||||
{"next.reward": {"dtype": "int64", "shape": (1,), "names": None}} if assign_rewards else None
|
||||
)
|
||||
|
||||
if single_task:
|
||||
task = single_task
|
||||
@@ -258,12 +258,12 @@ def record(
|
||||
use_videos=video,
|
||||
image_writer_processes=num_image_writer_processes,
|
||||
image_writer_threads=num_image_writer_threads_per_camera * len(robot.cameras),
|
||||
features=extra_features,
|
||||
)
|
||||
|
||||
if not robot.is_connected:
|
||||
robot.connect()
|
||||
listener, events = init_keyboard_listener(assign_rewards=assign_rewards)
|
||||
|
||||
listener, events = init_keyboard_listener()
|
||||
|
||||
# Execute a few seconds without recording to:
|
||||
# 1. teleoperate the robot to move it in starting position if no policy provided,
|
||||
@@ -331,7 +331,7 @@ def record(
|
||||
dataset.consolidate(run_compute_stats)
|
||||
|
||||
if push_to_hub:
|
||||
dataset.push_to_hub(tags=tags)
|
||||
dataset.push_to_hub()
|
||||
|
||||
log_say("Exiting", play_sounds)
|
||||
return dataset
|
||||
@@ -345,7 +345,7 @@ def replay(
|
||||
episode: int,
|
||||
fps: int | None = None,
|
||||
play_sounds: bool = True,
|
||||
local_files_only: bool = False,
|
||||
local_files_only: bool = True,
|
||||
):
|
||||
# TODO(rcadene, aliberts): refactor with control_loop, once `dataset` is an instance of LeRobotDataset
|
||||
# TODO(rcadene): Add option to record logs
|
||||
@@ -427,8 +427,8 @@ if __name__ == "__main__":
|
||||
parser_record.add_argument(
|
||||
"--root",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Root directory where the dataset will be stored (e.g. 'dataset/path').",
|
||||
default="data",
|
||||
help="Root directory where the dataset will be stored locally at '{root}/{repo_id}' (e.g. 'data/hf_username/dataset_name').",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--repo-id",
|
||||
@@ -436,12 +436,6 @@ if __name__ == "__main__":
|
||||
default="lerobot/test",
|
||||
help="Dataset identifier. By convention it should match '{hf_username}/{dataset_name}' (e.g. `lerobot/test`).",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--local-files-only",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Use local files only. By default, this script will try to fetch the dataset from the hub if it exists.",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--warmup-time-s",
|
||||
type=int,
|
||||
@@ -473,12 +467,12 @@ if __name__ == "__main__":
|
||||
default=1,
|
||||
help="Upload dataset to Hugging Face hub.",
|
||||
)
|
||||
# parser_record.add_argument(
|
||||
# "--tags",
|
||||
# type=str,
|
||||
# nargs="*",
|
||||
# help="Add tags to your dataset on the hub.",
|
||||
# )
|
||||
parser_record.add_argument(
|
||||
"--tags",
|
||||
type=str,
|
||||
nargs="*",
|
||||
help="Add tags to your dataset on the hub.",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--num-image-writer-processes",
|
||||
type=int,
|
||||
@@ -501,10 +495,10 @@ if __name__ == "__main__":
|
||||
),
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--resume",
|
||||
"--force-override",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Resume recording on an existing dataset.",
|
||||
help="By default, data recording is resumed. When set to 1, delete the local directory and start data recording from scratch.",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"-p",
|
||||
@@ -521,12 +515,6 @@ if __name__ == "__main__":
|
||||
nargs="*",
|
||||
help="Any key=value arguments to override config values (use dots for.nested=overrides)",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--assign-rewards",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Enables the assignation of rewards to frames (by default no assignation). When enabled, assign a 0 reward to frames until the space bar is pressed which assign a 1 reward. Press the space bar a second time to assign a 0 reward. The reward assigned is reset to 0 when the episode ends.",
|
||||
)
|
||||
|
||||
parser_replay = subparsers.add_parser("replay", parents=[base_parser])
|
||||
parser_replay.add_argument(
|
||||
@@ -535,8 +523,8 @@ if __name__ == "__main__":
|
||||
parser_replay.add_argument(
|
||||
"--root",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Root directory where the dataset will be stored (e.g. 'dataset/path').",
|
||||
default="data",
|
||||
help="Root directory where the dataset will be stored locally at '{root}/{repo_id}' (e.g. 'data/hf_username/dataset_name').",
|
||||
)
|
||||
parser_replay.add_argument(
|
||||
"--repo-id",
|
||||
@@ -544,12 +532,6 @@ if __name__ == "__main__":
|
||||
default="lerobot/test",
|
||||
help="Dataset identifier. By convention it should match '{hf_username}/{dataset_name}' (e.g. `lerobot/test`).",
|
||||
)
|
||||
parser_replay.add_argument(
|
||||
"--local-files-only",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Use local files only. By default, this script will try to fetch the dataset from the hub if it exists.",
|
||||
)
|
||||
parser_replay.add_argument("--episode", type=int, default=0, help="Index of the episode to replay.")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -1,546 +0,0 @@
|
||||
"""
|
||||
Utilities to control a robot in simulation.
|
||||
|
||||
Useful to record a dataset, replay a recorded episode and record an evaluation dataset.
|
||||
|
||||
Examples of usage:
|
||||
|
||||
|
||||
- Unlimited teleoperation at a limited frequency of 30 Hz, to simulate data recording frequency.
|
||||
You can modify this value depending on how fast your simulation can run:
|
||||
```bash
|
||||
python lerobot/scripts/control_robot.py teleoperate \
|
||||
--fps 30 \
|
||||
--robot-path lerobot/configs/robot/your_robot_config.yaml \
|
||||
--sim-config lerobot/configs/env/your_sim_config.yaml
|
||||
```
|
||||
|
||||
- Record one episode in order to test replay:
|
||||
```bash
|
||||
python lerobot/scripts/control_sim_robot.py record \
|
||||
--robot-path lerobot/configs/robot/your_robot_config.yaml \
|
||||
--sim-config lerobot/configs/env/your_sim_config.yaml \
|
||||
--fps 30 \
|
||||
--repo-id $USER/robot_sim_test \
|
||||
--num-episodes 1 \
|
||||
--run-compute-stats 0
|
||||
```
|
||||
|
||||
Enable the --push-to-hub 1 to push the recorded dataset to the huggingface hub.
|
||||
|
||||
- Visualize dataset:
|
||||
```bash
|
||||
python lerobot/scripts/visualize_dataset.py \
|
||||
--repo-id $USER/robot_sim_test \
|
||||
--episode-index 0
|
||||
```
|
||||
|
||||
- Replay a sequence of test episodes:
|
||||
```bash
|
||||
python lerobot/scripts/control_sim_robot.py replay \
|
||||
--robot-path lerobot/configs/robot/your_robot_config.yaml \
|
||||
--sim-config lerobot/configs/env/your_sim_config.yaml \
|
||||
--fps 30 \
|
||||
--repo-id $USER/robot_sim_test \
|
||||
--episode 0
|
||||
```
|
||||
Note: The seed is saved, therefore, during replay we can load the same environment state as the one during collection.
|
||||
|
||||
- Record a full dataset in order to train a policy,
|
||||
30 seconds of recording for each episode, and 10 seconds to reset the environment in between episodes:
|
||||
```bash
|
||||
python lerobot/scripts/control_sim_robot.py record \
|
||||
--robot-path lerobot/configs/robot/your_robot_config.yaml \
|
||||
--sim-config lerobot/configs/env/your_sim_config.yaml \
|
||||
--fps 30 \
|
||||
--repo-id $USER/robot_sim_test \
|
||||
--num-episodes 50 \
|
||||
--episode-time-s 30 \
|
||||
```
|
||||
|
||||
**NOTE**: You can use your keyboard to control data recording flow.
|
||||
- Tap right arrow key '->' to early exit while recording an episode and go to reseting the environment.
|
||||
- Tap right arrow key '->' to early exit while reseting the environment and got to recording the next episode.
|
||||
- Tap left arrow key '<-' to early exit and re-record the current episode.
|
||||
- Tap escape key 'esc' to stop the data recording.
|
||||
This might require a sudo permission to allow your terminal to monitor keyboard events.
|
||||
|
||||
**NOTE**: You can resume/continue data recording by running the same data recording command twice.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import importlib
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import gymnasium as gym
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.common.robot_devices.control_utils import (
|
||||
init_keyboard_listener,
|
||||
init_policy,
|
||||
is_headless,
|
||||
log_control_info,
|
||||
predict_action,
|
||||
sanity_check_dataset_name,
|
||||
sanity_check_dataset_robot_compatibility,
|
||||
stop_recording,
|
||||
)
|
||||
from lerobot.common.robot_devices.robots.factory import make_robot
|
||||
from lerobot.common.robot_devices.robots.utils import Robot
|
||||
from lerobot.common.robot_devices.utils import busy_wait
|
||||
from lerobot.common.utils.utils import init_hydra_config, init_logging, log_say
|
||||
|
||||
DEFAULT_FEATURES = {
|
||||
"next.reward": {
|
||||
"dtype": "float32",
|
||||
"shape": (1,),
|
||||
"names": None,
|
||||
},
|
||||
"next.success": {
|
||||
"dtype": "bool",
|
||||
"shape": (1,),
|
||||
"names": None,
|
||||
},
|
||||
"seed": {
|
||||
"dtype": "int64",
|
||||
"shape": (1,),
|
||||
"names": None,
|
||||
},
|
||||
"timestamp": {
|
||||
"dtype": "float32",
|
||||
"shape": (1,),
|
||||
"names": None,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
########################################################################################
|
||||
# Utilities
|
||||
########################################################################################
|
||||
def none_or_int(value):
|
||||
if value == "None":
|
||||
return None
|
||||
return int(value)
|
||||
|
||||
|
||||
def init_sim_calibration(robot, cfg):
|
||||
# Constants necessary for transforming the joint pos of the real robot to the sim
|
||||
# depending on the robot discription used in that sim.
|
||||
start_pos = np.array(robot.leader_arms.main.calibration["start_pos"])
|
||||
axis_directions = np.array(cfg.get("axis_directions", [1]))
|
||||
offsets = np.array(cfg.get("offsets", [0])) * np.pi
|
||||
|
||||
return {"start_pos": start_pos, "axis_directions": axis_directions, "offsets": offsets}
|
||||
|
||||
|
||||
def real_positions_to_sim(real_positions, axis_directions, start_pos, offsets):
|
||||
"""Counts - starting position -> radians -> align axes -> offset"""
|
||||
return axis_directions * (real_positions - start_pos) * 2.0 * np.pi / 4096 + offsets
|
||||
|
||||
|
||||
########################################################################################
|
||||
# Control modes
|
||||
########################################################################################
|
||||
|
||||
|
||||
def teleoperate(env, robot: Robot, process_action_fn, teleop_time_s=None):
|
||||
env = env()
|
||||
env.reset()
|
||||
start_teleop_t = time.perf_counter()
|
||||
while True:
|
||||
leader_pos = robot.leader_arms.main.read("Present_Position")
|
||||
action = process_action_fn(leader_pos)
|
||||
env.step(np.expand_dims(action, 0))
|
||||
if teleop_time_s is not None and time.perf_counter() - start_teleop_t > teleop_time_s:
|
||||
print("Teleoperation processes finished.")
|
||||
break
|
||||
|
||||
|
||||
def record(
|
||||
env,
|
||||
robot: Robot,
|
||||
process_action_from_leader,
|
||||
root: Path,
|
||||
repo_id: str,
|
||||
task: str,
|
||||
fps: int | None = None,
|
||||
tags: list[str] | None = None,
|
||||
pretrained_policy_name_or_path: str = None,
|
||||
policy_overrides: bool | None = None,
|
||||
episode_time_s: int = 30,
|
||||
num_episodes: int = 50,
|
||||
video: bool = True,
|
||||
push_to_hub: bool = True,
|
||||
num_image_writer_processes: int = 0,
|
||||
num_image_writer_threads_per_camera: int = 4,
|
||||
display_cameras: bool = False,
|
||||
play_sounds: bool = True,
|
||||
resume: bool = False,
|
||||
local_files_only: bool = False,
|
||||
run_compute_stats: bool = True,
|
||||
) -> LeRobotDataset:
|
||||
# Load pretrained policy
|
||||
policy = None
|
||||
if pretrained_policy_name_or_path is not None:
|
||||
policy, policy_fps, device, use_amp = init_policy(pretrained_policy_name_or_path, policy_overrides)
|
||||
|
||||
if fps is None:
|
||||
fps = policy_fps
|
||||
logging.warning(f"No fps provided, so using the fps from policy config ({policy_fps}).")
|
||||
|
||||
if policy is None and process_action_from_leader is None:
|
||||
raise ValueError("Either policy or process_action_fn has to be set to enable control in sim.")
|
||||
|
||||
# initialize listener before sim env
|
||||
listener, events = init_keyboard_listener()
|
||||
|
||||
# create sim env
|
||||
env = env()
|
||||
|
||||
# Create empty dataset or load existing saved episodes
|
||||
num_cameras = sum([1 if "image" in key else 0 for key in env.observation_space])
|
||||
|
||||
# get image keys
|
||||
image_keys = [key for key in env.observation_space if "image" in key]
|
||||
state_keys_dict = env_cfg.state_keys
|
||||
|
||||
if resume:
|
||||
dataset = LeRobotDataset(
|
||||
repo_id,
|
||||
root=root,
|
||||
local_files_only=local_files_only,
|
||||
)
|
||||
dataset.start_image_writer(
|
||||
num_processes=num_image_writer_processes,
|
||||
num_threads=num_image_writer_threads_per_camera * num_cameras,
|
||||
)
|
||||
sanity_check_dataset_robot_compatibility(dataset, robot, fps, video)
|
||||
else:
|
||||
features = DEFAULT_FEATURES
|
||||
# add image keys to features
|
||||
for key in image_keys:
|
||||
shape = env.observation_space[key].shape
|
||||
if not key.startswith("observation.image."):
|
||||
key = "observation.image." + key
|
||||
features[key] = {"dtype": "video", "names": ["channel", "height", "width"], "shape": shape}
|
||||
|
||||
for key, obs_key in state_keys_dict.items():
|
||||
features[key] = {
|
||||
"dtype": "float32",
|
||||
"names": None,
|
||||
"shape": env.observation_space[obs_key].shape,
|
||||
}
|
||||
|
||||
features["action"] = {"dtype": "float32", "shape": env.action_space.shape, "names": None}
|
||||
|
||||
# Create empty dataset or load existing saved episodes
|
||||
sanity_check_dataset_name(repo_id, policy)
|
||||
dataset = LeRobotDataset.create(
|
||||
repo_id,
|
||||
fps,
|
||||
root=root,
|
||||
features=features,
|
||||
use_videos=video,
|
||||
image_writer_processes=num_image_writer_processes,
|
||||
image_writer_threads=num_image_writer_threads_per_camera * num_cameras,
|
||||
)
|
||||
|
||||
recorded_episodes = 0
|
||||
while True:
|
||||
log_say(f"Recording episode {dataset.num_episodes}", play_sounds)
|
||||
|
||||
if events is None:
|
||||
events = {"exit_early": False}
|
||||
|
||||
if episode_time_s is None:
|
||||
episode_time_s = float("inf")
|
||||
|
||||
timestamp = 0
|
||||
start_episode_t = time.perf_counter()
|
||||
|
||||
seed = np.random.randint(0, 1e5)
|
||||
observation, info = env.reset(seed=seed)
|
||||
|
||||
while timestamp < episode_time_s:
|
||||
start_loop_t = time.perf_counter()
|
||||
|
||||
if policy is not None:
|
||||
action = predict_action(observation, policy, device, use_amp)
|
||||
else:
|
||||
leader_pos = robot.leader_arms.main.read("Present_Position")
|
||||
action = process_action_from_leader(leader_pos)
|
||||
|
||||
observation, reward, terminated, _, info = env.step(action)
|
||||
|
||||
success = info.get("is_success", False)
|
||||
env_timestamp = info.get("timestamp", dataset.episode_buffer["size"] / fps)
|
||||
|
||||
frame = {
|
||||
"action": torch.from_numpy(action),
|
||||
"next.reward": reward,
|
||||
"next.success": success,
|
||||
"seed": seed,
|
||||
"timestamp": env_timestamp,
|
||||
}
|
||||
|
||||
for key in image_keys:
|
||||
if not key.startswith("observation.image"):
|
||||
frame["observation.image." + key] = observation[key]
|
||||
else:
|
||||
frame[key] = observation[key]
|
||||
|
||||
for key, obs_key in state_keys_dict.items():
|
||||
frame[key] = torch.from_numpy(observation[obs_key])
|
||||
|
||||
dataset.add_frame(frame)
|
||||
|
||||
if display_cameras and not is_headless():
|
||||
for key in image_keys:
|
||||
cv2.imshow(key, cv2.cvtColor(observation[key], cv2.COLOR_RGB2BGR))
|
||||
cv2.waitKey(1)
|
||||
|
||||
if fps is not None:
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
log_control_info(robot, dt_s, fps=fps)
|
||||
|
||||
timestamp = time.perf_counter() - start_episode_t
|
||||
if events["exit_early"] or terminated:
|
||||
events["exit_early"] = False
|
||||
break
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode", play_sounds)
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
dataset.save_episode(task=task)
|
||||
recorded_episodes += 1
|
||||
|
||||
if events["stop_recording"] or recorded_episodes >= num_episodes:
|
||||
break
|
||||
else:
|
||||
logging.info("Waiting for a few seconds before starting next episode recording...")
|
||||
busy_wait(3)
|
||||
|
||||
log_say("Stop recording", play_sounds, blocking=True)
|
||||
stop_recording(robot, listener, display_cameras)
|
||||
|
||||
if run_compute_stats:
|
||||
logging.info("Computing dataset statistics")
|
||||
dataset.consolidate(run_compute_stats)
|
||||
|
||||
if push_to_hub:
|
||||
dataset.push_to_hub(tags=tags)
|
||||
|
||||
log_say("Exiting", play_sounds)
|
||||
return dataset
|
||||
|
||||
|
||||
def replay(
|
||||
env, root: Path, repo_id: str, episode: int, fps: int | None = None, local_files_only: bool = True
|
||||
):
|
||||
env = env()
|
||||
|
||||
local_dir = Path(root) / repo_id
|
||||
if not local_dir.exists():
|
||||
raise ValueError(local_dir)
|
||||
|
||||
dataset = LeRobotDataset(repo_id, root=root, local_files_only=local_files_only)
|
||||
items = dataset.hf_dataset.select_columns("action")
|
||||
seeds = dataset.hf_dataset.select_columns("seed")["seed"]
|
||||
|
||||
from_idx = dataset.episode_data_index["from"][episode].item()
|
||||
to_idx = dataset.episode_data_index["to"][episode].item()
|
||||
env.reset(seed=seeds[from_idx].item())
|
||||
logging.info("Replaying episode")
|
||||
log_say("Replaying episode", play_sounds=True)
|
||||
for idx in range(from_idx, to_idx):
|
||||
start_episode_t = time.perf_counter()
|
||||
action = items[idx]["action"]
|
||||
env.step(action.unsqueeze(0).numpy())
|
||||
dt_s = time.perf_counter() - start_episode_t
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
subparsers = parser.add_subparsers(dest="mode", required=True)
|
||||
|
||||
# Set common options for all the subparsers
|
||||
base_parser = argparse.ArgumentParser(add_help=False)
|
||||
base_parser.add_argument(
|
||||
"--robot-path",
|
||||
type=str,
|
||||
default="lerobot/configs/robot/koch.yaml",
|
||||
help="Path to robot yaml file used to instantiate the robot using `make_robot` factory function.",
|
||||
)
|
||||
|
||||
base_parser.add_argument(
|
||||
"--sim-config",
|
||||
help="Path to a yaml config you want to use for initializing a sim environment based on gym ",
|
||||
)
|
||||
|
||||
parser_record = subparsers.add_parser("teleoperate", parents=[base_parser])
|
||||
|
||||
parser_record = subparsers.add_parser("record", parents=[base_parser])
|
||||
parser_record.add_argument(
|
||||
"--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--root",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Root directory where the dataset will be stored locally at '{root}/{repo_id}' (e.g. 'data/hf_username/dataset_name').",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--repo-id",
|
||||
type=str,
|
||||
default="lerobot/test",
|
||||
help="Dataset identifier. By convention it should match '{hf_username}/{dataset_name}' (e.g. `lerobot/test`).",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--episode-time-s",
|
||||
type=int,
|
||||
default=60,
|
||||
help="Number of seconds for data recording for each episode.",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--task",
|
||||
type=str,
|
||||
required=True,
|
||||
help="A description of the task preformed during recording that can be used as a language instruction.",
|
||||
)
|
||||
parser_record.add_argument("--num-episodes", type=int, default=50, help="Number of episodes to record.")
|
||||
parser_record.add_argument(
|
||||
"--run-compute-stats",
|
||||
type=int,
|
||||
default=1,
|
||||
help="By default, run the computation of the data statistics at the end of data collection. Compute intensive and not required to just replay an episode.",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--push-to-hub",
|
||||
type=int,
|
||||
default=1,
|
||||
help="Upload dataset to Hugging Face hub.",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--tags",
|
||||
type=str,
|
||||
nargs="*",
|
||||
help="Add tags to your dataset on the hub.",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--num-image-writer-processes",
|
||||
type=int,
|
||||
default=0,
|
||||
help=(
|
||||
"Number of subprocesses handling the saving of frames as PNGs. Set to 0 to use threads only; "
|
||||
"set to ≥1 to use subprocesses, each using threads to write images. The best number of processes "
|
||||
"and threads depends on your system. We recommend 4 threads per camera with 0 processes. "
|
||||
"If fps is unstable, adjust the thread count. If still unstable, try using 1 or more subprocesses."
|
||||
),
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--num-image-writer-threads-per-camera",
|
||||
type=int,
|
||||
default=4,
|
||||
help=(
|
||||
"Number of threads writing the frames as png images on disk, per camera. "
|
||||
"Too much threads might cause unstable teleoperation fps due to main thread being blocked. "
|
||||
"Not enough threads might cause low camera fps."
|
||||
),
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--display-cameras",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Visualize image observations with opencv.",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--resume",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Resume recording on an existing dataset.",
|
||||
)
|
||||
parser_replay = subparsers.add_parser("replay", parents=[base_parser])
|
||||
parser_replay.add_argument(
|
||||
"--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
|
||||
)
|
||||
parser_replay.add_argument(
|
||||
"--root",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Root directory where the dataset will be stored locally (e.g. 'data/hf_username/dataset_name'). By default, stored in cache folder.",
|
||||
)
|
||||
parser_replay.add_argument(
|
||||
"--repo-id",
|
||||
type=str,
|
||||
default="lerobot/test",
|
||||
help="Dataset identifier. By convention it should match '{hf_username}/{dataset_name}' (e.g. `lerobot/test`).",
|
||||
)
|
||||
parser_replay.add_argument("--episode", type=int, default=0, help="Index of the episodes to replay.")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
init_logging()
|
||||
|
||||
control_mode = args.mode
|
||||
robot_path = args.robot_path
|
||||
env_config_path = args.sim_config
|
||||
kwargs = vars(args)
|
||||
del kwargs["mode"]
|
||||
del kwargs["robot_path"]
|
||||
del kwargs["sim_config"]
|
||||
|
||||
# make gym env
|
||||
env_cfg = init_hydra_config(env_config_path)
|
||||
importlib.import_module(f"gym_{env_cfg.env.name}")
|
||||
|
||||
def env_constructor():
|
||||
return gym.make(env_cfg.env.handle, disable_env_checker=True, **env_cfg.env.gym)
|
||||
|
||||
robot = None
|
||||
process_leader_actions_fn = None
|
||||
|
||||
if control_mode in ["teleoperate", "record"]:
|
||||
# make robot
|
||||
robot_overrides = ["~cameras", "~follower_arms"]
|
||||
robot_cfg = init_hydra_config(robot_path, robot_overrides)
|
||||
robot = make_robot(robot_cfg)
|
||||
robot.connect()
|
||||
|
||||
calib_kwgs = init_sim_calibration(robot, env_cfg.calibration)
|
||||
|
||||
def process_leader_actions_fn(action):
|
||||
return real_positions_to_sim(action, **calib_kwgs)
|
||||
|
||||
robot.leader_arms.main.calibration = None
|
||||
|
||||
if control_mode == "teleoperate":
|
||||
teleoperate(env_constructor, robot, process_leader_actions_fn)
|
||||
|
||||
elif control_mode == "record":
|
||||
record(env_constructor, robot, process_leader_actions_fn, **kwargs)
|
||||
|
||||
elif control_mode == "replay":
|
||||
replay(env_constructor, **kwargs)
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Invalid control mode: '{control_mode}', only valid modes are teleoperate, record and replay."
|
||||
)
|
||||
|
||||
if robot and robot.is_connected:
|
||||
# Disconnect manually to avoid a "Core dump" during process
|
||||
# termination due to camera threads not properly exiting.
|
||||
robot.disconnect()
|
||||
@@ -1,335 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Evaluate a policy by running rollouts on the real robot and computing metrics.
|
||||
|
||||
Usage examples: evaluate a checkpoint from the LeRobot training script for 10 episodes.
|
||||
|
||||
```
|
||||
python lerobot/scripts/eval_on_robot.py \
|
||||
-p outputs/train/model/checkpoints/005000/pretrained_model \
|
||||
eval.n_episodes=10
|
||||
```
|
||||
|
||||
**NOTE** (michel-aractingi): This script is incomplete and it is being prepared
|
||||
for running training on the real robot.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import time
|
||||
from copy import deepcopy
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from tqdm import trange
|
||||
|
||||
from lerobot.common.policies.policy_protocol import Policy
|
||||
from lerobot.common.robot_devices.control_utils import busy_wait, is_headless
|
||||
from lerobot.common.robot_devices.robots.factory import Robot, make_robot
|
||||
from lerobot.common.utils.utils import (
|
||||
init_hydra_config,
|
||||
init_logging,
|
||||
log_say,
|
||||
)
|
||||
|
||||
|
||||
def rollout(robot: Robot, policy: Policy, fps: int, control_time_s: float = 20, use_amp: bool = True) -> dict:
|
||||
"""Run a batched policy rollout on the real robot.
|
||||
|
||||
The return dictionary contains:
|
||||
"robot": A a dictionary of (batch, sequence + 1, *) tensors mapped to observation
|
||||
keys. NOTE the that this has an extra sequence element relative to the other keys in the
|
||||
dictionary. This is because an extra observation is included for after the environment is
|
||||
terminated or truncated.
|
||||
"action": A (batch, sequence, action_dim) tensor of actions applied based on the observations (not
|
||||
including the last observations).
|
||||
"reward": A (batch, sequence) tensor of rewards received for applying the actions.
|
||||
"success": A (batch, sequence) tensor of success conditions (the only time this can be True is upon
|
||||
environment termination/truncation).
|
||||
"done": A (batch, sequence) tensor of **cumulative** done conditions. For any given batch element,
|
||||
the first True is followed by True's all the way till the end. This can be used for masking
|
||||
extraneous elements from the sequences above.
|
||||
|
||||
Args:
|
||||
robot: The robot class that defines the interface with the real robot.
|
||||
policy: The policy. Must be a PyTorch nn module.
|
||||
|
||||
Returns:
|
||||
The dictionary described above.
|
||||
"""
|
||||
# assert isinstance(policy, nn.Module), "Policy must be a PyTorch nn module."
|
||||
# device = get_device_from_parameters(policy)
|
||||
|
||||
# define keyboard listener
|
||||
listener, events = init_keyboard_listener()
|
||||
|
||||
# Reset the policy. TODO (michel-aractingi) add real policy evaluation once the code is ready.
|
||||
# policy.reset()
|
||||
|
||||
# Get observation from real robot
|
||||
observation = robot.capture_observation()
|
||||
|
||||
# Calculate reward. TODO (michel-aractingi)
|
||||
# in HIL-SERL it will be with a reward classifier
|
||||
reward = calculate_reward(observation)
|
||||
all_observations = []
|
||||
all_actions = []
|
||||
all_rewards = []
|
||||
all_successes = []
|
||||
|
||||
start_episode_t = time.perf_counter()
|
||||
timestamp = 0.0
|
||||
while timestamp < control_time_s:
|
||||
start_loop_t = time.perf_counter()
|
||||
|
||||
all_observations.append(deepcopy(observation))
|
||||
# observation = {key: observation[key].to(device, non_blocking=True) for key in observation}
|
||||
|
||||
# Apply the next action.
|
||||
while events["pause_policy"] and not events["human_intervention_step"]:
|
||||
busy_wait(0.5)
|
||||
|
||||
if events["human_intervention_step"]:
|
||||
# take over the robot's actions
|
||||
observation, action = robot.teleop_step(record_data=True)
|
||||
action = action["action"] # teleop step returns torch tensors but in a dict
|
||||
else:
|
||||
# explore with policy
|
||||
with torch.inference_mode():
|
||||
action = robot.follower_arms["main"].read("Present_Position")
|
||||
action = torch.from_numpy(action)
|
||||
robot.send_action(action)
|
||||
# action = predict_action(observation, policy, device, use_amp)
|
||||
|
||||
observation = robot.capture_observation()
|
||||
# Calculate reward
|
||||
# in HIL-SERL it will be with a reward classifier
|
||||
reward = calculate_reward(observation)
|
||||
|
||||
all_actions.append(action)
|
||||
all_rewards.append(torch.from_numpy(reward))
|
||||
all_successes.append(torch.tensor([False]))
|
||||
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
busy_wait(1 / fps - dt_s)
|
||||
timestamp = time.perf_counter() - start_episode_t
|
||||
if events["exit_early"]:
|
||||
events["exit_early"] = False
|
||||
events["human_intervention_step"] = False
|
||||
events["pause_policy"] = False
|
||||
break
|
||||
all_observations.append(deepcopy(observation))
|
||||
|
||||
dones = torch.tensor([False] * len(all_actions))
|
||||
dones[-1] = True
|
||||
# Stack the sequence along the first dimension so that we have (batch, sequence, *) tensors.
|
||||
ret = {
|
||||
"action": torch.stack(all_actions, dim=1),
|
||||
"next.reward": torch.stack(all_rewards, dim=1),
|
||||
"next.success": torch.stack(all_successes, dim=1),
|
||||
"done": dones,
|
||||
}
|
||||
stacked_observations = {}
|
||||
for key in all_observations[0]:
|
||||
stacked_observations[key] = torch.stack([obs[key] for obs in all_observations], dim=1)
|
||||
ret["observation"] = stacked_observations
|
||||
|
||||
listener.stop()
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
def eval_policy(
|
||||
robot: Robot,
|
||||
policy: torch.nn.Module,
|
||||
fps: float,
|
||||
n_episodes: int,
|
||||
control_time_s: int = 20,
|
||||
use_amp: bool = True,
|
||||
) -> dict:
|
||||
"""
|
||||
Args:
|
||||
env: The batch of environments.
|
||||
policy: The policy.
|
||||
n_episodes: The number of episodes to evaluate.
|
||||
Returns:
|
||||
Dictionary with metrics and data regarding the rollouts.
|
||||
"""
|
||||
# TODO (michel-aractingi) comment this out for testing with a fixed policy
|
||||
# assert isinstance(policy, Policy)
|
||||
# policy.eval()
|
||||
|
||||
sum_rewards = []
|
||||
max_rewards = []
|
||||
successes = []
|
||||
rollouts = []
|
||||
|
||||
start_eval = time.perf_counter()
|
||||
progbar = trange(n_episodes, desc="Evaluating policy on real robot")
|
||||
for _batch_idx in progbar:
|
||||
rollout_data = rollout(robot, policy, fps, control_time_s, use_amp)
|
||||
|
||||
rollouts.append(rollout_data)
|
||||
sum_rewards.append(sum(rollout_data["next.reward"]))
|
||||
max_rewards.append(max(rollout_data["next.reward"]))
|
||||
successes.append(rollout_data["next.success"][-1])
|
||||
|
||||
info = {
|
||||
"per_episode": [
|
||||
{
|
||||
"episode_ix": i,
|
||||
"sum_reward": sum_reward,
|
||||
"max_reward": max_reward,
|
||||
"pc_success": success * 100,
|
||||
}
|
||||
for i, (sum_reward, max_reward, success) in enumerate(
|
||||
zip(
|
||||
sum_rewards[:n_episodes],
|
||||
max_rewards[:n_episodes],
|
||||
successes[:n_episodes],
|
||||
strict=False,
|
||||
)
|
||||
)
|
||||
],
|
||||
"aggregated": {
|
||||
"avg_sum_reward": float(np.nanmean(torch.cat(sum_rewards[:n_episodes]))),
|
||||
"avg_max_reward": float(np.nanmean(torch.cat(max_rewards[:n_episodes]))),
|
||||
"pc_success": float(np.nanmean(torch.cat(successes[:n_episodes])) * 100),
|
||||
"eval_s": time.time() - start_eval,
|
||||
"eval_ep_s": (time.time() - start_eval) / n_episodes,
|
||||
},
|
||||
}
|
||||
|
||||
if robot.is_connected:
|
||||
robot.disconnect()
|
||||
|
||||
return info
|
||||
|
||||
|
||||
def calculate_reward(observation):
|
||||
"""
|
||||
Method to calculate reward function in some way.
|
||||
In HIL-SERL this is done through defining a reward classifier
|
||||
"""
|
||||
# reward = reward_classifier(observation)
|
||||
return np.array([0.0])
|
||||
|
||||
|
||||
def init_keyboard_listener():
|
||||
# Allow to exit early while recording an episode or resetting the environment,
|
||||
# by tapping the right arrow key '->'. This might require a sudo permission
|
||||
# to allow your terminal to monitor keyboard events.
|
||||
events = {}
|
||||
events["exit_early"] = False
|
||||
events["rerecord_episode"] = False
|
||||
events["pause_policy"] = False
|
||||
events["human_intervention_step"] = False
|
||||
|
||||
if is_headless():
|
||||
logging.warning(
|
||||
"Headless environment detected. On-screen cameras display and keyboard inputs will not be available."
|
||||
)
|
||||
listener = None
|
||||
return listener, events
|
||||
|
||||
# Only import pynput if not in a headless environment
|
||||
from pynput import keyboard
|
||||
|
||||
def on_press(key):
|
||||
try:
|
||||
if key == keyboard.Key.right:
|
||||
print("Right arrow key pressed. Exiting loop...")
|
||||
events["exit_early"] = True
|
||||
elif key == keyboard.Key.left:
|
||||
print("Left arrow key pressed. Exiting loop and rerecord the last episode...")
|
||||
events["rerecord_episode"] = True
|
||||
events["exit_early"] = True
|
||||
elif key == keyboard.Key.space:
|
||||
# check if first space press then pause the policy for the user to get ready
|
||||
# if second space press then the user is ready to start intervention
|
||||
if not events["pause_policy"]:
|
||||
print(
|
||||
"Space key pressed. Human intervention required.\n"
|
||||
"Place the leader in similar pose to the follower and press space again."
|
||||
)
|
||||
events["pause_policy"] = True
|
||||
log_say("Human intervention stage. Get ready to take over.", play_sounds=True)
|
||||
else:
|
||||
events["human_intervention_step"] = True
|
||||
print("Space key pressed. Human intervention starting.")
|
||||
log_say("Starting human intervention.", play_sounds=True)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error handling key press: {e}")
|
||||
|
||||
listener = keyboard.Listener(on_press=on_press)
|
||||
listener.start()
|
||||
|
||||
return listener, events
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
init_logging()
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
|
||||
)
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument(
|
||||
"--robot-path",
|
||||
type=str,
|
||||
default="lerobot/configs/robot/koch.yaml",
|
||||
help="Path to robot yaml file used to instantiate the robot using `make_robot` factory function.",
|
||||
)
|
||||
group.add_argument(
|
||||
"--robot-overrides",
|
||||
type=str,
|
||||
nargs="*",
|
||||
help="Any key=value arguments to override config values (use dots for.nested=overrides)",
|
||||
)
|
||||
group.add_argument(
|
||||
"-p",
|
||||
"--pretrained-policy-name-or-path",
|
||||
help=(
|
||||
"Either the repo ID of a model hosted on the Hub or a path to a directory containing weights "
|
||||
"saved using `Policy.save_pretrained`. If not provided, the policy is initialized from scratch "
|
||||
"(useful for debugging). This argument is mutually exclusive with `--config`."
|
||||
),
|
||||
)
|
||||
group.add_argument(
|
||||
"--config",
|
||||
help=(
|
||||
"Path to a yaml config you want to use for initializing a policy from scratch (useful for "
|
||||
"debugging). This argument is mutually exclusive with `--pretrained-policy-name-or-path` (`-p`)."
|
||||
),
|
||||
)
|
||||
parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.")
|
||||
parser.add_argument(
|
||||
"--out-dir",
|
||||
help=(
|
||||
"Where to save the evaluation outputs. If not provided, outputs are saved in "
|
||||
"outputs/eval/{timestamp}_{env_name}_{policy_name}"
|
||||
),
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
robot_cfg = init_hydra_config(args.robot_path, args.robot_overrides)
|
||||
robot = make_robot(robot_cfg)
|
||||
if not robot.is_connected:
|
||||
robot.connect()
|
||||
|
||||
eval_policy(robot, None, fps=40, n_episodes=2, control_time_s=100)
|
||||
@@ -66,7 +66,7 @@ def get_from_raw_to_lerobot_format_fn(raw_format: str):
|
||||
from lerobot.common.datasets.push_dataset_to_hub.umi_zarr_format import from_raw_to_lerobot_format
|
||||
elif raw_format == "aloha_hdf5":
|
||||
from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import from_raw_to_lerobot_format
|
||||
elif raw_format in ["rlds", "openx"]:
|
||||
elif "openx_rlds" in raw_format:
|
||||
from lerobot.common.datasets.push_dataset_to_hub.openx_rlds_format import from_raw_to_lerobot_format
|
||||
elif raw_format == "dora_parquet":
|
||||
from lerobot.common.datasets.push_dataset_to_hub.dora_parquet_format import from_raw_to_lerobot_format
|
||||
@@ -204,14 +204,24 @@ def push_dataset_to_hub(
|
||||
# convert dataset from original raw format to LeRobot format
|
||||
from_raw_to_lerobot_format = get_from_raw_to_lerobot_format_fn(raw_format)
|
||||
|
||||
hf_dataset, episode_data_index, info = from_raw_to_lerobot_format(
|
||||
raw_dir,
|
||||
videos_dir,
|
||||
fps,
|
||||
video,
|
||||
episodes,
|
||||
encoding,
|
||||
)
|
||||
fmt_kwgs = {
|
||||
"raw_dir": raw_dir,
|
||||
"videos_dir": videos_dir,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
"episodes": episodes,
|
||||
"encoding": encoding,
|
||||
}
|
||||
|
||||
if "openx_rlds." in raw_format:
|
||||
# Support for official OXE dataset name inside `raw_format`.
|
||||
# For instance, `raw_format="oxe_rlds"` uses the default formating (TODO what does that mean?),
|
||||
# and `raw_format="oxe_rlds.bridge_orig"` uses the brdige_orig formating
|
||||
_, openx_dataset_name = raw_format.split(".")
|
||||
print(f"Converting dataset [{openx_dataset_name}] from 'openx_rlds' to LeRobot format.")
|
||||
fmt_kwgs["openx_dataset_name"] = openx_dataset_name
|
||||
|
||||
hf_dataset, episode_data_index, info = from_raw_to_lerobot_format(**fmt_kwgs)
|
||||
|
||||
lerobot_dataset = LeRobotDataset.from_preloaded(
|
||||
repo_id=repo_id,
|
||||
@@ -280,7 +290,7 @@ def main():
|
||||
"--raw-format",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Dataset type (e.g. `pusht_zarr`, `umi_zarr`, `aloha_hdf5`, `xarm_pkl`, `dora_parquet`, `rlds`, `openx`).",
|
||||
help="Dataset type (e.g. `pusht_zarr`, `umi_zarr`, `aloha_hdf5`, `xarm_pkl`, `dora_parquet`, `openx_rlds`).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--repo-id",
|
||||
|
||||
@@ -1,310 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import logging
|
||||
import time
|
||||
from contextlib import nullcontext
|
||||
from pathlib import Path
|
||||
from pprint import pformat
|
||||
|
||||
import hydra
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import wandb
|
||||
from deepdiff import DeepDiff
|
||||
from omegaconf import DictConfig, OmegaConf
|
||||
from termcolor import colored
|
||||
from torch import optim
|
||||
from torch.cuda.amp import GradScaler
|
||||
from torch.utils.data import DataLoader, WeightedRandomSampler, random_split
|
||||
from tqdm import tqdm
|
||||
|
||||
from lerobot.common.datasets.factory import resolve_delta_timestamps
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.common.logger import Logger
|
||||
from lerobot.common.policies.factory import _policy_cfg_from_hydra_cfg
|
||||
from lerobot.common.policies.hilserl.classifier.configuration_classifier import ClassifierConfig
|
||||
from lerobot.common.policies.hilserl.classifier.modeling_classifier import Classifier
|
||||
from lerobot.common.utils.utils import (
|
||||
format_big_number,
|
||||
get_safe_torch_device,
|
||||
init_hydra_config,
|
||||
set_global_seed,
|
||||
)
|
||||
|
||||
|
||||
def get_model(cfg, logger):
|
||||
classifier_config = _policy_cfg_from_hydra_cfg(ClassifierConfig, cfg)
|
||||
model = Classifier(classifier_config)
|
||||
if cfg.resume:
|
||||
model.load_state_dict(Classifier.from_pretrained(str(logger.last_pretrained_model_dir)).state_dict())
|
||||
return model
|
||||
|
||||
|
||||
def create_balanced_sampler(dataset, cfg):
|
||||
# Creates a weighted sampler to handle class imbalance
|
||||
|
||||
labels = torch.tensor([item[cfg.training.label_key] for item in dataset])
|
||||
_, counts = torch.unique(labels, return_counts=True)
|
||||
class_weights = 1.0 / counts.float()
|
||||
sample_weights = class_weights[labels]
|
||||
|
||||
return WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)
|
||||
|
||||
|
||||
def train_epoch(model, train_loader, criterion, optimizer, grad_scaler, device, logger, step, cfg):
|
||||
# Single epoch training loop with AMP support and progress tracking
|
||||
model.train()
|
||||
correct = 0
|
||||
total = 0
|
||||
|
||||
pbar = tqdm(train_loader, desc="Training")
|
||||
for batch_idx, batch in enumerate(pbar):
|
||||
start_time = time.perf_counter()
|
||||
images = batch[cfg.training.image_key].to(device)
|
||||
labels = batch[cfg.training.label_key].float().to(device)
|
||||
|
||||
# Forward pass with optional AMP
|
||||
with torch.autocast(device_type=device.type) if cfg.training.use_amp else nullcontext():
|
||||
outputs = model(images)
|
||||
loss = criterion(outputs.logits, labels)
|
||||
|
||||
# Backward pass with gradient scaling if AMP enabled
|
||||
optimizer.zero_grad()
|
||||
if cfg.training.use_amp:
|
||||
grad_scaler.scale(loss).backward()
|
||||
grad_scaler.step(optimizer)
|
||||
grad_scaler.update()
|
||||
else:
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
# Track metrics
|
||||
if model.config.num_classes == 2:
|
||||
predictions = (torch.sigmoid(outputs.logits) > 0.5).float()
|
||||
else:
|
||||
predictions = torch.argmax(outputs.logits, dim=1)
|
||||
correct += (predictions == labels).sum().item()
|
||||
total += labels.size(0)
|
||||
|
||||
current_acc = 100 * correct / total
|
||||
train_info = {
|
||||
"loss": loss.item(),
|
||||
"accuracy": current_acc,
|
||||
"dataloading_s": time.perf_counter() - start_time,
|
||||
}
|
||||
|
||||
logger.log_dict(train_info, step + batch_idx, mode="train")
|
||||
pbar.set_postfix({"loss": f"{loss.item():.4f}", "acc": f"{current_acc:.2f}%"})
|
||||
|
||||
|
||||
def validate(model, val_loader, criterion, device, logger, cfg, num_samples_to_log=8):
|
||||
# Validation loop with metric tracking and sample logging
|
||||
model.eval()
|
||||
correct = 0
|
||||
total = 0
|
||||
batch_start_time = time.perf_counter()
|
||||
samples = []
|
||||
running_loss = 0
|
||||
|
||||
with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.training.use_amp else nullcontext():
|
||||
for batch in tqdm(val_loader, desc="Validation"):
|
||||
images = batch[cfg.training.image_key].to(device)
|
||||
labels = batch[cfg.training.label_key].float().to(device)
|
||||
|
||||
outputs = model(images)
|
||||
loss = criterion(outputs.logits, labels)
|
||||
|
||||
# Track metrics
|
||||
if model.config.num_classes == 2:
|
||||
predictions = (torch.sigmoid(outputs.logits) > 0.5).float()
|
||||
else:
|
||||
predictions = torch.argmax(outputs.logits, dim=1)
|
||||
correct += (predictions == labels).sum().item()
|
||||
total += labels.size(0)
|
||||
running_loss += loss.item()
|
||||
|
||||
# Log sample predictions for visualization
|
||||
if len(samples) < num_samples_to_log:
|
||||
for i in range(min(num_samples_to_log - len(samples), len(images))):
|
||||
if model.config.num_classes == 2:
|
||||
confidence = round(outputs.probabilities[i].item(), 3)
|
||||
else:
|
||||
confidence = [round(prob, 3) for prob in outputs.probabilities[i].tolist()]
|
||||
samples.append(
|
||||
{
|
||||
"image": wandb.Image(images[i].cpu()),
|
||||
"true_label": labels[i].item(),
|
||||
"predicted": predictions[i].item(),
|
||||
"confidence": confidence,
|
||||
}
|
||||
)
|
||||
|
||||
accuracy = 100 * correct / total
|
||||
avg_loss = running_loss / len(val_loader)
|
||||
|
||||
eval_info = {
|
||||
"loss": avg_loss,
|
||||
"accuracy": accuracy,
|
||||
"eval_s": time.perf_counter() - batch_start_time,
|
||||
"eval/prediction_samples": wandb.Table(
|
||||
data=[[s["image"], s["true_label"], s["predicted"], f"{s['confidence']}"] for s in samples],
|
||||
columns=["Image", "True Label", "Predicted", "Confidence"],
|
||||
)
|
||||
if logger._cfg.wandb.enable
|
||||
else None,
|
||||
}
|
||||
|
||||
return accuracy, eval_info
|
||||
|
||||
|
||||
@hydra.main(version_base="1.2", config_path="../configs", config_name="hilserl_classifier")
|
||||
def train(cfg: DictConfig) -> None:
|
||||
# Main training pipeline with support for resuming training
|
||||
logging.info(OmegaConf.to_yaml(cfg))
|
||||
|
||||
# Initialize training environment
|
||||
device = get_safe_torch_device(cfg.device, log=True)
|
||||
set_global_seed(cfg.seed)
|
||||
|
||||
out_dir = Path(cfg.output_dir)
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
logger = Logger(cfg, out_dir, cfg.wandb.job_name if cfg.wandb.enable else None)
|
||||
|
||||
# Setup dataset and dataloaders
|
||||
dataset = LeRobotDataset(cfg.dataset_repo_id)
|
||||
logging.info(f"Dataset size: {len(dataset)}")
|
||||
|
||||
train_size = int(cfg.train_split_proportion * len(dataset))
|
||||
val_size = len(dataset) - train_size
|
||||
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
|
||||
|
||||
sampler = create_balanced_sampler(train_dataset, cfg)
|
||||
train_loader = DataLoader(
|
||||
train_dataset,
|
||||
batch_size=cfg.training.batch_size,
|
||||
num_workers=cfg.training.num_workers,
|
||||
sampler=sampler,
|
||||
pin_memory=True,
|
||||
)
|
||||
|
||||
val_loader = DataLoader(
|
||||
val_dataset,
|
||||
batch_size=cfg.eval.batch_size,
|
||||
shuffle=False,
|
||||
num_workers=cfg.training.num_workers,
|
||||
pin_memory=True,
|
||||
)
|
||||
|
||||
# Resume training if requested
|
||||
step = 0
|
||||
best_val_acc = 0
|
||||
|
||||
if cfg.resume:
|
||||
if not Logger.get_last_checkpoint_dir(out_dir).exists():
|
||||
raise RuntimeError(
|
||||
"You have set resume=True, but there is no model checkpoint in "
|
||||
f"{Logger.get_last_checkpoint_dir(out_dir)}"
|
||||
)
|
||||
checkpoint_cfg_path = str(Logger.get_last_pretrained_model_dir(out_dir) / "config.yaml")
|
||||
logging.info(
|
||||
colored(
|
||||
"You have set resume=True, indicating that you wish to resume a run",
|
||||
color="yellow",
|
||||
attrs=["bold"],
|
||||
)
|
||||
)
|
||||
# Load and validate checkpoint configuration
|
||||
checkpoint_cfg = init_hydra_config(checkpoint_cfg_path)
|
||||
# Check for differences between the checkpoint configuration and provided configuration.
|
||||
# Hack to resolve the delta_timestamps ahead of time in order to properly diff.
|
||||
resolve_delta_timestamps(cfg)
|
||||
diff = DeepDiff(OmegaConf.to_container(checkpoint_cfg), OmegaConf.to_container(cfg))
|
||||
# Ignore the `resume` and parameters.
|
||||
if "values_changed" in diff and "root['resume']" in diff["values_changed"]:
|
||||
del diff["values_changed"]["root['resume']"]
|
||||
if len(diff) > 0:
|
||||
logging.warning(
|
||||
"At least one difference was detected between the checkpoint configuration and "
|
||||
f"the provided configuration: \n{pformat(diff)}\nNote that the checkpoint configuration "
|
||||
"takes precedence.",
|
||||
)
|
||||
# Use the checkpoint config instead of the provided config (but keep `resume` parameter).
|
||||
cfg = checkpoint_cfg
|
||||
cfg.resume = True
|
||||
|
||||
# Initialize model and training components
|
||||
model = get_model(cfg=cfg, logger=logger).to(device)
|
||||
|
||||
optimizer = optim.AdamW(model.parameters(), lr=cfg.training.learning_rate)
|
||||
# Use BCEWithLogitsLoss for binary classification and CrossEntropyLoss for multi-class
|
||||
criterion = nn.BCEWithLogitsLoss() if model.config.num_classes == 2 else nn.CrossEntropyLoss()
|
||||
grad_scaler = GradScaler(enabled=cfg.training.use_amp)
|
||||
|
||||
# Log model parameters
|
||||
num_learnable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
||||
num_total_params = sum(p.numel() for p in model.parameters())
|
||||
logging.info(f"Learnable parameters: {format_big_number(num_learnable_params)}")
|
||||
logging.info(f"Total parameters: {format_big_number(num_total_params)}")
|
||||
|
||||
if cfg.resume:
|
||||
step = logger.load_last_training_state(optimizer, None)
|
||||
|
||||
# Training loop with validation and checkpointing
|
||||
for epoch in range(cfg.training.num_epochs):
|
||||
logging.info(f"\nEpoch {epoch+1}/{cfg.training.num_epochs}")
|
||||
|
||||
train_epoch(model, train_loader, criterion, optimizer, grad_scaler, device, logger, step, cfg)
|
||||
|
||||
# Periodic validation
|
||||
if cfg.training.eval_freq > 0 and (epoch + 1) % cfg.training.eval_freq == 0:
|
||||
val_acc, eval_info = validate(
|
||||
model,
|
||||
val_loader,
|
||||
criterion,
|
||||
device,
|
||||
logger,
|
||||
cfg,
|
||||
)
|
||||
logger.log_dict(eval_info, step + len(train_loader), mode="eval")
|
||||
|
||||
# Save best model
|
||||
if val_acc > best_val_acc:
|
||||
best_val_acc = val_acc
|
||||
logger.save_checkpoint(
|
||||
train_step=step + len(train_loader),
|
||||
policy=model,
|
||||
optimizer=optimizer,
|
||||
scheduler=None,
|
||||
identifier="best",
|
||||
)
|
||||
|
||||
# Periodic checkpointing
|
||||
if cfg.training.save_checkpoint and (epoch + 1) % cfg.training.save_freq == 0:
|
||||
logger.save_checkpoint(
|
||||
train_step=step + len(train_loader),
|
||||
policy=model,
|
||||
optimizer=optimizer,
|
||||
scheduler=None,
|
||||
identifier=f"{epoch+1:06d}",
|
||||
)
|
||||
|
||||
step += len(train_loader)
|
||||
|
||||
logging.info("Training completed")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
train()
|
||||
@@ -207,17 +207,11 @@ def main():
|
||||
required=True,
|
||||
help="Episode to visualize.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--local-files-only",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Use local files only. By default, this script will try to fetch the dataset from the hub if it exists.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--root",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Root directory for the dataset stored locally (e.g. `--root data`). By default, the dataset will be loaded from hugging face cache folder, or downloaded from the hub if available.",
|
||||
help="Root directory for a dataset stored locally (e.g. `--root data`). By default, the dataset will be loaded from hugging face cache folder, or downloaded from the hub if available.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
@@ -275,10 +269,9 @@ def main():
|
||||
kwargs = vars(args)
|
||||
repo_id = kwargs.pop("repo_id")
|
||||
root = kwargs.pop("root")
|
||||
local_files_only = kwargs.pop("local_files_only")
|
||||
|
||||
logging.info("Loading dataset")
|
||||
dataset = LeRobotDataset(repo_id, root=root, local_files_only=local_files_only)
|
||||
dataset = LeRobotDataset(repo_id, root=root, local_files_only=True)
|
||||
|
||||
visualize_dataset(dataset, **vars(args))
|
||||
|
||||
|
||||
@@ -234,12 +234,6 @@ def main():
|
||||
required=True,
|
||||
help="Name of hugging face repositery containing a LeRobotDataset dataset (e.g. `lerobot/pusht` for https://huggingface.co/datasets/lerobot/pusht).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--local-files-only",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Use local files only. By default, this script will try to fetch the dataset from the hub if it exists.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--root",
|
||||
type=Path,
|
||||
@@ -288,9 +282,7 @@ def main():
|
||||
kwargs = vars(args)
|
||||
repo_id = kwargs.pop("repo_id")
|
||||
root = kwargs.pop("root")
|
||||
local_files_only = kwargs.pop("local_files_only")
|
||||
|
||||
dataset = LeRobotDataset(repo_id, root=root, local_files_only=local_files_only)
|
||||
dataset = LeRobotDataset(repo_id, root=root, local_files_only=True)
|
||||
visualize_dataset_html(dataset, **kwargs)
|
||||
|
||||
|
||||
|
||||
@@ -158,7 +158,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
|
||||
assert dataset.meta.total_episodes == 2
|
||||
assert len(dataset) == 2
|
||||
|
||||
replay(robot, episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False, local_files_only=True)
|
||||
replay(robot, episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False)
|
||||
|
||||
# TODO(rcadene, aliberts): rethink this design
|
||||
if robot_type == "aloha":
|
||||
@@ -295,12 +295,24 @@ def test_resume_record(tmpdir, request, robot_type, mock):
|
||||
dataset = record(**record_kwargs)
|
||||
assert len(dataset) == 1, f"`dataset` should contain 1 frame, not {len(dataset)}"
|
||||
|
||||
# init_dataset_return_value = {}
|
||||
|
||||
# def wrapped_init_dataset(*args, **kwargs):
|
||||
# nonlocal init_dataset_return_value
|
||||
# init_dataset_return_value = init_dataset(*args, **kwargs)
|
||||
# return init_dataset_return_value
|
||||
|
||||
# with patch("lerobot.scripts.control_robot.init_dataset", wraps=wrapped_init_dataset):
|
||||
|
||||
with pytest.raises(FileExistsError):
|
||||
# Dataset already exists, but resume=False by default
|
||||
record(**record_kwargs)
|
||||
|
||||
dataset = record(**record_kwargs, resume=True)
|
||||
assert len(dataset) == 2, f"`dataset` should contain 2 frames, not {len(dataset)}"
|
||||
# assert (
|
||||
# init_dataset_return_value["num_episodes"] == 2
|
||||
# ), "`init_dataset` should load the previous episode"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("robot_type, mock", [("koch", True)])
|
||||
|
||||
@@ -383,7 +383,7 @@ def test_backward_compatibility(env_name, policy_name, extra_overrides, file_nam
|
||||
include a report on what changed and how that affected the outputs.
|
||||
2. Go to the `if __name__ == "__main__"` block of `tests/scripts/save_policy_to_safetensors.py` and
|
||||
add the policies you want to update the test artifacts for.
|
||||
3. Run `python tests/scripts/save_policy_to_safetensors.py`. The test artifact
|
||||
3. Run `DATA_DIR=tests/data python tests/scripts/save_policy_to_safetensors.py`. The test artifact
|
||||
should be updated.
|
||||
4. Check that this test now passes.
|
||||
5. Remember to restore `tests/scripts/save_policy_to_safetensors.py` to its original state.
|
||||
|
||||
@@ -5,7 +5,7 @@ we skip them for now in our CI.
|
||||
|
||||
Example to run backward compatiblity tests locally:
|
||||
```
|
||||
python -m pytest --run-skipped tests/test_push_dataset_to_hub.py::test_push_dataset_to_hub_pusht_backward_compatibility
|
||||
DATA_DIR=tests/data python -m pytest --run-skipped tests/test_push_dataset_to_hub.py::test_push_dataset_to_hub_pusht_backward_compatibility
|
||||
```
|
||||
"""
|
||||
|
||||
@@ -330,7 +330,7 @@ def test_push_dataset_to_hub_format(required_packages, tmpdir, raw_format, repo_
|
||||
],
|
||||
)
|
||||
@pytest.mark.skip(
|
||||
"Not compatible with our CI since it downloads raw datasets. Run with `python -m pytest --run-skipped tests/test_push_dataset_to_hub.py::test_push_dataset_to_hub_pusht_backward_compatibility`"
|
||||
"Not compatible with our CI since it downloads raw datasets. Run with `DATA_DIR=tests/data python -m pytest --run-skipped tests/test_push_dataset_to_hub.py::test_push_dataset_to_hub_pusht_backward_compatibility`"
|
||||
)
|
||||
def test_push_dataset_to_hub_pusht_backward_compatibility(tmpdir, raw_format, repo_id):
|
||||
_, dataset_id = repo_id.split("/")
|
||||
|
||||
@@ -1,251 +0,0 @@
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from hydra import compose, initialize_config_dir
|
||||
from torch import nn
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
from lerobot.common.policies.hilserl.classifier.configuration_classifier import ClassifierConfig
|
||||
from lerobot.common.policies.hilserl.classifier.modeling_classifier import Classifier
|
||||
from lerobot.scripts.train_hilserl_classifier import (
|
||||
create_balanced_sampler,
|
||||
train,
|
||||
train_epoch,
|
||||
validate,
|
||||
)
|
||||
|
||||
|
||||
class MockDataset(Dataset):
|
||||
def __init__(self, data):
|
||||
self.data = data
|
||||
self.meta = MagicMock()
|
||||
self.meta.stats = {}
|
||||
|
||||
def __getitem__(self, idx):
|
||||
return self.data[idx]
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data)
|
||||
|
||||
|
||||
def make_dummy_model():
|
||||
model_config = ClassifierConfig(num_classes=2, model_name="hf-tiny-model-private/tiny-random-ResNetModel")
|
||||
model = Classifier(config=model_config)
|
||||
return model
|
||||
|
||||
|
||||
def test_create_balanced_sampler():
|
||||
# Mock dataset with imbalanced classes
|
||||
data = [
|
||||
{"label": 0},
|
||||
{"label": 0},
|
||||
{"label": 1},
|
||||
{"label": 0},
|
||||
{"label": 1},
|
||||
{"label": 1},
|
||||
{"label": 1},
|
||||
{"label": 1},
|
||||
]
|
||||
dataset = MockDataset(data)
|
||||
cfg = MagicMock()
|
||||
cfg.training.label_key = "label"
|
||||
|
||||
sampler = create_balanced_sampler(dataset, cfg)
|
||||
|
||||
# Get weights from the sampler
|
||||
weights = sampler.weights.float()
|
||||
|
||||
# Check that samples have appropriate weights
|
||||
labels = [item["label"] for item in data]
|
||||
class_counts = torch.tensor([labels.count(0), labels.count(1)], dtype=torch.float32)
|
||||
class_weights = 1.0 / class_counts
|
||||
expected_weights = torch.tensor([class_weights[label] for label in labels], dtype=torch.float32)
|
||||
|
||||
# Test that the weights are correct
|
||||
assert torch.allclose(weights, expected_weights)
|
||||
|
||||
|
||||
def test_train_epoch():
|
||||
model = make_dummy_model()
|
||||
# Mock components
|
||||
model.train = MagicMock()
|
||||
|
||||
train_loader = [
|
||||
{
|
||||
"image": torch.rand(2, 3, 224, 224),
|
||||
"label": torch.tensor([0.0, 1.0]),
|
||||
}
|
||||
]
|
||||
|
||||
criterion = nn.BCEWithLogitsLoss()
|
||||
optimizer = MagicMock()
|
||||
grad_scaler = MagicMock()
|
||||
device = torch.device("cpu")
|
||||
logger = MagicMock()
|
||||
step = 0
|
||||
cfg = MagicMock()
|
||||
cfg.training.image_key = "image"
|
||||
cfg.training.label_key = "label"
|
||||
cfg.training.use_amp = False
|
||||
|
||||
# Call the function under test
|
||||
train_epoch(
|
||||
model,
|
||||
train_loader,
|
||||
criterion,
|
||||
optimizer,
|
||||
grad_scaler,
|
||||
device,
|
||||
logger,
|
||||
step,
|
||||
cfg,
|
||||
)
|
||||
|
||||
# Check that model.train() was called
|
||||
model.train.assert_called_once()
|
||||
|
||||
# Check that optimizer.zero_grad() was called
|
||||
optimizer.zero_grad.assert_called()
|
||||
|
||||
# Check that logger.log_dict was called
|
||||
logger.log_dict.assert_called()
|
||||
|
||||
|
||||
def test_validate():
|
||||
model = make_dummy_model()
|
||||
|
||||
# Mock components
|
||||
model.eval = MagicMock()
|
||||
val_loader = [
|
||||
{
|
||||
"image": torch.rand(2, 3, 224, 224),
|
||||
"label": torch.tensor([0.0, 1.0]),
|
||||
}
|
||||
]
|
||||
criterion = nn.BCEWithLogitsLoss()
|
||||
device = torch.device("cpu")
|
||||
logger = MagicMock()
|
||||
cfg = MagicMock()
|
||||
cfg.training.image_key = "image"
|
||||
cfg.training.label_key = "label"
|
||||
cfg.training.use_amp = False
|
||||
|
||||
# Call validate
|
||||
accuracy, eval_info = validate(model, val_loader, criterion, device, logger, cfg)
|
||||
|
||||
# Check that model.eval() was called
|
||||
model.eval.assert_called_once()
|
||||
|
||||
# Check accuracy/eval_info are calculated and of the correct type
|
||||
assert isinstance(accuracy, float)
|
||||
assert isinstance(eval_info, dict)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("resume", [True, False])
|
||||
@patch("lerobot.scripts.train_hilserl_classifier.init_hydra_config")
|
||||
@patch("lerobot.scripts.train_hilserl_classifier.Logger.get_last_checkpoint_dir")
|
||||
@patch("lerobot.scripts.train_hilserl_classifier.Logger.get_last_pretrained_model_dir")
|
||||
@patch("lerobot.scripts.train_hilserl_classifier.Logger")
|
||||
@patch("lerobot.scripts.train_hilserl_classifier.LeRobotDataset")
|
||||
@patch("lerobot.scripts.train_hilserl_classifier.make_policy")
|
||||
def test_resume_function(
|
||||
mock_make_policy,
|
||||
mock_dataset,
|
||||
mock_logger,
|
||||
mock_get_last_pretrained_model_dir,
|
||||
mock_get_last_checkpoint_dir,
|
||||
mock_init_hydra_config,
|
||||
resume,
|
||||
):
|
||||
# Initialize Hydra
|
||||
test_file_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
config_dir = os.path.abspath(os.path.join(test_file_dir, "..", "lerobot", "configs", "policy"))
|
||||
assert os.path.exists(config_dir), f"Config directory does not exist at {config_dir}"
|
||||
|
||||
with initialize_config_dir(config_dir=config_dir, job_name="test_app", version_base="1.2"):
|
||||
cfg = compose(
|
||||
config_name="reward_classifier",
|
||||
overrides=[
|
||||
"device=cpu",
|
||||
"seed=42",
|
||||
f"output_dir={tempfile.mkdtemp()}",
|
||||
"wandb.enable=False",
|
||||
f"resume={resume}",
|
||||
"dataset_repo_id=dataset_repo_id",
|
||||
"train_split_proportion=0.8",
|
||||
"training.num_workers=0",
|
||||
"training.batch_size=2",
|
||||
"training.image_key=image",
|
||||
"training.label_key=label",
|
||||
"training.use_amp=False",
|
||||
"training.num_epochs=1",
|
||||
"eval.batch_size=2",
|
||||
],
|
||||
)
|
||||
|
||||
# Mock the init_hydra_config function to return cfg
|
||||
mock_init_hydra_config.return_value = cfg
|
||||
|
||||
# Mock dataset
|
||||
dataset = MockDataset([{"image": torch.rand(3, 224, 224), "label": i % 2} for i in range(10)])
|
||||
mock_dataset.return_value = dataset
|
||||
|
||||
# Mock checkpoint handling
|
||||
mock_checkpoint_dir = MagicMock(spec=Path)
|
||||
mock_checkpoint_dir.exists.return_value = resume # Only exists if resuming
|
||||
mock_get_last_checkpoint_dir.return_value = mock_checkpoint_dir
|
||||
mock_get_last_pretrained_model_dir.return_value = Path(tempfile.mkdtemp())
|
||||
|
||||
# Mock logger
|
||||
logger = MagicMock()
|
||||
resumed_step = 1000
|
||||
if resume:
|
||||
logger.load_last_training_state.return_value = resumed_step
|
||||
else:
|
||||
logger.load_last_training_state.return_value = 0
|
||||
mock_logger.return_value = logger
|
||||
|
||||
# Instantiate the model and set make_policy to return it
|
||||
model = make_dummy_model()
|
||||
mock_make_policy.return_value = model
|
||||
|
||||
# Call train
|
||||
train(cfg)
|
||||
|
||||
# Check that checkpoint handling methods were called
|
||||
if resume:
|
||||
mock_get_last_checkpoint_dir.assert_called_once_with(Path(cfg.output_dir))
|
||||
mock_get_last_pretrained_model_dir.assert_called_once_with(Path(cfg.output_dir))
|
||||
mock_checkpoint_dir.exists.assert_called_once()
|
||||
logger.load_last_training_state.assert_called_once()
|
||||
else:
|
||||
mock_get_last_checkpoint_dir.assert_not_called()
|
||||
mock_get_last_pretrained_model_dir.assert_not_called()
|
||||
mock_checkpoint_dir.exists.assert_not_called()
|
||||
logger.load_last_training_state.assert_not_called()
|
||||
|
||||
# Collect the steps from logger.log_dict calls
|
||||
train_log_calls = logger.log_dict.call_args_list
|
||||
|
||||
# Extract the steps used in the train logging
|
||||
steps = []
|
||||
for call in train_log_calls:
|
||||
mode = call.kwargs.get("mode", call.args[2] if len(call.args) > 2 else None)
|
||||
if mode == "train":
|
||||
step = call.kwargs.get("step", call.args[1] if len(call.args) > 1 else None)
|
||||
steps.append(step)
|
||||
|
||||
expected_start_step = resumed_step if resume else 0
|
||||
|
||||
# Calculate expected_steps
|
||||
train_size = int(cfg.train_split_proportion * len(dataset))
|
||||
batch_size = cfg.training.batch_size
|
||||
num_batches = (train_size + batch_size - 1) // batch_size
|
||||
|
||||
expected_steps = [expected_start_step + i for i in range(num_batches)]
|
||||
|
||||
assert steps == expected_steps, f"Expected steps {expected_steps}, got {steps}"
|
||||
Reference in New Issue
Block a user