Compare commits


31 Commits

Author SHA1 Message Date
Simon Alibert
1e6a7ea334 Fix version 2024-09-10 19:26:03 +02:00
Simon Alibert
4439d3af69 Try different base image 2024-09-10 19:19:15 +02:00
Simon Alibert
599b58daa0 Remove ffmpeg build dependencies 2024-09-10 19:00:52 +02:00
Alexander Soare
a60d27b132 Raise ValueError if horizon is incompatible with downsampling (#422) 2024-09-09 17:22:46 +01:00
Mishig
9c463661c1 [Vizualization] Better UI on small screens (like in smartphones) (#423) 2024-09-09 15:39:40 +02:00
Mishig
4255655618 [Vizualization] Show user error if videos codec is not supported (#424) 2024-09-09 15:38:41 +02:00
Joe Clinton
f17d9a2ba1 Bug: Fix VQ-Bet not working when n_action_pred_token=1 (#420)
Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
2024-09-09 09:41:13 +01:00
Remi
9ff829a3a1 Add comments for Aloha (#417)
Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com>
2024-09-06 21:07:52 +02:00
Mishig
d6516f0e03 [Visualization tool] Fix when dim state != dim action (#415) 2024-09-06 17:07:26 +02:00
Jack Vial
b0b8612eff fix(calibrate): fix calibrate arms option type. should be str not int (#418)
Co-authored-by: Remi <remi.cadene@huggingface.co>
2024-09-06 14:44:31 +02:00
Mishig
1072a055db [Visualization tool] Fix videos sync (#416) 2024-09-06 10:16:08 +02:00
Remi
9c9f5cac90 Add IntelRealSenseCamera (#410)
Co-authored-by: Simon Alibert <simon.alibert@huggingface.co>
Co-authored-by: shantanuparab-tr <shantanu@trossenrobotics.com>
Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com>
2024-09-05 23:59:41 +02:00
Simon Alibert
9d0c6fe419 Fix nightlies & untrack json files from git lfs (#414) 2024-09-05 15:07:43 +02:00
Simon Alibert
54ac25cfc9 Revert "hotfix"
This reverts commit 150a292795.
2024-09-05 12:54:53 +02:00
Remi Cadene
150a292795 hotfix 2024-09-04 22:03:33 +02:00
Remi
429a463aff Control aloha robot natively (#316)
Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com>
2024-09-04 19:28:05 +02:00
Jack Vial
27ba2951d1 fix(tdmpc): Add missing save_freq to tdmpc policy config (#404)
Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
2024-09-02 19:04:41 +01:00
Jack Vial
b2896d38f5 fix(act): n_vae_encoder_layers config parameter wasn't being used (#400) 2024-09-02 18:29:27 +01:00
Kenneth Gerald Hamilton
c0da806232 repair mailto link (#397) 2024-09-01 00:11:39 +02:00
Mishig
114e09f570 rm EpisodeSampler from viz (#389) 2024-08-30 10:53:55 +02:00
Simon Alibert
04a995e7d1 Fix safe_action (#395) 2024-08-30 10:36:05 +02:00
Michel Aractingi
4806336816 Add the possibility to visualize language instructions in visualize_dataset_html.py (#388)
Co-authored-by: Mishig <dmishig@gmail.com>
2024-08-28 11:50:31 +02:00
Remi
1ce418e4a1 Add koch bimanual (#385) 2024-08-28 00:53:31 +02:00
Michel Aractingi
eb4c505cff Support for converting OpenX datasets from RLDS format to LeRobotDataset (#354)
Signed-off-by: youliangtan <tan_you_liang@hotmail.com>
Co-authored-by: Simon Alibert <alibert.sim@gmail.com>
Co-authored-by: youliangtan <tan_you_liang@hotmail.com>
Co-authored-by: Remi <re.cadene@gmail.com>
2024-08-27 09:07:00 +02:00
Mishig
aad59e6b6b Fix videos in visualize_dataset are not in sync (#382) 2024-08-26 17:38:48 +02:00
Alexander Soare
9ce98bb93c Add safety limits on relative action target (#373) 2024-08-26 14:30:18 +01:00
Alexander Soare
97086cdcdf Make gripper_open_degree a config param (#379) 2024-08-26 12:28:16 +01:00
Alexander Soare
9c7649f140 Make sure init_hydra_config does not require any keys (#376) 2024-08-23 12:27:08 +01:00
Zhuoheng Li
a2592a5563 Provide more information to the user (#358)
Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
Co-authored-by: Remi <re.cadene@gmail.com>
2024-08-23 11:00:35 +01:00
ellacroix
b5ad79a7d3 Fix typo in tutorial (#371) 2024-08-21 14:14:01 +02:00
Remi
996468bcce Update README.md 2024-08-20 16:45:57 +02:00
198 changed files with 8252 additions and 1256 deletions

View File

@@ -0,0 +1,68 @@
{
"homing_offset": [
2048,
3072,
3072,
-1024,
-1024,
2048,
-2048,
2048,
-2048
],
"drive_mode": [
1,
1,
1,
0,
0,
1,
0,
1,
0
],
"start_pos": [
2015,
3058,
3061,
1071,
1071,
2035,
2152,
2029,
2499
],
"end_pos": [
-1008,
-1963,
-1966,
2141,
2143,
-971,
3043,
-1077,
3144
],
"calib_mode": [
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"LINEAR"
],
"motor_names": [
"waist",
"shoulder",
"shoulder_shadow",
"elbow",
"elbow_shadow",
"forearm_roll",
"wrist_angle",
"wrist_rotate",
"gripper"
]
}
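These added files are per-arm calibration dumps made of parallel arrays, one entry per motor. As a minimal, illustrative sketch only, here is how the fields could be lined up per motor with plain `json`; the filename `calib.json` is a placeholder, not an actual path from the repo:
```python
import json

# Placeholder filename for this sketch: the real calibration files live under
# .cache/calibration/ (see the tutorial changes later in this diff).
with open("calib.json") as f:
    calib = json.load(f)

# Each list appears to be indexed per motor, in the order given by "motor_names".
for i, name in enumerate(calib["motor_names"]):
    print(
        f"{name}: mode={calib['calib_mode'][i]} "
        f"homing_offset={calib['homing_offset'][i]} drive_mode={calib['drive_mode'][i]} "
        f"start={calib['start_pos'][i]} end={calib['end_pos'][i]}"
    )
```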

View File

@@ -0,0 +1,68 @@
{
"homing_offset": [
2048,
3072,
3072,
-1024,
-1024,
2048,
-2048,
2048,
-1024
],
"drive_mode": [
1,
1,
1,
0,
0,
1,
0,
1,
0
],
"start_pos": [
2035,
3024,
3019,
979,
981,
1982,
2166,
2124,
1968
],
"end_pos": [
-990,
-2017,
-2015,
2078,
2076,
-1030,
3117,
-1016,
2556
],
"calib_mode": [
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"LINEAR"
],
"motor_names": [
"waist",
"shoulder",
"shoulder_shadow",
"elbow",
"elbow_shadow",
"forearm_roll",
"wrist_angle",
"wrist_rotate",
"gripper"
]
}

View File

@@ -0,0 +1,68 @@
{
"homing_offset": [
2048,
3072,
3072,
-1024,
-1024,
2048,
-2048,
2048,
-2048
],
"drive_mode": [
1,
1,
1,
0,
0,
1,
0,
1,
0
],
"start_pos": [
2056,
2895,
2896,
1191,
1190,
2018,
2051,
2056,
2509
],
"end_pos": [
-1040,
-2004,
-2006,
2126,
2127,
-1010,
3050,
-1117,
3143
],
"calib_mode": [
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"LINEAR"
],
"motor_names": [
"waist",
"shoulder",
"shoulder_shadow",
"elbow",
"elbow_shadow",
"forearm_roll",
"wrist_angle",
"wrist_rotate",
"gripper"
]
}

View File

@@ -0,0 +1,68 @@
{
"homing_offset": [
2048,
3072,
3072,
-1024,
-1024,
2048,
-2048,
2048,
-2048
],
"drive_mode": [
1,
1,
1,
0,
0,
1,
0,
1,
0
],
"start_pos": [
2068,
3034,
3030,
1038,
1041,
1991,
1948,
2090,
1985
],
"end_pos": [
-1025,
-2014,
-2015,
2058,
2060,
-955,
3091,
-940,
2576
],
"calib_mode": [
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"DEGREE",
"LINEAR"
],
"motor_names": [
"waist",
"shoulder",
"shoulder_shadow",
"elbow",
"elbow_shadow",
"forearm_roll",
"wrist_angle",
"wrist_rotate",
"gripper"
]
}

2
.gitattributes vendored
View File

@@ -3,4 +3,4 @@
*.safetensors filter=lfs diff=lfs merge=lfs -text
*.mp4 filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.json filter=lfs diff=lfs merge=lfs -text
*.json !text !filter !merge !diff

6
.gitignore vendored
View File

@@ -66,7 +66,6 @@ htmlcov/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
@@ -74,6 +73,11 @@ coverage.xml
.hypothesis/
.pytest_cache/
# Ignore .cache except calibration
.cache/*
!.cache/calibration/
!.cache/calibration/**
# Translations
*.mo
*.pot

View File

@@ -20,7 +20,7 @@ Some of the ways you can contribute to 🤗 LeRobot:
* Contributing to the examples or to the documentation.
* Submitting issues related to bugs or desired new features.
Following the guides below, feel free to open issues and PRs and to coordinate your efforts with the community on our [Discord Channel](https://discord.gg/VjFz58wn3R). For specific inquiries, reach out to [Remi Cadene](remi.cadene@huggingface.co).
Following the guides below, feel free to open issues and PRs and to coordinate your efforts with the community on our [Discord Channel](https://discord.gg/VjFz58wn3R). For specific inquiries, reach out to [Remi Cadene](mailto:remi.cadene@huggingface.co).
If you are not sure how to contribute or want to know the next features we are working on, look at this project page: [LeRobot TODO](https://github.com/orgs/huggingface/projects/46)

View File

@@ -34,9 +34,10 @@
<p>For more info, see <a href="https://x.com/RemiCadene/status/1825455895561859185">our thread on X</a> or <a href="https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md">our tutorial page</a>.</p>
</div>
<br/>
<h3 align="center">
<p>State-of-the-art AI for real-world robotics</p>
<p>LeRobot: State-of-the-art AI for real-world robotics</p>
</h3>
---
@@ -266,13 +267,20 @@ checkpoints
│ └── training_state.pth # optimizer/scheduler/rng state and training step
```
To resume training from a checkpoint, you can add these to the `train.py` python command:
```bash
hydra.run.dir=your/original/experiment/dir resume=true
```
It will load the pretrained model, optimizer and scheduler states for training. For more information please see our tutorial on training resumption [here](https://github.com/huggingface/lerobot/blob/main/examples/5_resume_training.md).
To use wandb for logging training and evaluation curves, make sure you've run `wandb login` as a one-time setup step. Then, when running the training command above, enable WandB in the configuration by adding:
```bash
wandb.enable=true
```
A link to the wandb logs for the run will also show up in yellow in your terminal. Here is an example of what they look like in your browser:
A link to the wandb logs for the run will also show up in yellow in your terminal. Here is an example of what they look like in your browser. Please also check [here](https://github.com/huggingface/lerobot/blob/main/examples/4_train_policy_with_script.md#typical-logs-and-metrics) for the explanation of some commonly used metrics in logs.
![](media/wandb.png)

View File

@@ -22,7 +22,7 @@ RUN echo "source /opt/venv/bin/activate" >> /root/.bashrc
COPY . /lerobot
WORKDIR /lerobot
RUN pip install --upgrade --no-cache-dir pip
RUN pip install --no-cache-dir ".[test, aloha, xarm, pusht, koch]" \
RUN pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]" \
--extra-index-url https://download.pytorch.org/whl/cpu
# Set EGL as the rendering backend for MuJoCo

View File

@@ -1,4 +1,4 @@
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
FROM nvidia/cuda:12.4.1-base-ubuntu22.04
# Configure image
ARG PYTHON_VERSION=3.10
@@ -17,33 +17,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
python${PYTHON_VERSION} python${PYTHON_VERSION}-venv \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
# Install ffmpeg build dependencies. See:
# https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu
# TODO(aliberts): create image to build dependencies from source instead
RUN apt-get update && apt-get install -y --no-install-recommends \
autoconf automake yasm \
libass-dev \
libfreetype6-dev \
libgnutls28-dev \
libunistring-dev \
libmp3lame-dev \
libtool \
libvorbis-dev \
meson \
ninja-build \
pkg-config \
texinfo \
yasm \
zlib1g-dev \
nasm \
libx264-dev \
libx265-dev libnuma-dev \
libvpx-dev \
libfdk-aac-dev \
libopus-dev \
libsvtav1-dev libsvtav1enc-dev libsvtav1dec-dev \
libdav1d-dev
# Install gh cli tool
RUN (type -p wget >/dev/null || (apt update && apt-get install wget -y)) \
&& mkdir -p -m 755 /etc/apt/keyrings \

View File

@@ -24,7 +24,7 @@ RUN echo "source /opt/venv/bin/activate" >> /root/.bashrc
COPY . /lerobot
WORKDIR /lerobot
RUN pip install --upgrade --no-cache-dir pip
RUN pip install --no-cache-dir ".[test, aloha, xarm, pusht, koch]"
RUN pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]"
# Set EGL as the rendering backend for MuJoCo
ENV MUJOCO_GL="egl"

View File

@@ -170,6 +170,36 @@ python lerobot/scripts/train.py --config-dir outputs/train/my_experiment/checkpo
Note that you may still use the regular syntax for config parameter overrides (eg: by adding `training.offline_steps=200000`).
## Typical logs and metrics
When you start the training process, you will first see your full configuration being printed in the terminal. You can check it to make sure that you configured it correctly and that your config is not overridden by other files. The final configuration will also be saved with the checkpoint.
After that, you will see training logs like this one:
```
INFO 2024-08-14 13:35:12 ts/train.py:192 step:0 smpl:64 ep:1 epch:0.00 loss:1.112 grdn:15.387 lr:2.0e-07 updt_s:1.738 data_s:4.774
```
or an evaluation log like:
```
INFO 2024-08-14 13:38:45 ts/train.py:226 step:100 smpl:6K ep:52 epch:0.25 ∑rwrd:20.693 success:0.0% eval_s:120.266
```
These logs will also be saved in wandb if `wandb.enable` is set to `true`. Here is the meaning of some abbreviations:
- `smpl`: number of samples seen during training.
- `ep`: number of episodes seen during training. An episode contains multiple samples in a complete manipulation task.
- `epch`: number of times all unique samples have been seen (epoch).
- `grdn`: gradient norm.
- `∑rwrd`: sum of rewards computed over every evaluation episode, then averaged across episodes.
- `success`: average success rate of eval episodes. Reward and success are usually different, except in the sparse reward setting where reward=1 only when the task is completed successfully.
- `eval_s`: time to evaluate the policy in the environment, in seconds.
- `updt_s`: time to update the network parameters, in seconds.
- `data_s`: time to load a batch of data, in seconds.
Some metrics are useful for initial performance profiling. For example, if you find via the `nvidia-smi` command that GPU utilization is low and `data_s` is sometimes too high, you may need to modify the batch size or the number of dataloading workers to accelerate dataloading. We also recommend the [pytorch profiler](https://github.com/huggingface/lerobot?tab=readme-ov-file#improve-your-code-with-profiling) for detailed performance probing.
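If you want to inspect these numbers programmatically, here is a minimal, illustrative sketch (the `parse_train_log_line` helper below is hypothetical and not part of LeRobot) that splits one of the log lines above into a dictionary of metrics:
```python
# Hypothetical helper, not part of LeRobot: turn one training/eval log line
# into a dict of metrics, based on the "name:value" format shown above.
def parse_train_log_line(line: str) -> dict:
    metrics = {}
    # Skip the "INFO <date> <time> <file>:<lineno>" prefix, then read "name:value" tokens.
    for token in line.split()[4:]:
        name, _, value = token.partition(":")
        value = value.rstrip("%").replace("K", "e3")  # handles "success:0.0%" and "smpl:6K"
        try:
            metrics[name] = float(value)
        except ValueError:
            metrics[name] = value
    return metrics

line = "INFO 2024-08-14 13:35:12 ts/train.py:192 step:0 smpl:64 ep:1 epch:0.00 loss:1.112 grdn:15.387 lr:2.0e-07 updt_s:1.738 data_s:4.774"
print(parse_train_log_line(line)["data_s"])  # 4.774
```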
---
So far we've seen how to train Diffusion Policy for PushT and ACT for ALOHA. Now, what if we want to train ACT for PushT? Well, there are aspects of the ACT configuration that are specific to the ALOHA environments, and these happen to be incompatible with PushT. Therefore, trying to run the following will almost certainly raise an exception of sorts (eg: feature dimension mismatch):

View File

@@ -11,7 +11,7 @@ This tutorial will guide you through the process of setting up and training a ne
By following these steps, you'll be able to replicate tasks like picking up a Lego block and placing it in a bin with a high success rate, as demonstrated in [this video](https://x.com/RemiCadene/status/1814680760592572934).
Although this tutorial is general and can be easily adapted to various types of robots by changing the configuration, it is specifically based on the [Koch v1.1](https://github.com/jess-moss/koch-v1-1), an affordable robot. The Koch v1.1 consists of a leader arm and a follower arm, each with 6 motors. It can work with one or several cameras to record the scene, which serve as visual sensors for the robot.
This tutorial is specifically made for the affordable [Koch v1.1](https://github.com/jess-moss/koch-v1-1) robot, but it also contains additional information so it can easily be adapted to various types of robots, like the [Aloha bimanual robot](aloha-2.github.io), by changing some configurations. The Koch v1.1 consists of a leader arm and a follower arm, each with 6 motors. It can work with one or several cameras to record the scene, which serve as visual sensors for the robot.
During the data collection phase, you will control the follower arm by moving the leader arm. This process is known as "teleoperation." This technique is used to collect robot trajectories. Afterward, you'll train a neural network to imitate these trajectories and deploy the network to enable your robot to operate autonomously.
@@ -29,16 +29,23 @@ For a visual walkthrough of the assembly process, you can refer to [this video t
## 2. Configure motors, calibrate arms, teleoperate your Koch v1.1
First, install the additional dependencies required for Koch v1.1 by running one of the following commands.
First, install the additional dependencies required for robots built with Dynamixel motors, like the Koch v1.1, by running one of the following commands.
Using `pip`:
```bash
pip install -e ".[koch]"
pip install -e ".[dynamixel]"
```
Or using `poetry`:
```bash
poetry install --sync --extras "koch"
poetry install --sync --extras "dynamixel"
```
/!\ For Linux only: ffmpeg and opencv currently require a conda install. Run this exact sequence of commands:
```bash
conda install -c conda-forge ffmpeg
pip uninstall opencv-python
conda install -c conda-forge "opencv>=4.10.0"
```
You are now ready to plug the 5V power supply to the motor bus of the leader arm (the smaller one) since all its motors only require 5V.
@@ -147,6 +154,7 @@ follower_arm = DynamixelMotorsBus(
Next, update the port values in the YAML configuration file for the Koch robot at [`lerobot/configs/robot/koch.yaml`](../lerobot/configs/robot/koch.yaml) with the ports you've identified:
```yaml
[...]
robot_type: koch
leader_arms:
main:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
@@ -174,6 +182,8 @@ follower_arms:
[...]
```
Don't forget to set `robot_type: aloha` if you follow this tutorial with the [Aloha bimanual robot](aloha-2.github.io) instead of Koch v1.1.
This configuration file is used to instantiate your robot across all scripts. We'll cover how this works later on.
**Connect and Configure your Motors**
@@ -298,32 +308,37 @@ Alternatively, you can unplug the power cord, which will automatically disable t
*/!\ Warning*: These motors tend to overheat, especially under torque or if left plugged in for too long. Unplug after use.
### b. Teleoperate your Koch v1.1 with KochRobot
### b. Teleoperate your Koch v1.1 with ManipulatorRobot
**Instantiate the KochRobot**
**Instantiate the ManipulatorRobot**
Before you can teleoperate your robot, you need to instantiate the [`KochRobot`](../lerobot/common/robot_devices/robots/koch.py) using the previously defined `leader_arm` and `follower_arm`.
Before you can teleoperate your robot, you need to instantiate the [`ManipulatorRobot`](../lerobot/common/robot_devices/robots/manipulator.py) using the previously defined `leader_arm` and `follower_arm`.
For the Koch robot, we only have one leader, so we refer to it as `"main"` and define it as `leader_arms={"main": leader_arm}`. We do the same for the follower arm. For other robots (like the Aloha), which may have two pairs of leader and follower arms, you would define them like this: `leader_arms={"left": left_leader_arm, "right": right_leader_arm},`. Same thing for the follower arms.
For the Koch v1.1 robot, we only have one leader, so we refer to it as `"main"` and define it as `leader_arms={"main": leader_arm}`. We do the same for the follower arm. For other robots (like the Aloha), which may have two pairs of leader and follower arms, you would define them like this: `leader_arms={"left": left_leader_arm, "right": right_leader_arm},`. Same thing for the follower arms.
You also need to provide a path to a calibration file, such as `calibration_path=".cache/calibration/koch.pkl"`. More on this in the next section.
You also need to provide a path to a calibration directory, such as `calibration_dir=".cache/calibration/koch"`. More on this in the next section.
Run the following code to instantiate your Koch robot:
Run the following code to instantiate your manipulator robot:
```python
from lerobot.common.robot_devices.robots.koch import KochRobot
from lerobot.common.robot_devices.robots.manipulator import ManipulatorRobot
robot = KochRobot(
robot = ManipulatorRobot(
robot_type="koch",
leader_arms={"main": leader_arm},
follower_arms={"main": follower_arm},
calibration_path=".cache/calibration/koch.pkl",
calibration_dir=".cache/calibration/koch",
)
```
**Calibrate and Connect the KochRobot**
The `robot_type="koch"` argument is used to set the associated settings and calibration process. For instance, we activate the torque of the gripper of the leader Koch v1.1 arm and position it at a 40 degree angle to use it as a trigger.
Next, you'll need to calibrate your robot to ensure that the leader and follower arms have the same position values when they are in the same physical position. This calibration is essential because it allows a neural network trained on one Koch robot to work on another.
For the [Aloha bimanual robot](https://aloha-2.github.io), we would use `robot_type="aloha"` to set different settings such as a secondary ID for shadow joints (shoulder, elbow). Specific to Aloha, LeRobot comes with default calibration files stored in `.cache/calibration/aloha_default`. Assuming the motors have been properly assembled, no manual calibration step is expected. If you need to run manual calibration, simply update `calibration_dir` to `.cache/calibration/aloha`.
When you connect your robot for the first time, the [`KochRobot`](../lerobot/common/robot_devices/robots/koch.py) will detect if the calibration file is missing and trigger the calibration procedure. During this process, you will be guided to move each arm to three different positions.
**Calibrate and Connect the ManipulatorRobot**
Next, you'll need to calibrate your Koch robot to ensure that the leader and follower arms have the same position values when they are in the same physical position. This calibration is essential because it allows a neural network trained on one Koch robot to work on another.
When you connect your robot for the first time, the [`ManipulatorRobot`](../lerobot/common/robot_devices/robots/manipulator.py) will detect if the calibration file is missing and trigger the calibration procedure. During this process, you will be guided to move each arm to three different positions.
Here are the positions you'll move the follower arm to:
@@ -354,27 +369,26 @@ The output will look like this:
```
Connecting main follower arm
Connecting main leader arm
Missing calibration file '.cache/calibration/koch.pkl'. Starting calibration procedure.
Running calibration of main follower...
Missing calibration file '.cache/calibration/koch/main_follower.json'
Running calibration of koch main follower...
Move arm to zero position
[...]
Move arm to rotated position
[...]
Move arm to rest position
[...]
Calibration is done! Saving calibration file '.cache/calibration/koch/main_follower.json'
Running calibration of main leader...
Missing calibration file '.cache/calibration/koch/main_leader.json'
Running calibration of koch main leader...
Move arm to zero position
[...]
Move arm to rotated position
[...]
Move arm to rest position
[...]
Calibration is done! Saving calibration file '.cache/calibration/koch.pkl'
Calibration is done! Saving calibration file '.cache/calibration/koch/main_leader.json'
```
*Verifying Calibration*
@@ -414,7 +428,7 @@ for _ in tqdm.tqdm(range(seconds*frequency)):
*Using `teleop_step` for Teleoperation*
Alternatively, you can teleoperate the robot using the `teleop_step` method from [`KochRobot`](../lerobot/common/robot_devices/robots/koch.py).
Alternatively, you can teleoperate the robot using the `teleop_step` method from [`ManipulatorRobot`](../lerobot/common/robot_devices/robots/manipulator.py).
Run this code to teleoperate:
```python
@@ -607,10 +621,10 @@ Additionaly, you can set up your robot to work with your cameras.
Modify the following Python code with the appropriate camera names and configurations:
```python
robot = KochRobot(
robot = ManipulatorRobot(
leader_arms={"main": leader_arm},
follower_arms={"main": follower_arm},
calibration_path=".cache/calibration/koch.pkl",
calibration_dir=".cache/calibration/koch",
cameras={
"laptop": OpenCVCamera(0, fps=30, width=640, height=480),
"phone": OpenCVCamera(1, fps=30, width=640, height=480),
@@ -752,7 +766,7 @@ Before trying `record`, if you want to push your dataset to the hub, make sure y
```bash
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Also, store your Hugging Face repositery name in a variable (e.g. `cadene` or `lerobot`). For instance, run this to use your Hugging Face user name as repositery:
Also, store your Hugging Face repository name in a variable (e.g. `cadene` or `lerobot`). For instance, run this to use your Hugging Face user name as repository:
```bash
HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER
@@ -925,7 +939,7 @@ huggingface-cli upload ${HF_USER}/act_koch_test_${CKPT} \
## 5. Evaluate your policy
Now that you have a policy checkpoint, you can easily control your robot with it using methods from [`KochRobot`](../lerobot/common/robot_devices/robots/koch.py) and the policy.
Now that you have a policy checkpoint, you can easily control your robot with it using methods from [`ManipulatorRobot`](../lerobot/common/robot_devices/robots/manipulator.py) and the policy.
Try this code for running inference for 60 seconds at 30 fps:
```python

View File

@@ -27,6 +27,7 @@ Example:
print(lerobot.available_real_world_datasets)
print(lerobot.available_policies)
print(lerobot.available_policies_per_env)
print(lerobot.available_robots)
```
When implementing a new dataset loadable with LeRobotDataset follow these steps:
@@ -129,13 +130,60 @@ available_real_world_datasets = [
"lerobot/unitreeh1_rearrange_objects",
"lerobot/unitreeh1_two_robot_greeting",
"lerobot/unitreeh1_warehouse",
"lerobot/nyu_rot_dataset",
"lerobot/utokyo_saytap",
"lerobot/imperialcollege_sawyer_wrist_cam",
"lerobot/utokyo_xarm_bimanual",
"lerobot/tokyo_u_lsmo",
"lerobot/utokyo_pr2_opening_fridge",
"lerobot/cmu_franka_exploration_dataset",
"lerobot/cmu_stretch",
"lerobot/asu_table_top",
"lerobot/utokyo_pr2_tabletop_manipulation",
"lerobot/utokyo_xarm_pick_and_place",
"lerobot/ucsd_kitchen_dataset",
"lerobot/austin_buds_dataset",
"lerobot/dlr_sara_grid_clamp",
"lerobot/conq_hose_manipulation",
"lerobot/columbia_cairlab_pusht_real",
"lerobot/dlr_sara_pour",
"lerobot/dlr_edan_shared_control",
"lerobot/ucsd_pick_and_place_dataset",
"lerobot/berkeley_cable_routing",
"lerobot/nyu_franka_play_dataset",
"lerobot/austin_sirius_dataset",
"lerobot/cmu_play_fusion",
"lerobot/berkeley_gnm_sac_son",
"lerobot/nyu_door_opening_surprising_effectiveness",
"lerobot/berkeley_fanuc_manipulation",
"lerobot/jaco_play",
"lerobot/viola",
"lerobot/kaist_nonprehensile",
"lerobot/berkeley_mvp",
"lerobot/uiuc_d3field",
"lerobot/berkeley_gnm_recon",
"lerobot/austin_sailor_dataset",
"lerobot/utaustin_mutex",
"lerobot/roboturk",
"lerobot/stanford_hydra_dataset",
"lerobot/berkeley_autolab_ur5",
"lerobot/stanford_robocook",
"lerobot/toto",
"lerobot/fmb",
"lerobot/droid_100",
"lerobot/berkeley_rpt",
"lerobot/stanford_kuka_multimodal_dataset",
"lerobot/iamlab_cmu_pickup_insert",
"lerobot/taco_play",
"lerobot/berkeley_gnm_cory_hall",
"lerobot/usc_cloth_sim",
]
available_datasets = list(
itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets)
)
# lists all available policies from `lerobot/common/policies` by their class attribute: `name`.
# lists all available policies from `lerobot/common/policies`
available_policies = [
"act",
"diffusion",
@@ -143,6 +191,13 @@ available_policies = [
"vqbet",
]
# lists all available robots from `lerobot/common/robot_devices/robots`
available_robots = [
"koch",
"koch_bimanual",
"aloha",
]
# keys and values refer to yaml files
available_policies_per_env = {
"aloha": ["act"],

View File

@@ -40,6 +40,10 @@ def get_stats_einops_patterns(dataset, num_workers=0):
stats_patterns = {}
for key, feats_type in dataset.features.items():
# NOTE: skip language_instruction embedding in stats computation
if key == "language_instruction":
continue
# sanity check that tensors are not float64
assert batch[key].dtype != torch.float64

View File

@@ -60,8 +60,8 @@ AVAILABLE_RAW_REPO_IDS = {
"lerobot-raw/aloha_static_vinh_cup_left_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_vinh_cup_raw": "aloha_hdf5",
"lerobot-raw/aloha_static_ziploc_slide_raw": "aloha_hdf5",
"lerobot-raw/pusht_raw": "pusht_zarr",
"lerobot-raw/umi_cup_in_the_wild_raw": "umi_zarr",
"lerobot-raw/pusht_raw": "pusht_zarr",
"lerobot-raw/unitreeh1_fold_clothes_raw": "aloha_hdf5",
"lerobot-raw/unitreeh1_rearrange_objects_raw": "aloha_hdf5",
"lerobot-raw/unitreeh1_two_robot_greeting_raw": "aloha_hdf5",
@@ -70,6 +70,74 @@ AVAILABLE_RAW_REPO_IDS = {
"lerobot-raw/xarm_lift_medium_replay_raw": "xarm_pkl",
"lerobot-raw/xarm_push_medium_raw": "xarm_pkl",
"lerobot-raw/xarm_push_medium_replay_raw": "xarm_pkl",
"lerobot-raw/fractal20220817_data_raw": "openx_rlds.fractal20220817_data",
"lerobot-raw/kuka_raw": "openx_rlds.kuka",
"lerobot-raw/bridge_openx_raw": "openx_rlds.bridge_openx",
"lerobot-raw/taco_play_raw": "openx_rlds.taco_play",
"lerobot-raw/jaco_play_raw": "openx_rlds.jaco_play",
"lerobot-raw/berkeley_cable_routing_raw": "openx_rlds.berkeley_cable_routing",
"lerobot-raw/roboturk_raw": "openx_rlds.roboturk",
"lerobot-raw/nyu_door_opening_surprising_effectiveness_raw": "openx_rlds.nyu_door_opening_surprising_effectiveness",
"lerobot-raw/viola_raw": "openx_rlds.viola",
"lerobot-raw/berkeley_autolab_ur5_raw": "openx_rlds.berkeley_autolab_ur5",
"lerobot-raw/toto_raw": "openx_rlds.toto",
"lerobot-raw/language_table_raw": "openx_rlds.language_table",
"lerobot-raw/columbia_cairlab_pusht_real_raw": "openx_rlds.columbia_cairlab_pusht_real",
"lerobot-raw/stanford_kuka_multimodal_dataset_raw": "openx_rlds.stanford_kuka_multimodal_dataset",
"lerobot-raw/nyu_rot_dataset_raw": "openx_rlds.nyu_rot_dataset",
"lerobot-raw/io_ai_tech_raw": "openx_rlds.io_ai_tech",
"lerobot-raw/stanford_hydra_dataset_raw": "openx_rlds.stanford_hydra_dataset",
"lerobot-raw/austin_buds_dataset_raw": "openx_rlds.austin_buds_dataset",
"lerobot-raw/nyu_franka_play_dataset_raw": "openx_rlds.nyu_franka_play_dataset",
"lerobot-raw/maniskill_dataset_raw": "openx_rlds.maniskill_dataset",
"lerobot-raw/furniture_bench_dataset_raw": "openx_rlds.furniture_bench_dataset",
"lerobot-raw/cmu_franka_exploration_dataset_raw": "openx_rlds.cmu_franka_exploration_dataset",
"lerobot-raw/ucsd_kitchen_dataset_raw": "openx_rlds.ucsd_kitchen_dataset",
"lerobot-raw/ucsd_pick_and_place_dataset_raw": "openx_rlds.ucsd_pick_and_place_dataset",
"lerobot-raw/spoc_raw": "openx_rlds.spoc",
"lerobot-raw/austin_sailor_dataset_raw": "openx_rlds.austin_sailor_dataset",
"lerobot-raw/austin_sirius_dataset_raw": "openx_rlds.austin_sirius_dataset",
"lerobot-raw/bc_z_raw": "openx_rlds.bc_z",
"lerobot-raw/utokyo_pr2_opening_fridge_raw": "openx_rlds.utokyo_pr2_opening_fridge",
"lerobot-raw/utokyo_pr2_tabletop_manipulation_raw": "openx_rlds.utokyo_pr2_tabletop_manipulation",
"lerobot-raw/utokyo_xarm_pick_and_place_raw": "openx_rlds.utokyo_xarm_pick_and_place",
"lerobot-raw/utokyo_xarm_bimanual_raw": "openx_rlds.utokyo_xarm_bimanual",
"lerobot-raw/utokyo_saytap_raw": "openx_rlds.utokyo_saytap",
"lerobot-raw/robo_net_raw": "openx_rlds.robo_net",
"lerobot-raw/robo_set_raw": "openx_rlds.robo_set",
"lerobot-raw/berkeley_mvp_raw": "openx_rlds.berkeley_mvp",
"lerobot-raw/berkeley_rpt_raw": "openx_rlds.berkeley_rpt",
"lerobot-raw/kaist_nonprehensile_raw": "openx_rlds.kaist_nonprehensile",
"lerobot-raw/stanford_mask_vit_raw": "openx_rlds.stanford_mask_vit",
"lerobot-raw/tokyo_u_lsmo_raw": "openx_rlds.tokyo_u_lsmo",
"lerobot-raw/dlr_sara_pour_raw": "openx_rlds.dlr_sara_pour",
"lerobot-raw/dlr_sara_grid_clamp_raw": "openx_rlds.dlr_sara_grid_clamp",
"lerobot-raw/dlr_edan_shared_control_raw": "openx_rlds.dlr_edan_shared_control",
"lerobot-raw/asu_table_top_raw": "openx_rlds.asu_table_top",
"lerobot-raw/stanford_robocook_raw": "openx_rlds.stanford_robocook",
"lerobot-raw/imperialcollege_sawyer_wrist_cam_raw": "openx_rlds.imperialcollege_sawyer_wrist_cam",
"lerobot-raw/iamlab_cmu_pickup_insert_raw": "openx_rlds.iamlab_cmu_pickup_insert",
"lerobot-raw/uiuc_d3field_raw": "openx_rlds.uiuc_d3field",
"lerobot-raw/utaustin_mutex_raw": "openx_rlds.utaustin_mutex",
"lerobot-raw/berkeley_fanuc_manipulation_raw": "openx_rlds.berkeley_fanuc_manipulation",
"lerobot-raw/cmu_playing_with_food_raw": "openx_rlds.cmu_playing_with_food",
"lerobot-raw/cmu_play_fusion_raw": "openx_rlds.cmu_play_fusion",
"lerobot-raw/cmu_stretch_raw": "openx_rlds.cmu_stretch",
"lerobot-raw/berkeley_gnm_recon_raw": "openx_rlds.berkeley_gnm_recon",
"lerobot-raw/berkeley_gnm_cory_hall_raw": "openx_rlds.berkeley_gnm_cory_hall",
"lerobot-raw/berkeley_gnm_sac_son_raw": "openx_rlds.berkeley_gnm_sac_son",
"lerobot-raw/droid_raw": "openx_rlds.droid",
"lerobot-raw/droid_100_raw": "openx_rlds.droid100",
"lerobot-raw/fmb_raw": "openx_rlds.fmb",
"lerobot-raw/dobbe_raw": "openx_rlds.dobbe",
"lerobot-raw/usc_cloth_sim_raw": "openx_rlds.usc_cloth_sim",
"lerobot-raw/plex_robosuite_raw": "openx_rlds.plex_robosuite",
"lerobot-raw/conq_hose_manipulation_raw": "openx_rlds.conq_hose_manipulation",
"lerobot-raw/vima_raw": "openx_rlds.vima",
"lerobot-raw/robot_vqa_raw": "openx_rlds.robot_vqa",
"lerobot-raw/mimic_play_raw": "openx_rlds.mimic_play",
"lerobot-raw/tidybot_raw": "openx_rlds.tidybot",
"lerobot-raw/eth_agent_affordances_raw": "openx_rlds.eth_agent_affordances",
}
@@ -110,7 +178,7 @@ def download_all_raw_datasets(data_dir: Path | None = None):
def main():
parser = argparse.ArgumentParser(
description=f"""A script to download raw datasets from Hugging Face hub to a local directory. Here is a
non exhaustive list of available repositories to use in `--repo-id`: {AVAILABLE_RAW_REPO_IDS}""",
non exhaustive list of available repositories to use in `--repo-id`: {list(AVAILABLE_RAW_REPO_IDS.keys())}""",
)
parser.add_argument(

View File

@@ -0,0 +1,639 @@
OPENX_DATASET_CONFIGS:
fractal20220817_data:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- base_pose_tool_reached
- gripper_closed
fps: 3
kuka:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- clip_function_input/base_pose_tool_reached
- gripper_closed
fps: 10
bridge_openx:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- EEF_state
- gripper_state
fps: 5
taco_play:
image_obs_keys:
- rgb_static
- rgb_gripper
depth_obs_keys:
- depth_static
- depth_gripper
state_obs_keys:
- state_eef
- state_gripper
fps: 15
jaco_play:
image_obs_keys:
- image
- image_wrist
depth_obs_keys:
- null
state_obs_keys:
- state_eef
- state_gripper
fps: 10
berkeley_cable_routing:
image_obs_keys:
- image
- top_image
- wrist45_image
- wrist225_image
depth_obs_keys:
- null
state_obs_keys:
- robot_state
fps: 10
roboturk:
image_obs_keys:
- front_rgb
depth_obs_keys:
- null
state_obs_keys:
- null
fps: 10
nyu_door_opening_surprising_effectiveness:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- null
fps: 3
viola:
image_obs_keys:
- agentview_rgb
- eye_in_hand_rgb
depth_obs_keys:
- null
state_obs_keys:
- joint_states
- gripper_states
fps: 20
berkeley_autolab_ur5:
image_obs_keys:
- image
- hand_image
depth_obs_keys:
- image_with_depth
state_obs_keys:
- state
fps: 5
toto:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 30
language_table:
image_obs_keys:
- rgb
depth_obs_keys:
- null
state_obs_keys:
- effector_translation
fps: 10
columbia_cairlab_pusht_real:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- robot_state
fps: 10
stanford_kuka_multimodal_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- depth_image
state_obs_keys:
- ee_position
- ee_orientation
fps: 20
nyu_rot_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
fps: 3
io_ai_tech:
image_obs_keys:
- image
- image_fisheye
- image_left_side
- image_right_side
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 3
stanford_hydra_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
fps: 10
austin_buds_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 20
nyu_franka_play_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
- image_additional_view
depth_obs_keys:
- depth
- depth_additional_view
state_obs_keys:
- eef_state
fps: 3
maniskill_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- depth
- wrist_depth
state_obs_keys:
- tcp_pose
- gripper_state
fps: 20
furniture_bench_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 10
cmu_franka_exploration_dataset_converted_externally_to_rlds:
image_obs_keys:
- highres_image
depth_obs_keys:
- null
state_obs_keys:
- null
fps: 10
ucsd_kitchen_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- joint_state
fps: 2
ucsd_pick_and_place_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
fps: 3
spoc:
image_obs_keys:
- image
- image_manipulation
depth_obs_keys:
- null
state_obs_keys:
- null
fps: 3
austin_sailor_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 20
austin_sirius_dataset_converted_externally_to_rlds:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 20
bc_z:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- present/xyz
- present/axis_angle
- present/sensed_close
fps: 10
utokyo_pr2_opening_fridge_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
fps: 10
utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
fps: 10
utokyo_xarm_pick_and_place_converted_externally_to_rlds:
image_obs_keys:
- image
- image2
- hand_image
depth_obs_keys:
- null
state_obs_keys:
- end_effector_pose
fps: 10
utokyo_xarm_bimanual_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- pose_r
fps: 10
robo_net:
image_obs_keys:
- image
- image1
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
fps: 1
robo_set:
image_obs_keys:
- image_left
- image_right
- image_wrist
depth_obs_keys:
- null
state_obs_keys:
- state
- state_velocity
fps: 5
berkeley_mvp_converted_externally_to_rlds:
image_obs_keys:
- hand_image
depth_obs_keys:
- null
state_obs_keys:
- gripper
- pose
- joint_pos
fps: 5
berkeley_rpt_converted_externally_to_rlds:
image_obs_keys:
- hand_image
depth_obs_keys:
- null
state_obs_keys:
- joint_pos
- gripper
fps: 30
kaist_nonprehensile_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 10
stanford_mask_vit_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
tokyo_u_lsmo_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
fps: 10
dlr_sara_pour_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 10
dlr_sara_grid_clamp_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 10
dlr_edan_shared_control_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 5
asu_table_top_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
fps: 12.5
stanford_robocook_converted_externally_to_rlds:
image_obs_keys:
- image_1
- image_2
depth_obs_keys:
- depth_1
- depth_2
state_obs_keys:
- eef_state
- gripper_state
fps: 5
imperialcollege_sawyer_wrist_cam:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 10
iamlab_cmu_pickup_insert_converted_externally_to_rlds:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- joint_state
- gripper_state
fps: 20
uiuc_d3field:
image_obs_keys:
- image_1
- image_2
depth_obs_keys:
- depth_1
- depth_2
state_obs_keys:
- null
fps: 1
utaustin_mutex:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 20
berkeley_fanuc_manipulation:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- joint_state
- gripper_state
fps: 10
cmu_playing_with_food:
image_obs_keys:
- image
- finger_vision_1
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 10
cmu_play_fusion:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 5
cmu_stretch:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- eef_state
- gripper_state
fps: 10
berkeley_gnm_recon:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- state
- position
- yaw
fps: 3
berkeley_gnm_cory_hall:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- state
- position
- yaw
fps: 5
berkeley_gnm_sac_son:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- state
- position
- yaw
fps: 10
droid:
image_obs_keys:
- exterior_image_1_left
- exterior_image_2_left
- wrist_image_left
depth_obs_keys:
- null
state_obs_keys:
- proprio
fps: 15
droid_100:
image_obs_keys:
- exterior_image_1_left
- exterior_image_2_left
- wrist_image_left
depth_obs_keys:
- null
state_obs_keys:
- proprio
fps: 15
fmb:
image_obs_keys:
- image_side_1
- image_side_2
- image_wrist_1
- image_wrist_2
depth_obs_keys:
- image_side_1_depth
- image_side_2_depth
- image_wrist_1_depth
- image_wrist_2_depth
state_obs_keys:
- proprio
fps: 10
dobbe:
image_obs_keys:
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- proprio
fps: 3.75
usc_cloth_sim_converted_externally_to_rlds:
image_obs_keys:
- image
depth_obs_keys:
- null
state_obs_keys:
- null
fps: 10
plex_robosuite:
image_obs_keys:
- image
- wrist_image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 20
conq_hose_manipulation:
image_obs_keys:
- frontleft_fisheye_image
- frontright_fisheye_image
- hand_color_image
depth_obs_keys:
- null
state_obs_keys:
- state
fps: 30
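For illustration only, here is a minimal sketch of how an entry of this per-dataset config could be read with PyYAML; the filename `configs.yaml` is an assumption for this sketch, not necessarily the path used by the conversion script:
```python
import yaml  # PyYAML

# Placeholder path for this sketch; point it at wherever the YAML above is stored.
with open("configs.yaml") as f:
    openx_configs = yaml.safe_load(f)["OPENX_DATASET_CONFIGS"]

cfg = openx_configs["taco_play"]
print(cfg["image_obs_keys"])  # ['rgb_static', 'rgb_gripper']
print(cfg["depth_obs_keys"])  # ['depth_static', 'depth_gripper']
print(cfg["state_obs_keys"])  # ['state_eef', 'state_gripper']
print(cfg["fps"])             # 15
```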

View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
NOTE(YL): Adapted from:
Octo: https://github.com/octo-models/octo/blob/main/octo/data/utils/data_utils.py
data_utils.py
Additional utils for data processing.
"""
from typing import Any, Dict, List
import tensorflow as tf
def binarize_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
"""
Converts gripper actions from continuous to binary values (0 and 1).
We exploit the fact that most of the time, the gripper is fully open (near 1.0) or fully closed (near 0.0). As it
transitions between the two, it sometimes passes through a few intermediate values. We relabel those intermediate
values based on the state that is reached _after_ those intermediate values.
In the edge case that the trajectory ends with an intermediate value, we give up on binarizing and relabel that
chunk of intermediate values as the last action in the trajectory.
The `scan_fn` implements the following logic:
new_actions = np.empty_like(actions)
carry = actions[-1]
for i in reversed(range(actions.shape[0])):
if in_between_mask[i]:
carry = carry
else:
carry = float(open_mask[i])
new_actions[i] = carry
"""
open_mask, closed_mask = actions > 0.95, actions < 0.05
in_between_mask = tf.logical_not(tf.logical_or(open_mask, closed_mask))
is_open_float = tf.cast(open_mask, tf.float32)
def scan_fn(carry, i):
return tf.cond(in_between_mask[i], lambda: tf.cast(carry, tf.float32), lambda: is_open_float[i])
return tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), actions[-1], reverse=True)
def invert_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
return 1 - actions
def rel2abs_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
"""
Converts relative gripper actions (+1 for closing, -1 for opening) to absolute actions (0 = closed; 1 = open).
Assumes that the first relative gripper action is not redundant (i.e. close when already closed)!
"""
# Note =>> -1 for closing, 1 for opening, 0 for no change
opening_mask, closing_mask = actions < -0.1, actions > 0.1
thresholded_actions = tf.where(opening_mask, 1, tf.where(closing_mask, -1, 0))
def scan_fn(carry, i):
return tf.cond(thresholded_actions[i] == 0, lambda: carry, lambda: thresholded_actions[i])
# If no relative grasp, assumes open for whole trajectory
start = -1 * thresholded_actions[tf.argmax(thresholded_actions != 0, axis=0)]
start = tf.cond(start == 0, lambda: 1, lambda: start)
# Note =>> -1 for closed, 1 for open
new_actions = tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), start)
new_actions = tf.cast(new_actions, tf.float32) / 2 + 0.5
return new_actions
# === Bridge-V2 =>> Dataset-Specific Transform ===
def relabel_bridge_actions(traj: Dict[str, Any]) -> Dict[str, Any]:
"""Relabels actions to use reached proprioceptive state; discards last timestep (no-action)."""
movement_actions = traj["observation"]["state"][1:, :6] - traj["observation"]["state"][:-1, :6]
traj_truncated = tf.nest.map_structure(lambda x: x[:-1], traj)
traj_truncated["action"] = tf.concat([movement_actions, traj["action"][:-1, -1:]], axis=1)
return traj_truncated
# === RLDS Dataset Initialization Utilities ===
def pprint_data_mixture(dataset_kwargs_list: List[Dict[str, Any]], dataset_weights: List[int]) -> None:
print("\n######################################################################################")
print(f"# Loading the following {len(dataset_kwargs_list)} datasets (incl. sampling weight):{'': >24} #")
for dataset_kwargs, weight in zip(dataset_kwargs_list, dataset_weights, strict=False):
pad = 80 - len(dataset_kwargs["name"])
print(f"# {dataset_kwargs['name']}: {weight:=>{pad}f} #")
print("######################################################################################\n")

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
NOTE(YL): Adapted from:
OpenVLA: https://github.com/openvla/openvla
Episode transforms for DROID dataset.
"""
from typing import Any, Dict
import tensorflow as tf
import tensorflow_graphics.geometry.transformation as tfg
def rmat_to_euler(rot_mat):
return tfg.euler.from_rotation_matrix(rot_mat)
def euler_to_rmat(euler):
return tfg.rotation_matrix_3d.from_euler(euler)
def invert_rmat(rot_mat):
return tfg.rotation_matrix_3d.inverse(rot_mat)
def rotmat_to_rot6d(mat):
"""
Converts rotation matrix to R6 rotation representation (first two rows in rotation matrix).
Args:
mat: rotation matrix
Returns: 6d vector (first two rows of rotation matrix)
"""
r6 = mat[..., :2, :]
r6_0, r6_1 = r6[..., 0, :], r6[..., 1, :]
r6_flat = tf.concat([r6_0, r6_1], axis=-1)
return r6_flat
def velocity_act_to_wrist_frame(velocity, wrist_in_robot_frame):
"""
Translates velocity actions (translation + rotation) from base frame of the robot to wrist frame.
Args:
velocity: 6d velocity action (3 x translation, 3 x rotation)
wrist_in_robot_frame: 6d pose of the end-effector in robot base frame
Returns: 9d velocity action in robot wrist frame (3 x translation, 6 x rotation as R6)
"""
r_frame = euler_to_rmat(wrist_in_robot_frame[:, 3:6])
r_frame_inv = invert_rmat(r_frame)
# world to wrist: dT_pi = R^-1 dT_rbt
vel_t = (r_frame_inv @ velocity[:, :3][..., None])[..., 0]
# world to wrist: dR_pi = R^-1 dR_rbt R
dr_ = euler_to_rmat(velocity[:, 3:6])
dr_ = r_frame_inv @ (dr_ @ r_frame)
dr_r6 = rotmat_to_rot6d(dr_)
return tf.concat([vel_t, dr_r6], axis=-1)
def rand_swap_exterior_images(img1, img2):
"""
Randomly swaps the two exterior images (for training with single exterior input).
"""
return tf.cond(tf.random.uniform(shape=[]) > 0.5, lambda: (img1, img2), lambda: (img2, img1))
def droid_baseact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
"""
DROID dataset transformation for actions expressed in *base* frame of the robot.
"""
dt = trajectory["action_dict"]["cartesian_velocity"][:, :3]
dr_ = trajectory["action_dict"]["cartesian_velocity"][:, 3:6]
trajectory["action"] = tf.concat(
(
dt,
dr_,
1 - trajectory["action_dict"]["gripper_position"],
),
axis=-1,
)
trajectory["observation"]["exterior_image_1_left"], trajectory["observation"]["exterior_image_2_left"] = (
rand_swap_exterior_images(
trajectory["observation"]["exterior_image_1_left"],
trajectory["observation"]["exterior_image_2_left"],
)
)
trajectory["observation"]["proprio"] = tf.concat(
(
trajectory["observation"]["cartesian_position"],
trajectory["observation"]["gripper_position"],
),
axis=-1,
)
return trajectory
def droid_wristact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
"""
DROID dataset transformation for actions expressed in *wrist* frame of the robot.
"""
wrist_act = velocity_act_to_wrist_frame(
trajectory["action_dict"]["cartesian_velocity"], trajectory["observation"]["cartesian_position"]
)
trajectory["action"] = tf.concat(
(
wrist_act,
trajectory["action_dict"]["gripper_position"],
),
axis=-1,
)
trajectory["observation"]["exterior_image_1_left"], trajectory["observation"]["exterior_image_2_left"] = (
rand_swap_exterior_images(
trajectory["observation"]["exterior_image_1_left"],
trajectory["observation"]["exterior_image_2_left"],
)
)
trajectory["observation"]["proprio"] = tf.concat(
(
trajectory["observation"]["cartesian_position"],
trajectory["observation"]["gripper_position"],
),
axis=-1,
)
return trajectory
def droid_finetuning_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
"""
DROID dataset transformation for actions expressed in *base* frame of the robot.
"""
dt = trajectory["action_dict"]["cartesian_velocity"][:, :3]
dr_ = trajectory["action_dict"]["cartesian_velocity"][:, 3:6]
trajectory["action"] = tf.concat(
(
dt,
dr_,
1 - trajectory["action_dict"]["gripper_position"],
),
axis=-1,
)
trajectory["observation"]["proprio"] = tf.concat(
(
trajectory["observation"]["cartesian_position"],
trajectory["observation"]["gripper_position"],
),
axis=-1,
)
return trajectory
def zero_action_filter(traj: Dict) -> bool:
"""
Filters transitions whose actions are all-0 (only relative actions, no gripper action).
Note: this filter is applied *after* action normalization, so need to compare to "normalized 0".
"""
droid_q01 = tf.convert_to_tensor(
[
-0.7776297926902771,
-0.5803514122962952,
-0.5795090794563293,
-0.6464047729969025,
-0.7041108310222626,
-0.8895104378461838,
]
)
droid_q99 = tf.convert_to_tensor(
[
0.7597932070493698,
0.5726242214441299,
0.7351000607013702,
0.6705610305070877,
0.6464948207139969,
0.8897542208433151,
]
)
droid_norm_0_act = (
2 * (tf.zeros_like(traj["action"][:, :6]) - droid_q01) / (droid_q99 - droid_q01 + 1e-8) - 1
)
return tf.reduce_any(tf.math.abs(traj["action"][:, :6] - droid_norm_0_act) > 1e-5)

View File

@@ -0,0 +1,859 @@
#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
NOTE(YL): Adapted from:
OpenVLA: https://github.com/openvla/openvla
Octo: https://github.com/octo-models/octo
transforms.py
Defines a registry of per-dataset standardization transforms for each dataset in Open-X Embodiment.
Transforms adopt the following structure:
Input: Dictionary of *batched* features (i.e., has leading time dimension)
Output: Dictionary `step` =>> {
"observation": {
<image_keys, depth_image_keys>
State (in chosen state representation)
},
"action": Action (in chosen action representation),
"language_instruction": str
}
"""
from typing import Any, Dict
import tensorflow as tf
from lerobot.common.datasets.push_dataset_to_hub.openx.data_utils import (
binarize_gripper_actions,
invert_gripper_actions,
rel2abs_gripper_actions,
relabel_bridge_actions,
)
def droid_baseact_transform_fn():
from lerobot.common.datasets.push_dataset_to_hub.openx.droid_utils import droid_baseact_transform
return droid_baseact_transform
def bridge_openx_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
"""
Applies to version of Bridge V2 in Open X-Embodiment mixture.
Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it!
"""
for key in trajectory:
if key == "traj_metadata":
continue
elif key in ["observation", "action"]:
for key2 in trajectory[key]:
trajectory[key][key2] = trajectory[key][key2][1:]
else:
trajectory[key] = trajectory[key][1:]
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
tf.cast(trajectory["action"]["open_gripper"][:, None], tf.float32),
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
trajectory = relabel_bridge_actions(trajectory)
trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
return trajectory
def bridge_orig_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
"""
Applies to original version of Bridge V2 from the official project website.
Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it!
"""
for key in trajectory:
if key == "traj_metadata":
continue
elif key == "observation":
for key2 in trajectory[key]:
trajectory[key][key2] = trajectory[key][key2][1:]
else:
trajectory[key] = trajectory[key][1:]
trajectory["action"] = tf.concat(
[
trajectory["action"][:, :6],
binarize_gripper_actions(trajectory["action"][:, -1])[:, None],
],
axis=1,
)
trajectory = relabel_bridge_actions(trajectory)
trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
return trajectory
def ppgm_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = tf.concat(
[
trajectory["action"][:, :6],
binarize_gripper_actions(trajectory["action"][:, -1])[:, None],
],
axis=1,
)
trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"][:, :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"][:, -1:]
return trajectory
def rt1_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# make gripper action absolute action, +1 = open, 0 = close
gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0]
gripper_action = rel2abs_gripper_actions(gripper_action)
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
gripper_action[:, None],
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def kuka_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# make gripper action absolute action, +1 = open, 0 = close
gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0]
gripper_action = rel2abs_gripper_actions(gripper_action)
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
gripper_action[:, None],
),
axis=-1,
)
# decode compressed state
eef_value = tf.io.decode_compressed(
trajectory["observation"]["clip_function_input/base_pose_tool_reached"],
compression_type="ZLIB",
)
eef_value = tf.io.decode_raw(eef_value, tf.float32)
trajectory["observation"]["clip_function_input/base_pose_tool_reached"] = tf.reshape(eef_value, (-1, 7))
gripper_value = tf.io.decode_compressed(
trajectory["observation"]["gripper_closed"], compression_type="ZLIB"
)
gripper_value = tf.io.decode_raw(gripper_value, tf.float32)
trajectory["observation"]["gripper_closed"] = tf.reshape(gripper_value, (-1, 1))
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def taco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["state_eef"] = trajectory["observation"]["robot_obs"][:, :6]
trajectory["observation"]["state_gripper"] = trajectory["observation"]["robot_obs"][:, 7:8]
trajectory["action"] = trajectory["action"]["rel_actions_world"]
# clip gripper action to [0, 1], +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :6],
tf.clip_by_value(trajectory["action"][:, -1:], 0, 1),
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def jaco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["state_eef"] = trajectory["observation"]["end_effector_cartesian_pos"][:, :6]
trajectory["observation"]["state_gripper"] = trajectory["observation"]["end_effector_cartesian_pos"][
:, -1:
]
# make gripper action absolute action, +1 = open, 0 = close
gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0]
gripper_action = rel2abs_gripper_actions(gripper_action)
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
tf.zeros_like(trajectory["action"]["world_vector"]),
gripper_action[:, None],
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def berkeley_cable_routing_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
tf.zeros_like(trajectory["action"]["world_vector"][:, :1]),
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def roboturk_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# invert absolute gripper action, +1 = open, 0 = close
gripper_action = invert_gripper_actions(
tf.clip_by_value(trajectory["action"]["gripper_closedness_action"], 0, 1)
)
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
gripper_action,
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
trajectory["language_embedding"] = trajectory["observation"]["natural_language_embedding"]
return trajectory
def nyu_door_opening_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# make gripper action absolute action, +1 = open, 0 = close
gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0]
gripper_action = rel2abs_gripper_actions(gripper_action)
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
gripper_action[:, None],
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def viola_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# clip and invert gripper action, +1 = open, 0 = close
gripper_action = trajectory["action"]["gripper_closedness_action"][:, None]
gripper_action = tf.clip_by_value(gripper_action, 0, 1)
gripper_action = invert_gripper_actions(gripper_action)
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
gripper_action,
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def berkeley_autolab_ur5_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["state"] = trajectory["observation"]["robot_state"][:, 6:14]
# make gripper action absolute action, +1 = open, 0 = close
gripper_action = trajectory["action"]["gripper_closedness_action"]
gripper_action = rel2abs_gripper_actions(gripper_action)
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
gripper_action[:, None],
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def toto_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
tf.cast(trajectory["action"]["open_gripper"][:, None], tf.float32),
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def language_table_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# default to "open" gripper
trajectory["action"] = tf.concat(
(
trajectory["action"],
tf.zeros_like(trajectory["action"]),
tf.zeros_like(trajectory["action"]),
tf.ones_like(trajectory["action"][:, :1]),
),
axis=-1,
)
# decode language instruction
instruction_bytes = trajectory["observation"]["instruction"]
instruction_encoded = tf.strings.unicode_encode(instruction_bytes, output_encoding="UTF-8")
# Remove trailing padding --> convert RaggedTensor to regular Tensor.
trajectory["language_instruction"] = tf.strings.split(instruction_encoded, "\x00")[:, :1].to_tensor()[
:, 0
]
return trajectory
def pusht_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = tf.concat(
(
trajectory["action"]["world_vector"],
trajectory["action"]["rotation_delta"],
trajectory["action"]["gripper_closedness_action"][:, None],
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def stanford_kuka_multimodal_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["depth_image"] = trajectory["observation"]["depth_image"][..., 0]
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :3],
tf.zeros_like(trajectory["action"][:, :3]),
trajectory["action"][:, -1:],
),
axis=-1,
)
return trajectory
def nyu_rot_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][..., :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][..., -1:]
trajectory["action"] = trajectory["action"][..., :7]
return trajectory
def stanford_hydra_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# invert gripper action, +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :6],
invert_gripper_actions(trajectory["action"][:, -1:]),
),
axis=-1,
)
trajectory["observation"]["eef_state"] = tf.concat(
(
trajectory["observation"]["state"][:, :3],
trajectory["observation"]["state"][:, 7:10],
),
axis=-1,
)
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -3:-2]
return trajectory
def austin_buds_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# invert gripper action + clip, +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :6],
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
),
axis=-1,
)
trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :8]
return trajectory
def nyu_franka_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["depth"] = tf.cast(trajectory["observation"]["depth"][..., 0], tf.float32)
trajectory["observation"]["depth_additional_view"] = tf.cast(
trajectory["observation"]["depth_additional_view"][..., 0], tf.float32
)
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, -6:]
# clip gripper action, +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"][:, -8:-2],
tf.clip_by_value(trajectory["action"][:, -2:-1], 0, 1),
),
axis=-1,
)
return trajectory
def maniskill_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][..., 7:8]
return trajectory
def furniture_bench_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
import tensorflow_graphics.geometry.transformation as tft
trajectory["observation"]["state"] = tf.concat(
(
trajectory["observation"]["state"][:, :7],
trajectory["observation"]["state"][:, -1:],
),
axis=-1,
)
# invert gripper action + clip, +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :3],
tft.euler.from_quaternion(trajectory["action"][:, 3:7]),
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
),
axis=-1,
)
return trajectory
def cmu_franka_exploration_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = trajectory["action"][..., :-1]
return trajectory
def ucsd_kitchen_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :7]
trajectory["action"] = trajectory["action"][..., :-1]
return trajectory
def ucsd_pick_place_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :3],
tf.zeros_like(trajectory["action"][:, :3]),
trajectory["action"][:, -1:],
),
axis=-1,
)
return trajectory
def austin_sailor_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# invert gripper action + clip, +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :6],
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
),
axis=-1,
)
return trajectory
def austin_sirius_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# invert gripper action + clip, +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :6],
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
),
axis=-1,
)
return trajectory
def bc_z_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = tf.concat(
(
trajectory["action"]["future/xyz_residual"][:, :3],
trajectory["action"]["future/axis_angle_residual"][:, :3],
invert_gripper_actions(tf.cast(trajectory["action"]["future/target_close"][:, :1], tf.float32)),
),
axis=-1,
)
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"]
return trajectory
def tokyo_pr2_opening_fridge_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
trajectory["action"] = trajectory["action"][..., :-1]
return trajectory
def tokyo_pr2_tabletop_manipulation_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
trajectory["action"] = trajectory["action"][..., :-1]
return trajectory
def utokyo_xarm_bimanual_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = trajectory["action"][..., -7:]
return trajectory
def robo_net_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = tf.concat(
(
trajectory["observation"]["state"][:, :4],
tf.zeros_like(trajectory["observation"]["state"][:, :2]),
),
axis=-1,
)
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :4],
tf.zeros_like(trajectory["action"][:, :2]),
trajectory["action"][:, -1:],
),
axis=-1,
)
return trajectory
def berkeley_mvp_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
"""
trajectory["observation"]["state"] = tf.concat((
tf.cast(trajectory["observation"]["gripper"][:, None], tf.float32),
trajectory["observation"]["pose"],
trajectory["observation"]["joint_pos"],),
axis=-1,)
"""
trajectory["observation"]["gripper"] = tf.cast(trajectory["observation"]["gripper"][:, None], tf.float32)
return trajectory
def berkeley_rpt_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["gripper"] = tf.cast(trajectory["observation"]["gripper"][:, None], tf.float32)
return trajectory
def kaist_nonprehensible_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["state"] = trajectory["observation"]["state"][:, -7:]
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :6],
tf.zeros_like(trajectory["action"][:, :1]),
),
axis=-1,
)
return trajectory
def stanford_mask_vit_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = tf.concat(
(
trajectory["observation"]["end_effector_pose"][:, :4],
tf.zeros_like(trajectory["observation"]["end_effector_pose"][:, :2]),
),
axis=-1,
)
trajectory["observation"]["gripper_state"] = trajectory["observation"]["end_effector_pose"][:, -1:]
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :4],
tf.zeros_like(trajectory["action"][:, :2]),
trajectory["action"][:, -1:],
),
axis=-1,
)
return trajectory
def tokyo_lsmo_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
return trajectory
def dlr_sara_grid_clamp_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :6]
return trajectory
def dlr_edan_shared_control_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# invert gripper action, +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :6],
invert_gripper_actions(trajectory["action"][:, -1:]),
),
axis=-1,
)
return trajectory
def asu_table_top_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = trajectory["ground_truth_states"]["EE"]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
return trajectory
def robocook_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
return trajectory
def imperial_wristcam_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = trajectory["action"][..., :-1]
return trajectory
def iamlab_pick_insert_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
import tensorflow_graphics.geometry.transformation as tft
trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :7]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, 7:8]
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :3],
tft.euler.from_quaternion(trajectory["action"][:, 3:7]),
trajectory["action"][:, 7:8],
),
axis=-1,
)
return trajectory
def uiuc_d3field_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = tf.concat(
(
trajectory["action"],
tf.zeros_like(trajectory["action"]),
tf.zeros_like(trajectory["action"][:, :1]),
),
axis=-1,
)
return trajectory
def utaustin_mutex_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :8]
# invert gripper action + clip, +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :6],
invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)),
),
axis=-1,
)
return trajectory
def berkeley_fanuc_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :6]
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, 6:7]
# dataset does not store gripper actions, so use gripper state info, invert so +1 = open, 0 = close
trajectory["action"] = tf.concat(
(
trajectory["action"],
invert_gripper_actions(trajectory["observation"]["gripper_state"]),
),
axis=-1,
)
return trajectory
def cmu_playing_with_food_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
import tensorflow_graphics.geometry.transformation as tft
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :3],
tft.euler.from_quaternion(trajectory["action"][:, 3:7]),
trajectory["action"][:, -1:],
),
axis=-1,
)
return trajectory
def playfusion_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :3],
trajectory["action"][:, -4:],
),
axis=-1,
)
return trajectory
def cmu_stretch_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["eef_state"] = tf.concat(
(
trajectory["observation"]["state"][:, :3],
tf.zeros_like(trajectory["observation"]["state"][:, :3]),
),
axis=-1,
)
trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:]
trajectory["action"] = trajectory["action"][..., :-1]
return trajectory
def gnm_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
trajectory["observation"]["state"] = tf.concat(
(
trajectory["observation"]["position"],
tf.zeros_like(trajectory["observation"]["state"][:, :3]),
trajectory["observation"]["yaw"],
),
axis=-1,
)
trajectory["action"] = tf.concat(
(
trajectory["action"],
tf.zeros_like(trajectory["action"]),
tf.zeros_like(trajectory["action"]),
tf.zeros_like(trajectory["action"][:, :1]),
),
axis=-1,
)
return trajectory
def fmb_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# every input feature is batched, ie has leading batch dimension
trajectory["observation"]["proprio"] = tf.concat(
(
trajectory["observation"]["eef_pose"],
trajectory["observation"]["state_gripper_pose"][..., None],
),
axis=-1,
)
return trajectory
def dobbe_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# every input feature is batched, ie has leading batch dimension
trajectory["observation"]["proprio"] = trajectory["observation"]["state"]
return trajectory
def robo_set_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
# gripper action is in -1...1 --> clip to 0...1, flip
gripper_action = trajectory["action"][:, -1:]
gripper_action = invert_gripper_actions(tf.clip_by_value(gripper_action, 0, 1))
trajectory["action"] = tf.concat(
(
trajectory["action"][:, :7],
gripper_action,
),
axis=-1,
)
return trajectory
def identity_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
return trajectory
# === Registry ===
OPENX_STANDARDIZATION_TRANSFORMS = {
"bridge_openx": bridge_openx_dataset_transform,
"bridge_orig": bridge_orig_dataset_transform,
"bridge_dataset": bridge_orig_dataset_transform,
"ppgm": ppgm_dataset_transform,
"ppgm_static": ppgm_dataset_transform,
"ppgm_wrist": ppgm_dataset_transform,
"fractal20220817_data": rt1_dataset_transform,
"kuka": kuka_dataset_transform,
"taco_play": taco_play_dataset_transform,
"jaco_play": jaco_play_dataset_transform,
"berkeley_cable_routing": berkeley_cable_routing_dataset_transform,
"roboturk": roboturk_dataset_transform,
"nyu_door_opening_surprising_effectiveness": nyu_door_opening_dataset_transform,
"viola": viola_dataset_transform,
"berkeley_autolab_ur5": berkeley_autolab_ur5_dataset_transform,
"toto": toto_dataset_transform,
"language_table": language_table_dataset_transform,
"columbia_cairlab_pusht_real": pusht_dataset_transform,
"stanford_kuka_multimodal_dataset_converted_externally_to_rlds": stanford_kuka_multimodal_dataset_transform,
"nyu_rot_dataset_converted_externally_to_rlds": nyu_rot_dataset_transform,
"stanford_hydra_dataset_converted_externally_to_rlds": stanford_hydra_dataset_transform,
"austin_buds_dataset_converted_externally_to_rlds": austin_buds_dataset_transform,
"nyu_franka_play_dataset_converted_externally_to_rlds": nyu_franka_play_dataset_transform,
"maniskill_dataset_converted_externally_to_rlds": maniskill_dataset_transform,
"furniture_bench_dataset_converted_externally_to_rlds": furniture_bench_dataset_transform,
"cmu_franka_exploration_dataset_converted_externally_to_rlds": cmu_franka_exploration_dataset_transform,
"ucsd_kitchen_dataset_converted_externally_to_rlds": ucsd_kitchen_dataset_transform,
"ucsd_pick_and_place_dataset_converted_externally_to_rlds": ucsd_pick_place_dataset_transform,
"austin_sailor_dataset_converted_externally_to_rlds": austin_sailor_dataset_transform,
"austin_sirius_dataset_converted_externally_to_rlds": austin_sirius_dataset_transform,
"bc_z": bc_z_dataset_transform,
"utokyo_pr2_opening_fridge_converted_externally_to_rlds": tokyo_pr2_opening_fridge_dataset_transform,
"utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": tokyo_pr2_tabletop_manipulation_dataset_transform,
"utokyo_xarm_pick_and_place_converted_externally_to_rlds": identity_transform,
"utokyo_xarm_bimanual_converted_externally_to_rlds": utokyo_xarm_bimanual_dataset_transform,
"robo_net": robo_net_dataset_transform,
"berkeley_mvp_converted_externally_to_rlds": berkeley_mvp_dataset_transform,
"berkeley_rpt_converted_externally_to_rlds": berkeley_rpt_dataset_transform,
"kaist_nonprehensile_converted_externally_to_rlds": kaist_nonprehensible_dataset_transform,
"stanford_mask_vit_converted_externally_to_rlds": stanford_mask_vit_dataset_transform,
"tokyo_u_lsmo_converted_externally_to_rlds": tokyo_lsmo_dataset_transform,
"dlr_sara_pour_converted_externally_to_rlds": identity_transform,
"dlr_sara_grid_clamp_converted_externally_to_rlds": dlr_sara_grid_clamp_dataset_transform,
"dlr_edan_shared_control_converted_externally_to_rlds": dlr_edan_shared_control_dataset_transform,
"asu_table_top_converted_externally_to_rlds": asu_table_top_dataset_transform,
"stanford_robocook_converted_externally_to_rlds": robocook_dataset_transform,
"imperialcollege_sawyer_wrist_cam": imperial_wristcam_dataset_transform,
"iamlab_cmu_pickup_insert_converted_externally_to_rlds": iamlab_pick_insert_dataset_transform,
"uiuc_d3field": uiuc_d3field_dataset_transform,
"utaustin_mutex": utaustin_mutex_dataset_transform,
"berkeley_fanuc_manipulation": berkeley_fanuc_dataset_transform,
"cmu_playing_with_food": cmu_playing_with_food_dataset_transform,
"cmu_play_fusion": playfusion_dataset_transform,
"cmu_stretch": cmu_stretch_dataset_transform,
"berkeley_gnm_recon": gnm_dataset_transform,
"berkeley_gnm_cory_hall": gnm_dataset_transform,
"berkeley_gnm_sac_son": gnm_dataset_transform,
"droid": droid_baseact_transform_fn(),
"droid_100": droid_baseact_transform_fn(), # first 100 episodes of droid
"fmb": fmb_transform,
"dobbe": dobbe_dataset_transform,
"robo_set": robo_set_dataset_transform,
"usc_cloth_sim_converted_externally_to_rlds": identity_transform,
"plex_robosuite": identity_transform,
"conq_hose_manipulation": identity_transform,
"io_ai_tech": identity_transform,
"spoc": identity_transform,
}
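Below is a minimal usage sketch (not part of the diff) showing how an entry of this registry might be looked up and applied to a flattened `tf.data.Dataset` of trajectories, mirroring what `load_from_raw` does further down; the `dataset` variable and the `"bridge_orig"` key are illustrative placeholders.
```python
# Illustrative sketch: select and apply a standardization transform.
# `dataset` is assumed to be a tf.data.Dataset of flattened trajectories,
# where each element is a dict of batched tensors (observation, action, ...).
openx_dataset_name = "bridge_orig"  # any key of OPENX_STANDARDIZATION_TRANSFORMS
transform_fn = OPENX_STANDARDIZATION_TRANSFORMS[openx_dataset_name]
dataset = dataset.map(transform_fn)
```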

View File

@@ -0,0 +1,359 @@
#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
For https://github.com/google-deepmind/open_x_embodiment (OPENX) datasets.
Example:
python lerobot/scripts/push_dataset_to_hub.py \
--raw-dir /hdd/tensorflow_datasets/bridge_dataset/1.0.0/ \
--repo-id youliangtan/sampled_bridge_data_v2 \
--raw-format openx_rlds.bridge_orig \
--episodes 3 4 5 8 9
Exact dataset fps defined in openx/configs.yaml, obtained from:
https://docs.google.com/spreadsheets/d/1rPBD77tk60AEIGZrGSODwyyzs5FgCU9Uz3h-3_t2A9g/edit?gid=0#gid=0&range=R:R
"""
import shutil
from pathlib import Path
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import torch
import tqdm
import yaml
from datasets import Dataset, Features, Image, Sequence, Value
from PIL import Image as PILImage
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
from lerobot.common.datasets.push_dataset_to_hub.openx.transforms import OPENX_STANDARDIZATION_TRANSFORMS
from lerobot.common.datasets.push_dataset_to_hub.utils import (
concatenate_episodes,
get_default_encoding,
save_images_concurrently,
)
from lerobot.common.datasets.utils import (
calculate_episode_data_index,
hf_transform_to_torch,
)
from lerobot.common.datasets.video_utils import VideoFrame, encode_video_frames
with open("lerobot/common/datasets/push_dataset_to_hub/openx/configs.yaml") as f:
_openx_list = yaml.safe_load(f)
OPENX_DATASET_CONFIGS = _openx_list["OPENX_DATASET_CONFIGS"]
np.set_printoptions(precision=2)
def tf_to_torch(data):
return torch.from_numpy(data.numpy())
def tf_img_convert(img):
if img.dtype == tf.string:
img = tf.io.decode_image(img, expand_animations=False, dtype=tf.uint8)
elif img.dtype != tf.uint8:
raise ValueError(f"Unsupported image dtype: found with dtype {img.dtype}")
return img.numpy()
def _broadcast_metadata_rlds(i: tf.Tensor, traj: dict) -> dict:
"""
In the RLDS format, each trajectory has some top-level metadata that is explicitly separated out, and a "steps"
entry. This function moves the "steps" entry to the top level, broadcasting any metadata to the length of the
trajectory. This function also adds the extra metadata fields `_len`, `_traj_index`, and `_frame_index`.
NOTE: adapted from DLimp library https://github.com/kvablack/dlimp/
"""
steps = traj.pop("steps")
traj_len = tf.shape(tf.nest.flatten(steps)[0])[0]
# broadcast metadata to the length of the trajectory
metadata = tf.nest.map_structure(lambda x: tf.repeat(x, traj_len), traj)
# put steps back in
assert "traj_metadata" not in steps
traj = {**steps, "traj_metadata": metadata}
assert "_len" not in traj
assert "_traj_index" not in traj
assert "_frame_index" not in traj
traj["_len"] = tf.repeat(traj_len, traj_len)
traj["_traj_index"] = tf.repeat(i, traj_len)
traj["_frame_index"] = tf.range(traj_len)
return traj
def load_from_raw(
raw_dir: Path,
videos_dir: Path,
fps: int,
video: bool,
episodes: list[int] | None = None,
encoding: dict | None = None,
openx_dataset_name: str | None = None,
):
"""
Args:
raw_dir (Path): _description_
videos_dir (Path): _description_
fps (int): _description_
video (bool): _description_
episodes (list[int] | None, optional): _description_. Defaults to None.
"""
ds_builder = tfds.builder_from_directory(str(raw_dir))
dataset = ds_builder.as_dataset(
split="all",
decoders={"steps": tfds.decode.SkipDecoding()},
)
dataset_info = ds_builder.info
print("dataset_info: ", dataset_info)
ds_length = len(dataset)
dataset = dataset.take(ds_length)
# "flatten" the dataset as such we can apply trajectory level map() easily
# each [obs][key] has a shape of (frame_size, ...)
dataset = dataset.enumerate().map(_broadcast_metadata_rlds)
# apply the standardization transform if the dataset name is provided;
# otherwise (e.g. when converting an arbitrary RLDS-formatted dataset),
# search for 'image' keys in the observations
if openx_dataset_name is not None:
print(" - applying standardization transform for dataset: ", openx_dataset_name)
assert openx_dataset_name in OPENX_STANDARDIZATION_TRANSFORMS
transform_fn = OPENX_STANDARDIZATION_TRANSFORMS[openx_dataset_name]
dataset = dataset.map(transform_fn)
image_keys = OPENX_DATASET_CONFIGS[openx_dataset_name]["image_obs_keys"]
else:
obs_keys = dataset_info.features["steps"]["observation"].keys()
image_keys = [key for key in obs_keys if "image" in key]
lang_key = "language_instruction" if "language_instruction" in dataset.element_spec else None
print(" - image_keys: ", image_keys)
print(" - lang_key: ", lang_key)
it = iter(dataset)
ep_dicts = []
# Init temp path to save ep_dicts in case of crash
tmp_ep_dicts_dir = videos_dir.parent.joinpath("ep_dicts")
tmp_ep_dicts_dir.mkdir(parents=True, exist_ok=True)
# check if ep_dicts have already been saved in the temporary directory
starting_ep_idx = 0
saved_ep_dicts = [ep.__str__() for ep in tmp_ep_dicts_dir.iterdir()]
if len(saved_ep_dicts) > 0:
saved_ep_dicts.sort()
# get last ep_idx number
starting_ep_idx = int(saved_ep_dicts[-1][-13:-3]) + 1
for i in range(starting_ep_idx):
episode = next(it)
ep_dicts.append(torch.load(saved_ep_dicts[i]))
# if the user specified episodes, validate and sort the requested indices
if episodes is not None:
if ds_length == 0:
raise ValueError("No episodes found.")
# convert episodes index to sorted list
episodes = sorted(episodes)
for ep_idx in tqdm.tqdm(range(starting_ep_idx, ds_length)):
episode = next(it)
# if user specified episodes, skip the ones not in the list
if episodes is not None:
if len(episodes) == 0:
break
if ep_idx == episodes[0]:
# process this episode
print(" selecting episode idx: ", ep_idx)
episodes.pop(0)
else:
continue # skip
num_frames = episode["action"].shape[0]
###########################################################
# Handle the episodic data
# last step of demonstration is considered done
done = torch.zeros(num_frames, dtype=torch.bool)
done[-1] = True
ep_dict = {}
langs = [] # TODO: might be located in "observation"
image_array_dict = {key: [] for key in image_keys}
# We will create the state observation tensor by stacking the state
# obs keys defined in openx/configs.yaml
if openx_dataset_name is not None:
state_obs_keys = OPENX_DATASET_CONFIGS[openx_dataset_name]["state_obs_keys"]
# stack the state observations; if a key is missing, pad with zeros
states = []
for key in state_obs_keys:
if key in episode["observation"]:
states.append(tf_to_torch(episode["observation"][key]))
else:
states.append(torch.zeros(num_frames, 1)) # pad with zeros
states = torch.cat(states, dim=1)
# assert states.shape == (num_frames, 8), f"states shape: {states.shape}"
else:
states = tf_to_torch(episode["observation"]["state"])
actions = tf_to_torch(episode["action"])
rewards = tf_to_torch(episode["reward"]).float()
# If lang_key is present, decode the language instruction for every frame
if lang_key is not None:
langs = [str(x) for x in episode[lang_key]]
for im_key in image_keys:
imgs = episode["observation"][im_key]
image_array_dict[im_key] = [tf_img_convert(img) for img in imgs]
# simple assertions
for item in [states, actions, rewards, done]:
assert len(item) == num_frames
###########################################################
# loop through all cameras
for im_key in image_keys:
img_key = f"observation.images.{im_key}"
imgs_array = image_array_dict[im_key]
imgs_array = np.array(imgs_array)
if video:
# save png images in temporary directory
tmp_imgs_dir = videos_dir / "tmp_images"
save_images_concurrently(imgs_array, tmp_imgs_dir)
# encode images to a mp4 video
fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
video_path = videos_dir / fname
encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {}))
# clean temporary images directory
shutil.rmtree(tmp_imgs_dir)
# store the reference to the video frame
ep_dict[img_key] = [
{"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames)
]
else:
ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array]
if lang_key is not None:
ep_dict["language_instruction"] = langs
ep_dict["observation.state"] = states
ep_dict["action"] = actions
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames)
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
ep_dict["next.reward"] = rewards
ep_dict["next.done"] = done
path_ep_dict = tmp_ep_dicts_dir.joinpath(
"ep_dict_" + "0" * (10 - len(str(ep_idx))) + str(ep_idx) + ".pt"
)
torch.save(ep_dict, path_ep_dict)
ep_dicts.append(ep_dict)
data_dict = concatenate_episodes(ep_dicts)
total_frames = data_dict["frame_index"].shape[0]
data_dict["index"] = torch.arange(0, total_frames, 1)
return data_dict
def to_hf_dataset(data_dict, video) -> Dataset:
features = {}
keys = [key for key in data_dict if "observation.images." in key]
for key in keys:
if video:
features[key] = VideoFrame()
else:
features[key] = Image()
features["observation.state"] = Sequence(
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
)
if "observation.velocity" in data_dict:
features["observation.velocity"] = Sequence(
length=data_dict["observation.velocity"].shape[1], feature=Value(dtype="float32", id=None)
)
if "observation.effort" in data_dict:
features["observation.effort"] = Sequence(
length=data_dict["observation.effort"].shape[1], feature=Value(dtype="float32", id=None)
)
if "language_instruction" in data_dict:
features["language_instruction"] = Value(dtype="string", id=None)
features["action"] = Sequence(
length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)
)
features["episode_index"] = Value(dtype="int64", id=None)
features["frame_index"] = Value(dtype="int64", id=None)
features["timestamp"] = Value(dtype="float32", id=None)
features["next.reward"] = Value(dtype="float32", id=None)
features["next.done"] = Value(dtype="bool", id=None)
features["index"] = Value(dtype="int64", id=None)
hf_dataset = Dataset.from_dict(data_dict, features=Features(features))
hf_dataset.set_transform(hf_transform_to_torch)
return hf_dataset
def from_raw_to_lerobot_format(
raw_dir: Path,
videos_dir: Path,
fps: int | None = None,
video: bool = True,
episodes: list[int] | None = None,
encoding: dict | None = None,
openx_dataset_name: str | None = None,
):
"""This is a test impl for rlds conversion"""
if openx_dataset_name is None:
# set a default rlds frame rate if the dataset is not from openx
fps = 30
elif "fps" not in OPENX_DATASET_CONFIGS[openx_dataset_name]:
raise ValueError(
"fps for this dataset is not specified in openx/configs.yaml yet, "
"which means it has not been tested yet"
)
else:
fps = OPENX_DATASET_CONFIGS[openx_dataset_name]["fps"]
data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding, openx_dataset_name)
hf_dataset = to_hf_dataset(data_dict, video)
episode_data_index = calculate_episode_data_index(hf_dataset)
info = {
"codebase_version": CODEBASE_VERSION,
"fps": fps,
"video": video,
}
if video:
info["encoding"] = get_default_encoding()
return hf_dataset, episode_data_index, info
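A hedged sketch of calling the converter directly from Python rather than through `push_dataset_to_hub.py`; the paths and episode indices below are placeholders taken from the module docstring example, not a prescribed setup.
```python
# Hypothetical direct call on a locally downloaded RLDS dataset; paths are placeholders.
from pathlib import Path

hf_dataset, episode_data_index, info = from_raw_to_lerobot_format(
    raw_dir=Path("/hdd/tensorflow_datasets/bridge_dataset/1.0.0"),
    videos_dir=Path("outputs/videos"),
    video=True,
    episodes=[3, 4, 5],
    openx_dataset_name="bridge_orig",
)
print(info["fps"], info["video"])
```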

View File

@@ -80,6 +80,11 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]):
if isinstance(first_item, PILImage.Image):
to_tensor = transforms.ToTensor()
items_dict[key] = [to_tensor(img) for img in items_dict[key]]
elif isinstance(first_item, str):
# TODO (michel-aractingi): add str2embedding via language tokenizer
# For now we leave this part up to the user to choose how to address
# language conditioned tasks
pass
elif isinstance(first_item, dict) and "path" in first_item and "timestamp" in first_item:
# video frame will be processed downstream
pass
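The new `str` branch intentionally leaves language embedding to the user. As a hedged illustration only (not part of this change), one way to embed the raw `language_instruction` strings downstream, assuming the Hugging Face `transformers` library and an arbitrary model choice:
```python
# Sketch only: tokenize language instructions downstream of the dataset transform.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed model choice

def tokenize_instructions(instructions: list[str]):
    # Returns padded input_ids / attention_mask tensors for a language encoder.
    return tokenizer(instructions, padding=True, return_tensors="pt")
```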

View File

@@ -296,7 +296,7 @@ class ACT(nn.Module):
self.use_images = any(k.startswith("observation.image") for k in config.input_shapes)
self.use_env_state = "observation.environment_state" in config.input_shapes
if self.config.use_vae:
self.vae_encoder = ACTEncoder(config)
self.vae_encoder = ACTEncoder(config, is_vae_encoder=True)
self.vae_encoder_cls_embed = nn.Embedding(1, config.dim_model)
# Projection layer for joint-space configuration to hidden dimension.
if self.use_robot_state:
@@ -521,9 +521,11 @@ class ACT(nn.Module):
class ACTEncoder(nn.Module):
"""Convenience module for running multiple encoder layers, maybe followed by normalization."""
def __init__(self, config: ACTConfig):
def __init__(self, config: ACTConfig, is_vae_encoder: bool = False):
super().__init__()
self.layers = nn.ModuleList([ACTEncoderLayer(config) for _ in range(config.n_encoder_layers)])
self.is_vae_encoder = is_vae_encoder
num_layers = config.n_vae_encoder_layers if self.is_vae_encoder else config.n_encoder_layers
self.layers = nn.ModuleList([ACTEncoderLayer(config) for _ in range(num_layers)])
self.norm = nn.LayerNorm(config.dim_model) if config.pre_norm else nn.Identity()
def forward(

View File

@@ -196,3 +196,12 @@ class DiffusionConfig:
f"`noise_scheduler_type` must be one of {supported_noise_schedulers}. "
f"Got {self.noise_scheduler_type}."
)
# Check that the horizon size and U-Net downsampling is compatible.
# U-Net downsamples by 2 with each stage.
downsampling_factor = 2 ** len(self.down_dims)
if self.horizon % downsampling_factor != 0:
raise ValueError(
"The horizon should be an integer multiple of the downsampling factor (which is determined "
f"by `len(down_dims)`). Got {self.horizon=} and {self.down_dims=}"
)
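A quick worked example of the new check (the values are illustrative, not necessarily the config defaults):
```python
# Each U-Net stage downsamples by 2, so the horizon must be divisible by 2 ** len(down_dims).
down_dims = (512, 1024, 2048)              # three stages
downsampling_factor = 2 ** len(down_dims)  # 8
horizon = 16                               # valid: 16 % 8 == 0
# horizon = 12 would raise ValueError, since 12 % 8 != 0
```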

View File

@@ -350,17 +350,22 @@ class VQBeTModel(nn.Module):
# get action features (pass through GPT)
features = self.policy(input_tokens)
# len(self.config.input_shapes) is the number of different observation modes. this line gets the index of action prompt tokens.
# len(self.config.input_shapes) is the number of different observation modes.
# this line gets the index of action prompt tokens.
historical_act_pred_index = np.arange(0, n_obs_steps) * (len(self.config.input_shapes) + 1) + len(
self.config.input_shapes
)
# only extract the output tokens at the position of action query:
# Behavior Transformer (BeT), and VQ-BeT are both sequence-to-sequence prediction models, mapping sequential observation to sequential action (please refer to section 2.2 in BeT paper https://arxiv.org/pdf/2206.11251).
# Thus, it predict historical action sequence, in addition to current and future actions (predicting future actions : optional).
features = torch.cat(
[features[:, historical_act_pred_index], features[:, -len_additional_action_token:]], dim=1
)
# Behavior Transformer (BeT), and VQ-BeT are both sequence-to-sequence prediction models,
# mapping sequential observation to sequential action (please refer to section 2.2 in BeT paper https://arxiv.org/pdf/2206.11251).
# Thus, it predicts a historical action sequence, in addition to current and future actions (predicting future actions : optional).
if len_additional_action_token > 0:
features = torch.cat(
[features[:, historical_act_pred_index], features[:, -len_additional_action_token:]], dim=1
)
else:
features = features[:, historical_act_pred_index]
# pass through action head
action_head_output = self.action_head(features)
# if rollout, VQ-BeT doesn't calculate the loss
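A small numeric sketch of the index computation above, for the `n_action_pred_token=1` case this fix addresses (the counts below are illustrative):
```python
import numpy as np

n_obs_steps = 3
num_input_shapes = 2  # stands in for len(self.config.input_shapes)
historical_act_pred_index = np.arange(0, n_obs_steps) * (num_input_shapes + 1) + num_input_shapes
print(historical_act_pred_index)  # [2 5 8] -> positions of the action prompt tokens
# When n_action_pred_token=1, len_additional_action_token is 0, so only these
# positions are kept instead of concatenating an empty trailing slice.
```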

View File

@@ -0,0 +1,448 @@
"""
This file contains utilities for recording frames from Intel Realsense cameras.
"""
import argparse
import concurrent.futures
import logging
import shutil
import threading
import time
import traceback
from dataclasses import dataclass, replace
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import pyrealsense2 as rs
from PIL import Image
from lerobot.common.robot_devices.utils import (
RobotDeviceAlreadyConnectedError,
RobotDeviceNotConnectedError,
)
from lerobot.common.utils.utils import capture_timestamp_utc
from lerobot.scripts.control_robot import busy_wait
SERIAL_NUMBER_INDEX = 1
def find_camera_indices(raise_when_empty=True) -> list[int]:
"""
Find the serial numbers of the Intel RealSense cameras
connected to the computer.
"""
camera_ids = []
for device in rs.context().query_devices():
serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX)))
camera_ids.append(serial_number)
if raise_when_empty and len(camera_ids) == 0:
raise OSError(
"Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware."
)
return camera_ids
def save_image(img_array, camera_idx, frame_index, images_dir):
try:
img = Image.fromarray(img_array)
path = images_dir / f"camera_{camera_idx}_frame_{frame_index:06d}.png"
path.parent.mkdir(parents=True, exist_ok=True)
img.save(str(path), quality=100)
logging.info(f"Saved image: {path}")
except Exception as e:
logging.error(f"Failed to save image for camera {camera_idx} frame {frame_index}: {e}")
def save_images_from_cameras(
images_dir: Path,
camera_ids: list[int] | None = None,
fps=None,
width=None,
height=None,
record_time_s=2,
):
"""
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
associated with a given camera index.
"""
if camera_ids is None:
camera_ids = find_camera_indices()
print("Connecting cameras")
cameras = []
for cam_idx in camera_ids:
camera = IntelRealSenseCamera(cam_idx, fps=fps, width=width, height=height)
camera.connect()
print(
f"IntelRealSenseCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
)
cameras.append(camera)
images_dir = Path(images_dir)
if images_dir.exists():
shutil.rmtree(
images_dir,
)
images_dir.mkdir(parents=True, exist_ok=True)
print(f"Saving images to {images_dir}")
frame_index = 0
start_time = time.perf_counter()
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
while True:
now = time.perf_counter()
for camera in cameras:
# If we use async_read when fps is None, the loop will go full speed, and we will end up
# saving the same images from the cameras multiple times until the RAM/disk is full.
image = camera.read() if fps is None else camera.async_read()
if image is None:
print("No Frame")
continue
bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
executor.submit(
save_image,
bgr_converted_image,
camera.camera_index,
frame_index,
images_dir,
)
if fps is not None:
dt_s = time.perf_counter() - now
busy_wait(1 / fps - dt_s)
if time.perf_counter() - start_time > record_time_s:
break
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
frame_index += 1
finally:
print(f"Images have been saved to {images_dir}")
for camera in cameras:
camera.disconnect()
@dataclass
class IntelRealSenseCameraConfig:
"""
Example of tested options for Intel Real Sense D405:
```python
IntelRealSenseCameraConfig(30, 640, 480)
IntelRealSenseCameraConfig(60, 640, 480)
IntelRealSenseCameraConfig(90, 640, 480)
IntelRealSenseCameraConfig(30, 1280, 720)
IntelRealSenseCameraConfig(30, 640, 480, use_depth=True)
```
"""
fps: int | None = None
width: int | None = None
height: int | None = None
color_mode: str = "rgb"
use_depth: bool = False
force_hardware_reset: bool = True
def __post_init__(self):
if self.color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)
if (self.fps or self.width or self.height) and not (self.fps and self.width and self.height):
raise ValueError(
"For `fps`, `width` and `height`, either all of them need to be set, or none of them, "
f"but {self.fps=}, {self.width=}, {self.height=} were provided."
)
class IntelRealSenseCamera:
"""
The IntelRealSenseCamera class is similar to the OpenCVCamera class but adds features specific to Intel RealSense cameras:
- camera_index corresponds to the serial number of the camera,
- camera_index won't randomly change, as can happen with OpenCVCamera on Linux,
- read is more reliable than with OpenCVCamera,
- a depth map can be returned.
To find the camera indices of your cameras, you can run our utility script that will save a few frames for each camera:
```bash
python lerobot/common/robot_devices/cameras/intelrealsense.py --images-dir outputs/images_from_intelrealsense_cameras
```
When an IntelRealSenseCamera is instantiated, if no specific config is provided, the default fps, width, height and color_mode
of the given camera will be used.
Example of usage:
```python
camera_index = 128422271347
camera = IntelRealSenseCamera(camera_index)
camera.connect()
color_image = camera.read()
# when done using the camera, consider disconnecting
camera.disconnect()
```
Example of changing default fps, width, height and color_mode:
```python
camera = IntelRealSenseCamera(camera_index, fps=30, width=1280, height=720)
camera.connect()  # applies the settings, might error out if these settings are not compatible with the camera
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480)
camera.connect()
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480, color_mode="bgr")
camera.connect()
```
Example of returning depth:
```python
camera = IntelRealSenseCamera(camera_index, use_depth=True)
camera.connect()
color_image, depth_map = camera.read()
```
"""
def __init__(
self,
camera_index: int,
config: IntelRealSenseCameraConfig | None = None,
**kwargs,
):
if config is None:
config = IntelRealSenseCameraConfig()
# Overwrite the config arguments using kwargs
config = replace(config, **kwargs)
self.camera_index = camera_index
self.fps = config.fps
self.width = config.width
self.height = config.height
self.color_mode = config.color_mode
self.use_depth = config.use_depth
self.force_hardware_reset = config.force_hardware_reset
self.camera = None
self.is_connected = False
self.thread = None
self.stop_event = None
self.color_image = None
self.depth_map = None
self.logs = {}
def connect(self):
if self.is_connected:
raise RobotDeviceAlreadyConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is already connected."
)
config = rs.config()
config.enable_device(str(self.camera_index))
if self.fps and self.width and self.height:
# TODO(rcadene): can we set rgb8 directly?
config.enable_stream(rs.stream.color, self.width, self.height, rs.format.rgb8, self.fps)
else:
config.enable_stream(rs.stream.color)
if self.use_depth:
if self.fps and self.width and self.height:
config.enable_stream(rs.stream.depth, self.width, self.height, rs.format.z16, self.fps)
else:
config.enable_stream(rs.stream.depth)
self.camera = rs.pipeline()
try:
self.camera.start(config)
is_camera_open = True
except RuntimeError:
is_camera_open = False
traceback.print_exc()
# If the camera doesn't work, display the camera indices corresponding to
# valid cameras.
if not is_camera_open:
# Verify that the provided `camera_index` is valid before printing the traceback
available_cam_ids = find_camera_indices()
if self.camera_index not in available_cam_ids:
raise ValueError(
f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
"To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`."
)
raise OSError(f"Can't access IntelRealSenseCamera({self.camera_index}).")
self.is_connected = True
def read(self, temporary_color: str | None = None) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
"""Read a frame from the camera returned in the format height x width x channels (e.g. 480 x 640 x 3)
of type `np.uint8`, contrarily to the pytorch format which is float channel first.
When `use_depth=True`, returns a tuple `(color_image, depth_map)` with a depth map in the format
height x width (e.g. 480 x 640) of type np.uint16.
Note: Reading a frame is done every `camera.fps` times per second, and it is blocking.
If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
"""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
start_time = time.perf_counter()
frame = self.camera.wait_for_frames(timeout_ms=5000)
color_frame = frame.get_color_frame()
if not color_frame:
raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.camera_index}).")
color_image = np.asanyarray(color_frame.get_data())
requested_color_mode = self.color_mode if temporary_color is None else temporary_color
if requested_color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided."
)
# IntelRealSense uses RGB format as default (red, green, blue).
if requested_color_mode == "bgr":
color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)
h, w, _ = color_image.shape
if h != self.height or w != self.width:
raise OSError(
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
)
# log the number of seconds it took to read the image
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
# log the utc time at which the image was received
self.logs["timestamp_utc"] = capture_timestamp_utc()
if self.use_depth:
depth_frame = frame.get_depth_frame()
if not depth_frame:
raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.camera_index}).")
depth_map = np.asanyarray(depth_frame.get_data())
h, w = depth_map.shape
if h != self.height or w != self.width:
raise OSError(
f"Can't capture depth map with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
)
return color_image, depth_map
else:
return color_image
def read_loop(self):
while self.stop_event is None or not self.stop_event.is_set():
if self.use_depth:
self.color_image, self.depth_map = self.read()
else:
self.color_image = self.read()
def async_read(self):
"""Access the latest color image"""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
if self.thread is None:
self.stop_event = threading.Event()
self.thread = Thread(target=self.read_loop, args=())
self.thread.daemon = True
self.thread.start()
num_tries = 0
while self.color_image is None:
num_tries += 1
time.sleep(1 / self.fps)
if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()):
raise Exception(
"The thread responsible for `self.async_read()` took too much time to start. There might be an issue. Verify that `self.thread.start()` has been called."
)
if self.use_depth:
return self.color_image, self.depth_map
else:
return self.color_image
def disconnect(self):
if not self.is_connected:
raise RobotDeviceNotConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
if self.thread is not None and self.thread.is_alive():
# wait for the thread to finish
self.stop_event.set()
self.thread.join()
self.thread = None
self.stop_event = None
self.camera.stop()
self.camera = None
self.is_connected = False
def __del__(self):
if getattr(self, "is_connected", False):
self.disconnect()
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Save a few frames using `IntelRealSenseCamera` for all cameras connected to the computer, or a selected subset."
)
parser.add_argument(
"--camera-ids",
type=int,
nargs="*",
default=None,
help="List of camera indices used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
)
parser.add_argument(
"--fps",
type=int,
default=30,
help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
)
parser.add_argument(
"--width",
type=int,
default=640,
help="Set the width for all cameras. If not provided, use the default width of each camera.",
)
parser.add_argument(
"--height",
type=int,
default=480,
help="Set the height for all cameras. If not provided, use the default height of each camera.",
)
parser.add_argument(
"--images-dir",
type=Path,
default="outputs/images_from_intelrealsense_cameras",
help="Set directory to save a few frames for each camera.",
)
parser.add_argument(
"--record-time-s",
type=float,
default=2.0,
help="Set the number of seconds used to record the frames. By default, 2 seconds.",
)
args = parser.parse_args()
save_images_from_cameras(**vars(args))

View File

@@ -17,9 +17,12 @@ import cv2
import numpy as np
from PIL import Image
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
from lerobot.common.robot_devices.utils import (
RobotDeviceAlreadyConnectedError,
RobotDeviceNotConnectedError,
busy_wait,
)
from lerobot.common.utils.utils import capture_timestamp_utc
from lerobot.scripts.control_robot import busy_wait
# Use 1 thread to avoid blocking the main thread. Especially useful during data collection
# when other threads are used to save the images.
@@ -77,6 +80,10 @@ def save_image(img_array, camera_index, frame_index, images_dir):
def save_images_from_cameras(
images_dir: Path, camera_ids: list[int] | None = None, fps=None, width=None, height=None, record_time_s=2
):
"""
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
associated to a given camera index.
"""
if camera_ids is None:
camera_ids = find_camera_indices()
@@ -153,7 +160,7 @@ class OpenCVCameraConfig:
def __post_init__(self):
if self.color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"Expected color_mode values are 'rgb' or 'bgr', but {self.color_mode} is provided."
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)
@@ -199,6 +206,7 @@ class OpenCVCamera:
def __init__(self, camera_index: int, config: OpenCVCameraConfig | None = None, **kwargs):
if config is None:
config = OpenCVCameraConfig()
# Overwrite config arguments using kwargs
config = replace(config, **kwargs)
@@ -217,7 +225,7 @@ class OpenCVCamera:
def connect(self):
if self.is_connected:
raise RobotDeviceAlreadyConnectedError(f"Camera {self.camera_index} is already connected.")
raise RobotDeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.")
# First create a temporary camera trying to access `camera_index`,
# and verify it is a valid camera by calling `isOpened`.
@@ -243,7 +251,7 @@ class OpenCVCamera:
"To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/opencv.py`."
)
raise OSError(f"Can't access camera {self.camera_index}.")
raise OSError(f"Can't access OpenCVCamera({self.camera_index}).")
# Secondly, create the camera that will be used downstream.
# Note: For some unknown reason, calling `isOpened` blocks the camera which then
@@ -266,15 +274,15 @@ class OpenCVCamera:
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
raise OSError(
f"Can't set {self.fps=} for camera {self.camera_index}. Actual value is {actual_fps}."
f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}."
)
if self.width is not None and self.width != actual_width:
raise OSError(
f"Can't set {self.width=} for camera {self.camera_index}. Actual value is {actual_width}."
f"Can't set {self.width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}."
)
if self.height is not None and self.height != actual_height:
raise OSError(
f"Can't set {self.height=} for camera {self.camera_index}. Actual value is {actual_height}."
f"Can't set {self.height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}."
)
self.fps = actual_fps
@@ -285,7 +293,7 @@ class OpenCVCamera:
def read(self, temporary_color_mode: str | None = None) -> np.ndarray:
"""Read a frame from the camera returned in the format (height, width, channels)
(e.g. (640, 480, 3)), contrarily to the pytorch format which is channel first.
(e.g. 480 x 640 x 3), contrarily to the pytorch format which is channel first.
Note: Reading a frame is done every `camera.fps` times per second, and it is blocking.
If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
@@ -308,7 +316,7 @@ class OpenCVCamera:
f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided."
)
# OpenCV uses BGR format as default (blue, green red) for all operations, including displaying images.
# OpenCV uses BGR format as default (blue, green, red) for all operations, including displaying images.
# However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks,
# so we convert the image color from BGR to RGB.
if requested_color_mode == "rgb":

View File

@@ -1,4 +1,6 @@
import enum
import logging
import math
import time
import traceback
from copy import deepcopy
@@ -27,11 +29,28 @@ TIMEOUT_MS = 1000
MAX_ID_RANGE = 252
# The following bounds define the lower and upper joints range (after calibration).
# For joints in degree (i.e. revolute joints), their nominal range is [-180, 180] degrees
# which corresponds to a half rotation on the left and half rotation on the right.
# Some joints might require a higher range, so we allow up to [-270, 270] degrees until
# an error is raised.
LOWER_BOUND_DEGREE = -270
UPPER_BOUND_DEGREE = 270
# For joints in percentage (i.e. joints that move linearly like the prismatic joint of a gripper),
# their nominal range is [0, 100] %. For instance, for Aloha gripper, 0% is fully
# closed, and 100% is fully open. To account for slight calibration issues, we allow up to
# [-10, 110] until an error is raised.
LOWER_BOUND_LINEAR = -10
UPPER_BOUND_LINEAR = 110
HALF_TURN_DEGREE = 180
# https://emanual.robotis.com/docs/en/dxl/x/xl330-m077
# https://emanual.robotis.com/docs/en/dxl/x/xl330-m288
# https://emanual.robotis.com/docs/en/dxl/x/xl430-w250
# https://emanual.robotis.com/docs/en/dxl/x/xm430-w350
# https://emanual.robotis.com/docs/en/dxl/x/xm540-w270
# https://emanual.robotis.com/docs/en/dxl/x/xc430-w150
# data_name: (address, size_byte)
X_SERIES_CONTROL_TABLE = {
@@ -109,6 +128,7 @@ MODEL_CONTROL_TABLE = {
"xl430-w250": X_SERIES_CONTROL_TABLE,
"xm430-w350": X_SERIES_CONTROL_TABLE,
"xm540-w270": X_SERIES_CONTROL_TABLE,
"xc430-w150": X_SERIES_CONTROL_TABLE,
}
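A small sketch of how this mapping is meant to be consumed; the concrete addresses and sizes live in `X_SERIES_CONTROL_TABLE` (not shown in this hunk), so the lookup below only illustrates the `data_name: (address, size_byte)` convention.

```python
# Illustrative lookup following the data_name -> (address, size_byte) convention.
model = "xl430-w250"
address, size_byte = MODEL_CONTROL_TABLE[model]["Goal_Position"]
print(f"Goal_Position on {model}: address={address}, size={size_byte} bytes")
```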
MODEL_RESOLUTION = {
@@ -118,6 +138,7 @@ MODEL_RESOLUTION = {
"xl430-w250": 4096,
"xm430-w350": 4096,
"xm540-w270": 4096,
"xc430-w150": 4096,
}
MODEL_BAUDRATE_TABLE = {
@@ -127,20 +148,18 @@ MODEL_BAUDRATE_TABLE = {
"xl430-w250": X_SERIES_BAUDRATE_TABLE,
"xm430-w350": X_SERIES_BAUDRATE_TABLE,
"xm540-w270": X_SERIES_BAUDRATE_TABLE,
"xc430-w150": X_SERIES_BAUDRATE_TABLE,
}
NUM_READ_RETRY = 10
NUM_WRITE_RETRY = 10
def convert_degrees_to_steps(degrees: float | np.ndarray, models: str | list[str]):
"""This function convert the degree range to the step range for indicating motors rotation.
It assums a motor achieves a full rotation by going from -180 degree position to +180.
def convert_degrees_to_steps(degrees: float | np.ndarray, models: str | list[str]) -> np.ndarray:
"""This function converts the degree range to the step range for indicating motors rotation.
It assumes a motor achieves a full rotation by going from -180 degree position to +180.
The motor resolution (e.g. 4096) corresponds to the number of steps needed to achieve a full rotation.
"""
if isinstance(degrees, float):
degrees = np.array(degrees)
resolutions = [MODEL_RESOLUTION[model] for model in models]
steps = degrees / 180 * np.array(resolutions) / 2
steps = steps.astype(int)
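A quick numeric check of the conversion, using the 4096-step resolution listed in `MODEL_RESOLUTION` for the X-series models: a quarter turn of 90 degrees maps to 1024 steps.

```python
import numpy as np

# 90 / 180 * 4096 / 2 = 1024 steps for a 4096-step motor
steps = convert_degrees_to_steps(90.0, ["xl430-w250"])
assert steps == 1024

# Works element-wise on arrays of degrees and lists of models as well.
steps = convert_degrees_to_steps(np.array([0.0, 90.0]), ["xl430-w250", "xm430-w350"])
print(steps)  # [   0 1024]
```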
@@ -250,20 +269,24 @@ class TorqueMode(enum.Enum):
DISABLED = 0
class OperatingMode(enum.Enum):
VELOCITY = 1
POSITION = 3
EXTENDED_POSITION = 4
CURRENT_CONTROLLED_POSITION = 5
PWM = 16
UNKNOWN = -1
class DriveMode(enum.Enum):
NON_INVERTED = 0
INVERTED = 1
class CalibrationMode(enum.Enum):
# Joints with rotational motions are expressed in degrees in nominal range of [-180, 180]
DEGREE = 0
# Joints with linear motions (like the gripper of Aloha) are expressed in a nominal range of [0, 100]
LINEAR = 1
class JointOutOfRangeError(Exception):
def __init__(self, message="Joint is out of range"):
self.message = message
super().__init__(self.message)
class DynamixelMotorsBus:
# TODO(rcadene): Add a script to find the motor indices without Dynamixel Wizard 2
"""
@@ -531,9 +554,22 @@ class DynamixelMotorsBus:
def motor_indices(self) -> list[int]:
return [idx for idx, _ in self.motors.values()]
def set_calibration(self, calibration: dict[str, tuple[int, bool]]):
def set_calibration(self, calibration: dict[str, list]):
self.calibration = calibration
def apply_calibration_autocorrect(self, values: np.ndarray | list, motor_names: list[str] | None):
"""This function applies the calibration, automatically detects out of range errors for motors values and attempts to correct.
For more info, see docstring of `apply_calibration` and `autocorrect_calibration`.
"""
try:
values = self.apply_calibration(values, motor_names)
except JointOutOfRangeError as e:
print(e)
self.autocorrect_calibration(values, motor_names)
values = self.apply_calibration(values, motor_names)
return values
def apply_calibration(self, values: np.ndarray | list, motor_names: list[str] | None):
"""Convert from unsigned int32 joint position range [0, 2**32[ to the universal float32 nominal degree range ]-180.0, 180.0[ with
a "zero position" at 0 degree.
@@ -551,53 +587,197 @@ class DynamixelMotorsBus:
if motor_names is None:
motor_names = self.motor_names
# Convert from unsigned int32 original range [0, 2**32[ to centered signed int32 range [-2**31, 2**31[
values = values.astype(np.int32)
for i, name in enumerate(motor_names):
homing_offset, drive_mode = self.calibration[name]
# Update direction of rotation of the motor to match between leader and follower. In fact, the motor of the leader for a given joint
# can be assembled in an opposite direction in term of rotation than the motor of the follower on the same joint.
if drive_mode:
values[i] *= -1
# Convert from range [-2**31, 2**31[ to nominal range ]-resolution, resolution[ (e.g. ]-2048, 2048[)
values[i] += homing_offset
# Convert from range ]-resolution, resolution[ to the universal float32 centered degree range ]-180, 180[
# Convert from unsigned int32 original range [0, 2**32] to signed float32 range
values = values.astype(np.float32)
for i, name in enumerate(motor_names):
_, model = self.motors[name]
resolution = self.model_resolution[model]
values[i] = values[i] / (resolution // 2) * 180
calib_idx = self.calibration["motor_names"].index(name)
calib_mode = self.calibration["calib_mode"][calib_idx]
if CalibrationMode[calib_mode] == CalibrationMode.DEGREE:
drive_mode = self.calibration["drive_mode"][calib_idx]
homing_offset = self.calibration["homing_offset"][calib_idx]
_, model = self.motors[name]
resolution = self.model_resolution[model]
# Update direction of rotation of the motor to match between leader and follower.
# In fact, the motor of the leader for a given joint can be assembled in an
# opposite direction in terms of rotation than the motor of the follower on the same joint.
if drive_mode:
values[i] *= -1
# Convert from range [-2**31, 2**31] to
# nominal range [-resolution//2, resolution//2] (e.g. [-2048, 2048])
values[i] += homing_offset
# Convert from range [-resolution//2, resolution//2] to
# universal float32 centered degree range [-180, 180]
# (e.g. 2048 / (4096 // 2) * 180 = 180)
values[i] = values[i] / (resolution // 2) * HALF_TURN_DEGREE
if (values[i] < LOWER_BOUND_DEGREE) or (values[i] > UPPER_BOUND_DEGREE):
raise JointOutOfRangeError(
f"Wrong motor position range detected for {name}. "
f"Expected to be in nominal range of [-{HALF_TURN_DEGREE}, {HALF_TURN_DEGREE}] degrees (a full rotation), "
f"with a maximum range of [{LOWER_BOUND_DEGREE}, {UPPER_BOUND_DEGREE}] degrees to account for joints that can rotate a bit more, "
f"but present value is {values[i]} degree. "
"This might be due to a cable connection issue creating an artificial 360 degrees jump in motor values. "
"You need to recalibrate by running: `python lerobot/scripts/control_robot.py calibrate`"
)
elif CalibrationMode[calib_mode] == CalibrationMode.LINEAR:
start_pos = self.calibration["start_pos"][calib_idx]
end_pos = self.calibration["end_pos"][calib_idx]
# Rescale the present position to a nominal range [0, 100] %,
# useful for joints with linear motions like Aloha gripper
values[i] = (values[i] - start_pos) / (end_pos - start_pos) * 100
if (values[i] < LOWER_BOUND_LINEAR) or (values[i] > UPPER_BOUND_LINEAR):
raise JointOutOfRangeError(
f"Wrong motor position range detected for {name}. "
f"Expected to be in nominal range of [0, 100] % (a full linear translation), "
f"with a maximum range of [{LOWER_BOUND_LINEAR}, {UPPER_BOUND_LINEAR}] % to account for some imprecision during calibration, "
f"but present value is {values[i]} %. "
"This might be due to a cable connection issue creating an artificial jump in motor values. "
"You need to recalibrate by running: `python lerobot/scripts/control_robot.py calibrate`"
)
return values
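To make the DEGREE branch concrete, here is the arithmetic for a single motor, assuming a 4096-step resolution, `drive_mode=0`, and an illustrative `homing_offset=-1024` (the LINEAR branch works analogously with `start_pos`/`end_pos`).

```python
raw = 3072                # present position read from the motor, in steps (illustrative)
homing_offset = -1024     # example value produced by calibration
resolution = 4096

centered = raw + homing_offset                  # 2048 steps
degrees = centered / (resolution // 2) * 180    # 2048 / 2048 * 180 = 180.0 degrees
# 180.0 lies inside [-270, 270], so no JointOutOfRangeError is raised.
```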
def autocorrect_calibration(self, values: np.ndarray | list, motor_names: list[str] | None):
"""This function automatically detects issues with values of motors after calibration, and correct for these issues.
Some motors might have values outside of expected maximum bounds after calibration.
For instance, for a joint in degree, its value can be outside [-270, 270] degrees, which is totally unexpected given
a nominal range of [-180, 180] degrees, which represents half a turn to the left or right starting from zero position.
Known issues:
#1: Motor value randomly shifts by a full turn, caused by hardware/connection errors.
#2: Motor internal homing offset is shifted by a full turn, caused by using the default calibration (e.g. Aloha).
#3: Motor internal homing offset is shifted by less or more than a full turn, caused by using the default calibration
or by human error during manual calibration.
Issues #1 and #2 can be solved by shifting the calibration homing offset by a full turn.
Issue #3 will be visually detected by the user and potentially caught by the safety feature `max_relative_target`,
which will slow down the motor and raise an error asking to recalibrate. Manually recalibrating will solve the issue.
Note: A full turn corresponds to 360 degrees but also to 4096 steps for a motor resolution of 4096.
"""
if motor_names is None:
motor_names = self.motor_names
# Convert from unsigned int32 original range [0, 2**32] to signed float32 range
values = values.astype(np.float32)
for i, name in enumerate(motor_names):
calib_idx = self.calibration["motor_names"].index(name)
calib_mode = self.calibration["calib_mode"][calib_idx]
if CalibrationMode[calib_mode] == CalibrationMode.DEGREE:
drive_mode = self.calibration["drive_mode"][calib_idx]
homing_offset = self.calibration["homing_offset"][calib_idx]
_, model = self.motors[name]
resolution = self.model_resolution[model]
# Update direction of rotation of the motor to match between leader and follower.
# In fact, the motor of the leader for a given joint can be assembled in an
# opposite direction in terms of rotation than the motor of the follower on the same joint.
if drive_mode:
values[i] *= -1
# Convert from initial range to range [-180, 180] degrees
calib_val = (values[i] + homing_offset) / (resolution // 2) * HALF_TURN_DEGREE
in_range = (calib_val > LOWER_BOUND_DEGREE) and (calib_val < UPPER_BOUND_DEGREE)
# Solve this inequality to find the factor to shift the range into [-180, 180] degrees
# values[i] = (values[i] + homing_offset + resolution * factor) / (resolution // 2) * HALF_TURN_DEGREE
# - HALF_TURN_DEGREE <= (values[i] + homing_offset + resolution * factor) / (resolution // 2) * HALF_TURN_DEGREE <= HALF_TURN_DEGREE
# (- (resolution // 2) - values[i] - homing_offset) / resolution <= factor <= ((resolution // 2) - values[i] - homing_offset) / resolution
low_factor = (-(resolution // 2) - values[i] - homing_offset) / resolution
upp_factor = ((resolution // 2) - values[i] - homing_offset) / resolution
elif CalibrationMode[calib_mode] == CalibrationMode.LINEAR:
start_pos = self.calibration["start_pos"][calib_idx]
end_pos = self.calibration["end_pos"][calib_idx]
# Convert from initial range to range [0, 100] in %
calib_val = (values[i] - start_pos) / (end_pos - start_pos) * 100
in_range = (calib_val > LOWER_BOUND_LINEAR) and (calib_val < UPPER_BOUND_LINEAR)
# Solve this inequality to find the factor to shift the range into [0, 100] %
# values[i] = (values[i] - start_pos + resolution * factor) / (end_pos + resolution * factor - start_pos - resolution * factor) * 100
# values[i] = (values[i] - start_pos + resolution * factor) / (end_pos - start_pos) * 100
# 0 <= (values[i] - start_pos + resolution * factor) / (end_pos - start_pos) * 100 <= 100
# (start_pos - values[i]) / resolution <= factor <= (end_pos - values[i]) / resolution
low_factor = (start_pos - values[i]) / resolution
upp_factor = (end_pos - values[i]) / resolution
if not in_range:
# Get first integer between the two bounds
if low_factor < upp_factor:
factor = math.ceil(low_factor)
if factor > upp_factor:
raise ValueError(f"No integer found between bounds [{low_factor=}, {upp_factor=}]")
else:
factor = math.ceil(upp_factor)
if factor > low_factor:
raise ValueError(f"No integer found between bounds [{low_factor=}, {upp_factor=}]")
if CalibrationMode[calib_mode] == CalibrationMode.DEGREE:
out_of_range_str = f"{LOWER_BOUND_DEGREE} < {calib_val} < {UPPER_BOUND_DEGREE} degrees"
in_range_str = f"{LOWER_BOUND_DEGREE} < {calib_val} < {UPPER_BOUND_DEGREE} degrees"
elif CalibrationMode[calib_mode] == CalibrationMode.LINEAR:
out_of_range_str = f"{LOWER_BOUND_LINEAR} < {calib_val} < {UPPER_BOUND_LINEAR} %"
in_range_str = f"{LOWER_BOUND_LINEAR} < {calib_val} < {UPPER_BOUND_LINEAR} %"
logging.warning(
f"Auto-correct calibration of motor '{name}' by shifting value by {abs(factor)} full turns, "
f"from '{out_of_range_str}' to '{in_range_str}'."
)
# A full turn corresponds to 360 degrees but also to 4096 steps for a motor resolution of 4096.
self.calibration["homing_offset"][calib_idx] += resolution * factor
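A worked example of the factor search in the DEGREE branch, assuming a 4096-step resolution, `homing_offset=0`, `drive_mode=0`, and a raw reading that is a full turn too high (the kind of shift described in issues #1 and #2 above).

```python
import math

resolution = 4096
homing_offset = 0
raw = 5120  # reads as 5120 / 2048 * 180 = 450 degrees, outside [-270, 270]

# Solve -2048 <= raw + homing_offset + resolution * factor <= 2048 for an integer factor.
low_factor = (-(resolution // 2) - raw - homing_offset) / resolution  # -1.75
upp_factor = ((resolution // 2) - raw - homing_offset) / resolution   # -0.75
factor = math.ceil(low_factor)                                        # -1, i.e. one full turn down

homing_offset += resolution * factor                       # -4096
new_val = (raw + homing_offset) / (resolution // 2) * 180  # 90.0 degrees, back in range
```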
def revert_calibration(self, values: np.ndarray | list, motor_names: list[str] | None):
"""Inverse of `apply_calibration`."""
if motor_names is None:
motor_names = self.motor_names
# Convert from the universal float32 centered degree range ]-180, 180[ to resolution range ]-resolution, resolution[
for i, name in enumerate(motor_names):
_, model = self.motors[name]
resolution = self.model_resolution[model]
values[i] = values[i] / 180 * (resolution // 2)
calib_idx = self.calibration["motor_names"].index(name)
calib_mode = self.calibration["calib_mode"][calib_idx]
if CalibrationMode[calib_mode] == CalibrationMode.DEGREE:
drive_mode = self.calibration["drive_mode"][calib_idx]
homing_offset = self.calibration["homing_offset"][calib_idx]
_, model = self.motors[name]
resolution = self.model_resolution[model]
# Convert from nominal 0-centered degree range [-180, 180] to
# 0-centered resolution range (e.g. [-2048, 2048] for resolution=4096)
values[i] = values[i] / HALF_TURN_DEGREE * (resolution // 2)
# Subtract the homing offset to come back to the actual motor range of values
# which can be arbitrary.
values[i] -= homing_offset
# Remove drive mode, which is the rotation direction of the motor, to come back to
# actual motor rotation direction which can be arbitrary.
if drive_mode:
values[i] *= -1
elif CalibrationMode[calib_mode] == CalibrationMode.LINEAR:
start_pos = self.calibration["start_pos"][calib_idx]
end_pos = self.calibration["end_pos"][calib_idx]
# Convert from nominal linear range of [0, 100] % to
# actual motor range of values which can be arbitrary.
values[i] = values[i] / 100 * (end_pos - start_pos) + start_pos
values = np.round(values).astype(np.int32)
# Convert from nominal range ]-resolution, resolution[ to centered signed int32 range [-2**31, 2**31[
for i, name in enumerate(motor_names):
homing_offset, drive_mode = self.calibration[name]
values[i] -= homing_offset
# Update direction of rotation of the motor that was matching between leader and follower to their original direction.
# In fact, the motor of the leader for a given joint can be assembled in an opposite direction in term of rotation
# than the motor of the follower on the same joint.
if drive_mode:
values[i] *= -1
return values
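Continuing the same illustrative numbers as in the `apply_calibration` sketch above (resolution 4096, `homing_offset=-1024`, `drive_mode=0`), the inverse mapping in the DEGREE branch recovers the raw step value.

```python
degrees = 180.0
steps = degrees / 180 * (4096 // 2)  # 2048.0
steps -= -1024                       # subtract the homing offset -> 3072.0
steps = int(round(steps))            # 3072, the raw value used in the earlier example
```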
def _read_with_motor_ids(self, motor_models, motor_ids, data_name):
@@ -683,19 +863,7 @@ class DynamixelMotorsBus:
values = values.astype(np.int32)
if data_name in CALIBRATION_REQUIRED and self.calibration is not None:
values = self.apply_calibration(values, motor_names)
# We expect our motors to stay in a nominal range of [-180, 180] degrees
# which corresponds to a half turn rotation.
# However, some motors can turn a bit more, hence we extend the nominal range to [-270, 270]
# which is less than a full 360 degree rotation.
if not np.all((values > -270) & (values < 270)):
raise ValueError(
f"Wrong motor position range detected. "
f"Expected to be in [-270, +270] but in [{values.min()}, {values.max()}]. "
"This might be due to a cable connection issue creating an artificial 360 degrees jump in motor values. "
"You need to recalibrate by running: `python lerobot/scripts/control_robot.py calibrate`"
)
values = self.apply_calibration_autocorrect(values, motor_names)
# log the number of seconds it took to read the data from the motors
delta_ts_name = get_log_name("delta_timestamp_s", "read", data_name, motor_names)


@@ -1,515 +0,0 @@
import pickle
import time
from dataclasses import dataclass, field, replace
from pathlib import Path
import numpy as np
import torch
from lerobot.common.robot_devices.cameras.utils import Camera
from lerobot.common.robot_devices.motors.dynamixel import (
OperatingMode,
TorqueMode,
convert_degrees_to_steps,
)
from lerobot.common.robot_devices.motors.utils import MotorsBus
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
########################################################################
# Calibration logic
########################################################################
URL_TEMPLATE = (
"https://raw.githubusercontent.com/huggingface/lerobot/main/media/{robot}/{arm}_{position}.webp"
)
# In nominal degree range ]-180, +180[
ZERO_POSITION_DEGREE = 0
ROTATED_POSITION_DEGREE = 90
GRIPPER_OPEN_DEGREE = 35.156
def assert_drive_mode(drive_mode):
# `drive_mode` is in [0,1] with 0 means original rotation direction for the motor, and 1 means inverted.
if not np.all(np.isin(drive_mode, [0, 1])):
raise ValueError(f"`drive_mode` contains values other than 0 or 1: ({drive_mode})")
def apply_drive_mode(position, drive_mode):
assert_drive_mode(drive_mode)
# Convert `drive_mode` from [0, 1] with 0 indicates original rotation direction and 1 inverted,
# to [-1, 1] with 1 indicates original rotation direction and -1 inverted.
signed_drive_mode = -(drive_mode * 2 - 1)
position *= signed_drive_mode
return position
def reset_torque_mode(arm: MotorsBus):
# To be configured, all servos must be in "torque disable" mode
arm.write("Torque_Enable", TorqueMode.DISABLED.value)
# Use 'extended position mode' for all motors except gripper, because in joint mode the servos can't
# rotate more than 360 degrees (from 0 to 4095) And some mistake can happen while assembling the arm,
# you could end up with a servo with a position 0 or 4095 at a crucial point See [
# https://emanual.robotis.com/docs/en/dxl/x/x_series/#operating-mode11]
all_motors_except_gripper = [name for name in arm.motor_names if name != "gripper"]
if len(all_motors_except_gripper) > 0:
arm.write("Operating_Mode", OperatingMode.EXTENDED_POSITION.value, all_motors_except_gripper)
# Use 'position control current based' for gripper to be limited by the limit of the current.
# For the follower gripper, it means it can grasp an object without forcing too much even tho,
# it's goal position is a complete grasp (both gripper fingers are ordered to join and reach a touch).
# For the leader gripper, it means we can use it as a physical trigger, since we can force with our finger
# to make it move, and it will move back to its original target position when we release the force.
arm.write("Operating_Mode", OperatingMode.CURRENT_CONTROLLED_POSITION.value, "gripper")
def run_arm_calibration(arm: MotorsBus, name: str, arm_type: str):
"""This function ensures that a neural network trained on data collected on a given robot
can work on another robot. For instance before calibration, setting a same goal position
for each motor of two different robots will get two very different positions. But after calibration,
the two robots will move to the same position. To this end, this function computes the homing offset
and the drive mode for each motor of a given robot.
Homing offset is used to shift the motor position to a ]-2048, +2048[ nominal range (when the motor uses 2048 steps
to complete a half a turn). This range is set around an arbitrary "zero position" corresponding to all motor positions
being 0. During the calibration process, you will need to manually move the robot to this "zero position".
Drive mode is used to invert the rotation direction of the motor. This is useful when some motors have been assembled
in the opposite orientation for some robots. During the calibration process, you will need to manually move the robot
to the "rotated position".
After calibration, the homing offsets and drive modes are stored in a cache.
Example of usage:
```python
run_arm_calibration(arm, "left", "follower")
```
"""
reset_torque_mode(arm)
print(f"\nRunning calibration of {name} {arm_type}...")
print("\nMove arm to zero position")
print("See: " + URL_TEMPLATE.format(robot="koch", arm=arm_type, position="zero"))
input("Press Enter to continue...")
# We arbitrarily chose our zero target position to be a straight horizontal position with the gripper upwards and closed.
# It is easy to identify and all motors are in a "quarter turn" position. Once calibration is done, this position will
# correspond to every motor angle being 0. If you set all 0 as Goal Position, the arm will move to this position.
zero_position = convert_degrees_to_steps(ZERO_POSITION_DEGREE, arm.motor_models)
def _compute_nearest_rounded_position(position, models):
# TODO(rcadene): Rework this function since some motors cant physically rotate a quarter turn
# (e.g. the gripper of Aloha arms can only rotate ~50 degree)
quarter_turn_degree = 90
quarter_turn = convert_degrees_to_steps(quarter_turn_degree, models)
nearest_pos = np.round(position.astype(float) / quarter_turn) * quarter_turn
return nearest_pos.astype(position.dtype)
# Compute homing offset so that `present_position + homing_offset ~= target_position`.
position = arm.read("Present_Position")
position = _compute_nearest_rounded_position(position, arm.motor_models)
homing_offset = zero_position - position
print("\nMove arm to rotated target position")
print("See: " + URL_TEMPLATE.format(robot="koch", arm=arm_type, position="rotated"))
input("Press Enter to continue...")
# The rotated target position corresponds to a rotation of a quarter turn from the zero position.
# This allows to identify the rotation direction of each motor.
# For instance, if the motor rotates 90 degree, and its value is -90 after applying the homing offset, then we know its rotation direction
# is inverted. However, for the calibration being successful, we need everyone to follow the same target position.
# Sometimes, there is only one possible rotation direction. For instance, if the gripper is closed, there is only one direction which
# corresponds to opening the gripper. When the rotation direction is ambiguous, we arbitrarely rotate clockwise from the point of view
# of the previous motor in the kinetic chain.
rotated_position = convert_degrees_to_steps(ROTATED_POSITION_DEGREE, arm.motor_models)
# Find drive mode by rotating each motor by a quarter of a turn.
# Drive mode indicates if the motor rotation direction should be inverted (=1) or not (=0).
position = arm.read("Present_Position")
position += homing_offset
position = _compute_nearest_rounded_position(position, arm.motor_models)
drive_mode = (position != rotated_position).astype(np.int32)
# Re-compute homing offset to take into account drive mode
position = arm.read("Present_Position")
position = apply_drive_mode(position, drive_mode)
position = _compute_nearest_rounded_position(position, arm.motor_models)
homing_offset = rotated_position - position
print("\nMove arm to rest position")
print("See: " + URL_TEMPLATE.format(robot="koch", arm=arm_type, position="rest"))
input("Press Enter to continue...")
print()
return homing_offset, drive_mode
########################################################################
# Alexander Koch robot arm
########################################################################
@dataclass
class KochRobotConfig:
"""
Example of usage:
```python
KochRobotConfig()
```
"""
# Define all components of the robot
leader_arms: dict[str, MotorsBus] = field(default_factory=lambda: {})
follower_arms: dict[str, MotorsBus] = field(default_factory=lambda: {})
cameras: dict[str, Camera] = field(default_factory=lambda: {})
class KochRobot:
# TODO(rcadene): Implement force feedback
"""This class allows to control any Koch robot of various number of motors.
A few versions are available:
- [Koch v1.0](https://github.com/AlexanderKoch-Koch/low_cost_robot), with and without the wrist-to-elbow expansion, which was developed
by Alexander Koch from [Tau Robotics](https://tau-robotics.com): [Github for sourcing and assembly](
- [Koch v1.1])https://github.com/jess-moss/koch-v1-1), which was developed by Jess Moss.
Example of highest frequency teleoperation without camera:
```python
# Defines how to communicate with the motors of the leader and follower arms
leader_arms = {
"main": DynamixelMotorsBus(
port="/dev/tty.usbmodem575E0031751",
motors={
# name: (index, model)
"shoulder_pan": (1, "xl330-m077"),
"shoulder_lift": (2, "xl330-m077"),
"elbow_flex": (3, "xl330-m077"),
"wrist_flex": (4, "xl330-m077"),
"wrist_roll": (5, "xl330-m077"),
"gripper": (6, "xl330-m077"),
},
),
}
follower_arms = {
"main": DynamixelMotorsBus(
port="/dev/tty.usbmodem575E0032081",
motors={
# name: (index, model)
"shoulder_pan": (1, "xl430-w250"),
"shoulder_lift": (2, "xl430-w250"),
"elbow_flex": (3, "xl330-m288"),
"wrist_flex": (4, "xl330-m288"),
"wrist_roll": (5, "xl330-m288"),
"gripper": (6, "xl330-m288"),
},
),
}
robot = KochRobot(leader_arms, follower_arms)
# Connect motors buses and cameras if any (Required)
robot.connect()
while True:
robot.teleop_step()
```
Example of highest frequency data collection without camera:
```python
# Assumes leader and follower arms have been instantiated already (see first example)
robot = KochRobot(leader_arms, follower_arms)
robot.connect()
while True:
observation, action = robot.teleop_step(record_data=True)
```
Example of highest frequency data collection with cameras:
```python
# Defines how to communicate with 2 cameras connected to the computer.
# Here, the webcam of the laptop and the phone (connected in USB to the laptop)
# can be reached respectively using the camera indices 0 and 1. These indices can be
# arbitrary. See the documentation of `OpenCVCamera` to find your own camera indices.
cameras = {
"laptop": OpenCVCamera(camera_index=0, fps=30, width=640, height=480),
"phone": OpenCVCamera(camera_index=1, fps=30, width=640, height=480),
}
# Assumes leader and follower arms have been instantiated already (see first example)
robot = KochRobot(leader_arms, follower_arms, cameras)
robot.connect()
while True:
observation, action = robot.teleop_step(record_data=True)
```
Example of controlling the robot with a policy (without running multiple policies in parallel to ensure highest frequency):
```python
# Assumes leader and follower arms + cameras have been instantiated already (see previous example)
robot = KochRobot(leader_arms, follower_arms, cameras)
robot.connect()
while True:
# Uses the follower arms and cameras to capture an observation
observation = robot.capture_observation()
# Assumes a policy has been instantiated
with torch.inference_mode():
action = policy.select_action(observation)
# Orders the robot to move
robot.send_action(action)
```
Example of disconnecting which is not mandatory since we disconnect when the object is deleted:
```python
robot.disconnect()
```
"""
def __init__(
self,
config: KochRobotConfig | None = None,
calibration_path: Path = ".cache/calibration/koch.pkl",
**kwargs,
):
if config is None:
config = KochRobotConfig()
# Overwrite config arguments using kwargs
self.config = replace(config, **kwargs)
self.calibration_path = Path(calibration_path)
self.leader_arms = self.config.leader_arms
self.follower_arms = self.config.follower_arms
self.cameras = self.config.cameras
self.is_connected = False
self.logs = {}
def connect(self):
if self.is_connected:
raise RobotDeviceAlreadyConnectedError(
"KochRobot is already connected. Do not run `robot.connect()` twice."
)
if not self.leader_arms and not self.follower_arms and not self.cameras:
raise ValueError(
"KochRobot doesn't have any device to connect. See example of usage in docstring of the class."
)
# Connect the arms
for name in self.follower_arms:
print(f"Connecting {name} follower arm.")
self.follower_arms[name].connect()
print(f"Connecting {name} leader arm.")
self.leader_arms[name].connect()
# Reset the arms and load or run calibration
if self.calibration_path.exists():
# Reset all arms before setting calibration
for name in self.follower_arms:
reset_torque_mode(self.follower_arms[name])
for name in self.leader_arms:
reset_torque_mode(self.leader_arms[name])
with open(self.calibration_path, "rb") as f:
calibration = pickle.load(f)
else:
print(f"Missing calibration file '{self.calibration_path}'. Starting calibration precedure.")
# Run calibration process which begins by reseting all arms
calibration = self.run_calibration()
print(f"Calibration is done! Saving calibration file '{self.calibration_path}'")
self.calibration_path.parent.mkdir(parents=True, exist_ok=True)
with open(self.calibration_path, "wb") as f:
pickle.dump(calibration, f)
# Set calibration
for name in self.follower_arms:
self.follower_arms[name].set_calibration(calibration[f"follower_{name}"])
for name in self.leader_arms:
self.leader_arms[name].set_calibration(calibration[f"leader_{name}"])
# Set better PID values to close the gap between recorded states and actions
# TODO(rcadene): Implement an automatic procedure to set optimal PID values for each motor
for name in self.follower_arms:
self.follower_arms[name].write("Position_P_Gain", 1500, "elbow_flex")
self.follower_arms[name].write("Position_I_Gain", 0, "elbow_flex")
self.follower_arms[name].write("Position_D_Gain", 600, "elbow_flex")
# Enable torque on all motors of the follower arms
for name in self.follower_arms:
print(f"Activating torque on {name} follower arm.")
self.follower_arms[name].write("Torque_Enable", 1)
# Enable torque on the gripper of the leader arms, and move it to 45 degrees,
# so that we can use it as a trigger to close the gripper of the follower arms.
for name in self.leader_arms:
self.leader_arms[name].write("Torque_Enable", 1, "gripper")
self.leader_arms[name].write("Goal_Position", GRIPPER_OPEN_DEGREE, "gripper")
# Connect the cameras
for name in self.cameras:
self.cameras[name].connect()
self.is_connected = True
def run_calibration(self):
calibration = {}
for name in self.follower_arms:
homing_offset, drive_mode = run_arm_calibration(self.follower_arms[name], name, "follower")
calibration[f"follower_{name}"] = {}
for idx, motor_name in enumerate(self.follower_arms[name].motor_names):
calibration[f"follower_{name}"][motor_name] = (homing_offset[idx], drive_mode[idx])
for name in self.leader_arms:
homing_offset, drive_mode = run_arm_calibration(self.leader_arms[name], name, "leader")
calibration[f"leader_{name}"] = {}
for idx, motor_name in enumerate(self.leader_arms[name].motor_names):
calibration[f"leader_{name}"][motor_name] = (homing_offset[idx], drive_mode[idx])
return calibration
def teleop_step(
self, record_data=False
) -> None | tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
if not self.is_connected:
raise RobotDeviceNotConnectedError(
"KochRobot is not connected. You need to run `robot.connect()`."
)
# Prepare to assign the position of the leader to the follower
leader_pos = {}
for name in self.leader_arms:
before_lread_t = time.perf_counter()
leader_pos[name] = self.leader_arms[name].read("Present_Position")
self.logs[f"read_leader_{name}_pos_dt_s"] = time.perf_counter() - before_lread_t
follower_goal_pos = {}
for name in self.leader_arms:
follower_goal_pos[name] = leader_pos[name]
# Send action
for name in self.follower_arms:
before_fwrite_t = time.perf_counter()
self.follower_arms[name].write("Goal_Position", follower_goal_pos[name])
self.logs[f"write_follower_{name}_goal_pos_dt_s"] = time.perf_counter() - before_fwrite_t
# Early exit when recording data is not requested
if not record_data:
return
# TODO(rcadene): Add velocity and other info
# Read follower position
follower_pos = {}
for name in self.follower_arms:
before_fread_t = time.perf_counter()
follower_pos[name] = self.follower_arms[name].read("Present_Position")
self.logs[f"read_follower_{name}_pos_dt_s"] = time.perf_counter() - before_fread_t
# Create state by concatenating follower current position
state = []
for name in self.follower_arms:
if name in follower_pos:
state.append(follower_pos[name])
state = np.concatenate(state)
# Create action by concatenating follower goal position
action = []
for name in self.follower_arms:
if name in follower_goal_pos:
action.append(follower_goal_pos[name])
action = np.concatenate(action)
# Capture images from cameras
images = {}
for name in self.cameras:
before_camread_t = time.perf_counter()
images[name] = self.cameras[name].async_read()
self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
# Populate output dictionaries and format to pytorch
obs_dict, action_dict = {}, {}
obs_dict["observation.state"] = torch.from_numpy(state)
action_dict["action"] = torch.from_numpy(action)
for name in self.cameras:
obs_dict[f"observation.images.{name}"] = torch.from_numpy(images[name])
return obs_dict, action_dict
def capture_observation(self):
"""The returned observations do not have a batch dimension."""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
"KochRobot is not connected. You need to run `robot.connect()`."
)
# Read follower position
follower_pos = {}
for name in self.follower_arms:
before_fread_t = time.perf_counter()
follower_pos[name] = self.follower_arms[name].read("Present_Position")
self.logs[f"read_follower_{name}_pos_dt_s"] = time.perf_counter() - before_fread_t
# Create state by concatenating follower current position
state = []
for name in self.follower_arms:
if name in follower_pos:
state.append(follower_pos[name])
state = np.concatenate(state)
# Capture images from cameras
images = {}
for name in self.cameras:
before_camread_t = time.perf_counter()
images[name] = self.cameras[name].async_read()
self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
# Populate output dictionaries and format to pytorch
obs_dict = {}
obs_dict["observation.state"] = torch.from_numpy(state)
for name in self.cameras:
obs_dict[f"observation.images.{name}"] = torch.from_numpy(images[name])
return obs_dict
def send_action(self, action: torch.Tensor):
"""The provided action is expected to be a vector."""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
"KochRobot is not connected. You need to run `robot.connect()`."
)
from_idx = 0
to_idx = 0
follower_goal_pos = {}
for name in self.follower_arms:
if name in self.follower_arms:
to_idx += len(self.follower_arms[name].motor_names)
follower_goal_pos[name] = action[from_idx:to_idx].numpy()
from_idx = to_idx
for name in self.follower_arms:
self.follower_arms[name].write("Goal_Position", follower_goal_pos[name].astype(np.int32))
def disconnect(self):
if not self.is_connected:
raise RobotDeviceNotConnectedError(
"KochRobot is not connected. You need to run `robot.connect()` before disconnecting."
)
for name in self.follower_arms:
self.follower_arms[name].disconnect()
for name in self.leader_arms:
self.leader_arms[name].disconnect()
for name in self.cameras:
self.cameras[name].disconnect()
self.is_connected = False
def __del__(self):
if getattr(self, "is_connected", False):
self.disconnect()


@@ -0,0 +1,703 @@
import json
import logging
import time
import warnings
from dataclasses import dataclass, field, replace
from pathlib import Path
from typing import Sequence
import numpy as np
import torch
from lerobot.common.robot_devices.cameras.utils import Camera
from lerobot.common.robot_devices.motors.dynamixel import (
CalibrationMode,
TorqueMode,
convert_degrees_to_steps,
)
from lerobot.common.robot_devices.motors.utils import MotorsBus
from lerobot.common.robot_devices.robots.utils import get_arm_id
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
########################################################################
# Calibration logic
########################################################################
URL_TEMPLATE = (
"https://raw.githubusercontent.com/huggingface/lerobot/main/media/{robot}/{arm}_{position}.webp"
)
# The following positions are provided in nominal degree range ]-180, +180[
# For more info on these constants, see comments in the code where they get used.
ZERO_POSITION_DEGREE = 0
ROTATED_POSITION_DEGREE = 90
def assert_drive_mode(drive_mode):
# `drive_mode` is in [0,1] with 0 means original rotation direction for the motor, and 1 means inverted.
if not np.all(np.isin(drive_mode, [0, 1])):
raise ValueError(f"`drive_mode` contains values other than 0 or 1: ({drive_mode})")
def apply_drive_mode(position, drive_mode):
assert_drive_mode(drive_mode)
# Convert `drive_mode` from [0, 1] with 0 indicates original rotation direction and 1 inverted,
# to [-1, 1] with 1 indicates original rotation direction and -1 inverted.
signed_drive_mode = -(drive_mode * 2 - 1)
position *= signed_drive_mode
return position
def compute_nearest_rounded_position(position, models):
delta_turn = convert_degrees_to_steps(ROTATED_POSITION_DEGREE, models)
nearest_pos = np.round(position.astype(float) / delta_turn) * delta_turn
return nearest_pos.astype(position.dtype)
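A quick check of this helper, assuming 4096-step models so that a quarter turn (`ROTATED_POSITION_DEGREE = 90`) corresponds to 1024 steps: positions are snapped to the nearest multiple of 1024.

```python
import numpy as np

position = np.array([1000, 1600], dtype=np.int32)
rounded = compute_nearest_rounded_position(position, ["xl430-w250", "xl430-w250"])
print(rounded)  # [1024 2048]
```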
def run_arm_calibration(arm: MotorsBus, robot_type: str, arm_name: str, arm_type: str):
"""This function ensures that a neural network trained on data collected on a given robot
can work on another robot. For instance, before calibration, setting the same goal position
for each motor of two different robots will result in two very different positions. But after calibration,
the two robots will move to the same position. To this end, this function computes the homing offset
and the drive mode for each motor of a given robot.
Homing offset is used to shift the motor position to a ]-2048, +2048[ nominal range (when the motor uses 2048 steps
to complete a half turn). This range is set around an arbitrary "zero position" corresponding to all motor positions
being 0. During the calibration process, you will need to manually move the robot to this "zero position".
Drive mode is used to invert the rotation direction of the motor. This is useful when some motors have been assembled
in the opposite orientation for some robots. During the calibration process, you will need to manually move the robot
to the "rotated position".
After calibration, the homing offsets and drive modes are stored in a cache.
Example of usage:
```python
run_arm_calibration(arm, "koch", "left", "follower")
```
"""
if (arm.read("Torque_Enable") != TorqueMode.DISABLED.value).any():
raise ValueError("To run calibration, the torque must be disabled on all motors.")
print(f"\nRunning calibration of {robot_type} {arm_name} {arm_type}...")
print("\nMove arm to zero position")
print("See: " + URL_TEMPLATE.format(robot=robot_type, arm=arm_type, position="zero"))
input("Press Enter to continue...")
# We arbitrarily chose our zero target position to be a straight horizontal position with gripper upwards and closed.
# It is easy to identify and all motors are in a "quarter turn" position. Once calibration is done, this position will
correspond to every motor angle being 0. If you set all 0 as Goal Position, the arm will move to this position.
zero_target_pos = convert_degrees_to_steps(ZERO_POSITION_DEGREE, arm.motor_models)
# Compute homing offset so that `present_position + homing_offset ~= target_position`.
zero_pos = arm.read("Present_Position")
zero_nearest_pos = compute_nearest_rounded_position(zero_pos, arm.motor_models)
homing_offset = zero_target_pos - zero_nearest_pos
# The rotated target position corresponds to a rotation of a quarter turn from the zero position.
# This allows to identify the rotation direction of each motor.
For instance, if the motor rotates 90 degrees, and its value is -90 after applying the homing offset, then we know its rotation direction
is inverted. However, for the calibration to be successful, we need everyone to follow the same target position.
Sometimes, there is only one possible rotation direction. For instance, if the gripper is closed, there is only one direction which
corresponds to opening the gripper. When the rotation direction is ambiguous, we arbitrarily rotate clockwise from the point of view
of the previous motor in the kinematic chain.
print("\nMove arm to rotated target position")
print("See: " + URL_TEMPLATE.format(robot=robot_type, arm=arm_type, position="rotated"))
input("Press Enter to continue...")
rotated_target_pos = convert_degrees_to_steps(ROTATED_POSITION_DEGREE, arm.motor_models)
# Find drive mode by rotating each motor by a quarter of a turn.
# Drive mode indicates if the motor rotation direction should be inverted (=1) or not (=0).
rotated_pos = arm.read("Present_Position")
drive_mode = (rotated_pos < zero_pos).astype(np.int32)
# Re-compute homing offset to take into account drive mode
rotated_drived_pos = apply_drive_mode(rotated_pos, drive_mode)
rotated_nearest_pos = compute_nearest_rounded_position(rotated_drived_pos, arm.motor_models)
homing_offset = rotated_target_pos - rotated_nearest_pos
print("\nMove arm to rest position")
print("See: " + URL_TEMPLATE.format(robot=robot_type, arm=arm_type, position="rest"))
input("Press Enter to continue...")
print()
# Joints with rotational motions are expressed in degrees in nominal range of [-180, 180]
calib_mode = [CalibrationMode.DEGREE.name] * len(arm.motor_names)
# TODO(rcadene): make type of joints (DEGREE or LINEAR) configurable from yaml?
if robot_type == "aloha" and "gripper" in arm.motor_names:
# Joints with linear motions (like the gripper of Aloha) are expressed in a nominal range of [0, 100]
calib_idx = arm.motor_names.index("gripper")
calib_mode[calib_idx] = CalibrationMode.LINEAR.name
calib_data = {
"homing_offset": homing_offset.tolist(),
"drive_mode": drive_mode.tolist(),
"start_pos": zero_pos.tolist(),
"end_pos": rotated_pos.tolist(),
"calib_mode": calib_mode,
"motor_names": arm.motor_names,
}
return calib_data
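For reference, the returned dict is a plain JSON-serializable mapping with one entry per motor; the numbers below are illustrative only (they depend on your hardware), and an Aloha gripper entry would use `"LINEAR"` in `calib_mode`.

```python
# Illustrative shape of the calibration data for a two-motor arm (values are made up).
calib_data = {
    "homing_offset": [2048, -1024],
    "drive_mode": [0, 1],
    "start_pos": [1023, 3072],
    "end_pos": [2047, 2048],
    "calib_mode": ["DEGREE", "DEGREE"],
    "motor_names": ["shoulder_pan", "shoulder_lift"],
}
```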
def ensure_safe_goal_position(
goal_pos: torch.Tensor, present_pos: torch.Tensor, max_relative_target: float | list[float]
):
# Cap relative action target magnitude for safety.
diff = goal_pos - present_pos
max_relative_target = torch.tensor(max_relative_target)
safe_diff = torch.minimum(diff, max_relative_target)
safe_diff = torch.maximum(safe_diff, -max_relative_target)
safe_goal_pos = present_pos + safe_diff
if not torch.allclose(goal_pos, safe_goal_pos):
logging.warning(
"Relative goal position magnitude had to be clamped to be safe.\n"
f" requested relative goal position target: {diff}\n"
f" clamped relative goal position target: {safe_diff}"
)
return safe_goal_pos
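A quick illustration of the clamping, with a scalar `max_relative_target` of 10 and made-up positions: only the joint whose requested step exceeds the cap is clamped, and a warning is logged.

```python
import torch

present_pos = torch.tensor([0.0, 90.0, -45.0])
goal_pos = torch.tensor([25.0, 95.0, -45.0])

safe = ensure_safe_goal_position(goal_pos, present_pos, max_relative_target=10.0)
print(safe)  # tensor([10., 95., -45.])
```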
########################################################################
# Manipulator robot
########################################################################
@dataclass
class ManipulatorRobotConfig:
"""
Example of usage:
```python
ManipulatorRobotConfig()
```
"""
# Define all components of the robot
robot_type: str | None = None
leader_arms: dict[str, MotorsBus] = field(default_factory=lambda: {})
follower_arms: dict[str, MotorsBus] = field(default_factory=lambda: {})
cameras: dict[str, Camera] = field(default_factory=lambda: {})
# Optionally limit the magnitude of the relative positional target vector for safety purposes.
# Set this to a positive scalar to have the same value for all motors, or a list that is the same length
# as the number of motors in your follower arms (assumes all follower arms have the same number of
# motors).
max_relative_target: list[float] | float | None = None
# Optionally set the leader arm in torque mode with the gripper motor set to this angle. This makes it
# possible to squeeze the gripper and have it spring back to an open position on its own. If None, the
# gripper is not put in torque mode.
gripper_open_degree: float | None = None
def __setattr__(self, prop: str, val):
if prop == "max_relative_target" and val is not None and isinstance(val, Sequence):
for name in self.follower_arms:
if len(self.follower_arms[name].motors) != len(val):
raise ValueError(
f"len(max_relative_target)={len(val)} but the follower arm with name {name} has "
f"{len(self.follower_arms[name].motors)} motors. Please make sure that the "
f"`max_relative_target` list has as many parameters as there are motors per arm. "
"Note: This feature does not yet work with robots where different follower arms have "
"different numbers of motors."
)
super().__setattr__(prop, val)
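As a usage sketch (assuming `leader_arms` and `follower_arms` have been defined as in the `ManipulatorRobot` docstring below), `max_relative_target` can be a scalar applied to every motor, or a per-motor list whose length must match the follower arm, otherwise `__setattr__` raises a `ValueError`.

```python
# Scalar cap: every motor's relative target is limited to 10 (in calibrated units).
config = ManipulatorRobotConfig(
    robot_type="koch",
    leader_arms=leader_arms,
    follower_arms=follower_arms,
    max_relative_target=10.0,
)

# Per-motor cap for a 6-motor follower arm.
config = ManipulatorRobotConfig(
    robot_type="koch",
    leader_arms=leader_arms,
    follower_arms=follower_arms,
    max_relative_target=[10.0, 10.0, 10.0, 10.0, 10.0, 20.0],
)
```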
class ManipulatorRobot:
# TODO(rcadene): Implement force feedback
"""This class allows to control any manipulator robot of various number of motors.
Non exaustive list of robots:
- [Koch v1.0](https://github.com/AlexanderKoch-Koch/low_cost_robot), with and without the wrist-to-elbow expansion, developed
by Alexander Koch from [Tau Robotics](https://tau-robotics.com)
- [Koch v1.1](https://github.com/jess-moss/koch-v1-1) developed by Jess Moss
- [Aloha](https://www.trossenrobotics.com/aloha-kits) developed by Trossen Robotics
Example of highest frequency teleoperation without camera:
```python
# Defines how to communicate with the motors of the leader and follower arms
leader_arms = {
"main": DynamixelMotorsBus(
port="/dev/tty.usbmodem575E0031751",
motors={
# name: (index, model)
"shoulder_pan": (1, "xl330-m077"),
"shoulder_lift": (2, "xl330-m077"),
"elbow_flex": (3, "xl330-m077"),
"wrist_flex": (4, "xl330-m077"),
"wrist_roll": (5, "xl330-m077"),
"gripper": (6, "xl330-m077"),
},
),
}
follower_arms = {
"main": DynamixelMotorsBus(
port="/dev/tty.usbmodem575E0032081",
motors={
# name: (index, model)
"shoulder_pan": (1, "xl430-w250"),
"shoulder_lift": (2, "xl430-w250"),
"elbow_flex": (3, "xl330-m288"),
"wrist_flex": (4, "xl330-m288"),
"wrist_roll": (5, "xl330-m288"),
"gripper": (6, "xl330-m288"),
},
),
}
robot = ManipulatorRobot(
robot_type="koch",
calibration_dir=".cache/calibration/koch",
leader_arms=leader_arms,
follower_arms=follower_arms,
)
# Connect motors buses and cameras if any (Required)
robot.connect()
while True:
robot.teleop_step()
```
Example of highest frequency data collection without camera:
```python
# Assumes leader and follower arms have been instantiated already (see first example)
robot = ManipulatorRobot(
robot_type="koch",
calibration_dir=".cache/calibration/koch",
leader_arms=leader_arms,
follower_arms=follower_arms,
)
robot.connect()
while True:
observation, action = robot.teleop_step(record_data=True)
```
Example of highest frequency data collection with cameras:
```python
# Defines how to communicate with 2 cameras connected to the computer.
# Here, the webcam of the laptop and the phone (connected in USB to the laptop)
# can be reached respectively using the camera indices 0 and 1. These indices can be
# arbitrary. See the documentation of `OpenCVCamera` to find your own camera indices.
cameras = {
"laptop": OpenCVCamera(camera_index=0, fps=30, width=640, height=480),
"phone": OpenCVCamera(camera_index=1, fps=30, width=640, height=480),
}
# Assumes leader and follower arms have been instantiated already (see first example)
robot = ManipulatorRobot(
robot_type="koch",
calibration_dir=".cache/calibration/koch",
leader_arms=leader_arms,
follower_arms=follower_arms,
cameras=cameras,
)
robot.connect()
while True:
observation, action = robot.teleop_step(record_data=True)
```
Example of controlling the robot with a policy (without running multiple policies in parallel to ensure highest frequency):
```python
# Assumes leader and follower arms + cameras have been instantiated already (see previous example)
robot = ManipulatorRobot(
robot_type="koch",
calibration_dir=".cache/calibration/koch",
leader_arms=leader_arms,
follower_arms=follower_arms,
cameras=cameras,
)
robot.connect()
while True:
# Uses the follower arms and cameras to capture an observation
observation = robot.capture_observation()
# Assumes a policy has been instantiated
with torch.inference_mode():
action = policy.select_action(observation)
# Orders the robot to move
robot.send_action(action)
```
Example of disconnecting which is not mandatory since we disconnect when the object is deleted:
```python
robot.disconnect()
```
"""
def __init__(
self,
config: ManipulatorRobotConfig | None = None,
calibration_dir: Path = ".cache/calibration/koch",
**kwargs,
):
if config is None:
config = ManipulatorRobotConfig()
# Overwrite config arguments using kwargs
self.config = replace(config, **kwargs)
self.calibration_dir = Path(calibration_dir)
self.robot_type = self.config.robot_type
self.leader_arms = self.config.leader_arms
self.follower_arms = self.config.follower_arms
self.cameras = self.config.cameras
self.is_connected = False
self.logs = {}
def connect(self):
if self.is_connected:
raise RobotDeviceAlreadyConnectedError(
"ManipulatorRobot is already connected. Do not run `robot.connect()` twice."
)
if not self.leader_arms and not self.follower_arms and not self.cameras:
raise ValueError(
"ManipulatorRobot doesn't have any device to connect. See example of usage in docstring of the class."
)
# Connect the arms
for name in self.follower_arms:
print(f"Connecting {name} follower arm.")
self.follower_arms[name].connect()
print(f"Connecting {name} leader arm.")
self.leader_arms[name].connect()
# We assume that at connection time, arms are in a rest position, and torque can
# be safely disabled to run calibration and/or set robot preset configurations.
for name in self.follower_arms:
self.follower_arms[name].write("Torque_Enable", TorqueMode.DISABLED.value)
for name in self.leader_arms:
self.leader_arms[name].write("Torque_Enable", TorqueMode.DISABLED.value)
self.activate_calibration()
# Set robot preset (e.g. torque in leader gripper for Koch v1.1)
if self.robot_type == "koch":
self.set_koch_robot_preset()
elif self.robot_type == "aloha":
self.set_aloha_robot_preset()
else:
warnings.warn(f"No preset found for robot type: {self.robot_type}", stacklevel=1)
# Enable torque on all motors of the follower arms
for name in self.follower_arms:
print(f"Activating torque on {name} follower arm.")
self.follower_arms[name].write("Torque_Enable", 1)
if self.config.gripper_open_degree is not None:
# Set the leader arm in torque mode with the gripper motor set to an angle. This makes it possible
# to squeeze the gripper and have it spring back to an open position on its own.
for name in self.leader_arms:
self.leader_arms[name].write("Torque_Enable", 1, "gripper")
self.leader_arms[name].write("Goal_Position", self.config.gripper_open_degree, "gripper")
# Connect the cameras
for name in self.cameras:
self.cameras[name].connect()
self.is_connected = True
def activate_calibration(self):
"""After calibration all motors function in human interpretable ranges.
Rotations are expressed in degrees in nominal range of [-180, 180],
and linear motions (like gripper of Aloha) in nominal range of [0, 100].
"""
def load_or_run_calibration_(name, arm, arm_type):
arm_id = get_arm_id(name, arm_type)
arm_calib_path = self.calibration_dir / f"{arm_id}.json"
if arm_calib_path.exists():
with open(arm_calib_path) as f:
calibration = json.load(f)
else:
print(f"Missing calibration file '{arm_calib_path}'")
calibration = run_arm_calibration(arm, self.robot_type, name, arm_type)
print(f"Calibration is done! Saving calibration file '{arm_calib_path}'")
arm_calib_path.parent.mkdir(parents=True, exist_ok=True)
with open(arm_calib_path, "w") as f:
json.dump(calibration, f)
return calibration
for name, arm in self.follower_arms.items():
calibration = load_or_run_calibration_(name, arm, "follower")
arm.set_calibration(calibration)
for name, arm in self.leader_arms.items():
calibration = load_or_run_calibration_(name, arm, "leader")
arm.set_calibration(calibration)
def set_koch_robot_preset(self):
def set_operating_mode_(arm):
if (arm.read("Torque_Enable") != TorqueMode.DISABLED.value).any():
raise ValueError("To run set robot preset, the torque must be disabled on all motors.")
# Use 'extended position mode' for all motors except gripper, because in joint mode the servos can't
# rotate more than 360 degrees (from 0 to 4095). Some mistakes can happen while assembling the arm, and
# you could end up with a servo at position 0 or 4095 at a crucial point. See [
# https://emanual.robotis.com/docs/en/dxl/x/x_series/#operating-mode11]
all_motors_except_gripper = [name for name in arm.motor_names if name != "gripper"]
if len(all_motors_except_gripper) > 0:
# 4 corresponds to Extended Position on Koch motors
arm.write("Operating_Mode", 4, all_motors_except_gripper)
# Use 'position control current based' for gripper to be limited by the limit of the current.
# For the follower gripper, it means it can grasp an object without forcing too much even though
# its goal position is a complete grasp (both gripper fingers are ordered to join and reach a touch).
# For the leader gripper, it means we can use it as a physical trigger, since we can force with our finger
# to make it move, and it will move back to its original target position when we release the force.
# 5 corresponds to Current Controlled Position on Koch gripper motors "xl330-m077, xl330-m288"
arm.write("Operating_Mode", 5, "gripper")
for name in self.follower_arms:
set_operating_mode_(self.follower_arms[name])
# Set better PID values to close the gap between recorded states and actions
# TODO(rcadene): Implement an automatic procedure to set optimal PID values for each motor
self.follower_arms[name].write("Position_P_Gain", 1500, "elbow_flex")
self.follower_arms[name].write("Position_I_Gain", 0, "elbow_flex")
self.follower_arms[name].write("Position_D_Gain", 600, "elbow_flex")
if self.config.gripper_open_degree is not None:
for name in self.leader_arms:
set_operating_mode_(self.leader_arms[name])
# Enable torque on the gripper of the leader arms, and move it to 45 degrees,
# so that we can use it as a trigger to close the gripper of the follower arms.
self.leader_arms[name].write("Torque_Enable", 1, "gripper")
self.leader_arms[name].write("Goal_Position", self.config.gripper_open_degree, "gripper")
def set_aloha_robot_preset(self):
def set_shadow_(arm):
# Set secondary/shadow ID for shoulder and elbow. These joints have two motors.
# As a result, if only one of them is required to move to a certain position,
# the other will follow. This is to avoid breaking the motors.
if "shoulder_shadow" in arm.motor_names:
shoulder_idx = arm.read("ID", "shoulder")
arm.write("Secondary_ID", shoulder_idx, "shoulder_shadow")
if "elbow_shadow" in arm.motor_names:
elbow_idx = arm.read("ID", "elbow")
arm.write("Secondary_ID", elbow_idx, "elbow_shadow")
for name in self.follower_arms:
set_shadow_(self.follower_arms[name])
for name in self.leader_arms:
set_shadow_(self.leader_arms[name])
for name in self.follower_arms:
# Set a velocity limit of 131 as advised by Trossen Robotics
self.follower_arms[name].write("Velocity_Limit", 131)
# Use 'extended position mode' for all motors except gripper, because in joint mode the servos can't
# rotate more than 360 degrees (from 0 to 4095). Some mistakes can happen while assembling the arm, and
# you could end up with a servo at position 0 or 4095 at a crucial point. See [
# https://emanual.robotis.com/docs/en/dxl/x/x_series/#operating-mode11]
all_motors_except_gripper = [
name for name in self.follower_arms[name].motor_names if name != "gripper"
]
if len(all_motors_except_gripper) > 0:
# 4 corresponds to Extended Position on Aloha motors
self.follower_arms[name].write("Operating_Mode", 4, all_motors_except_gripper)
# Use 'position control current based' for follower gripper to be limited by the limit of the current.
# It can grasp an object without forcing too much even though
# its goal position is a complete grasp (both gripper fingers are ordered to join and reach a touch).
# 5 corresponds to Current Controlled Position on Aloha gripper follower "xm430-w350"
self.follower_arms[name].write("Operating_Mode", 5, "gripper")
# Note: We can't enable torque on the leader gripper since "xc430-w150" doesn't have
# a Current Controlled Position mode.
if self.config.gripper_open_degree is not None:
warnings.warn(
f"`gripper_open_degree` is set to {self.config.gripper_open_degree}, but None is expected for Aloha instead",
stacklevel=1,
)
def teleop_step(
self, record_data=False
) -> None | tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
if not self.is_connected:
raise RobotDeviceNotConnectedError(
"ManipulatorRobot is not connected. You need to run `robot.connect()`."
)
# Prepare to assign the position of the leader to the follower
leader_pos = {}
for name in self.leader_arms:
before_lread_t = time.perf_counter()
leader_pos[name] = self.leader_arms[name].read("Present_Position")
leader_pos[name] = torch.from_numpy(leader_pos[name])
self.logs[f"read_leader_{name}_pos_dt_s"] = time.perf_counter() - before_lread_t
# Send goal position to the follower
follower_goal_pos = {}
for name in self.follower_arms:
before_fwrite_t = time.perf_counter()
goal_pos = leader_pos[name]
# Cap goal position when too far away from present position.
# Slower fps expected due to reading from the follower.
if self.config.max_relative_target is not None:
present_pos = self.follower_arms[name].read("Present_Position")
present_pos = torch.from_numpy(present_pos)
goal_pos = ensure_safe_goal_position(goal_pos, present_pos, self.config.max_relative_target)
# Used when record_data=True
follower_goal_pos[name] = goal_pos
goal_pos = goal_pos.numpy().astype(np.int32)
self.follower_arms[name].write("Goal_Position", goal_pos)
self.logs[f"write_follower_{name}_goal_pos_dt_s"] = time.perf_counter() - before_fwrite_t
# Early exit when recording data is not requested
if not record_data:
return
# TODO(rcadene): Add velocity and other info
# Read follower position
follower_pos = {}
for name in self.follower_arms:
before_fread_t = time.perf_counter()
follower_pos[name] = self.follower_arms[name].read("Present_Position")
follower_pos[name] = torch.from_numpy(follower_pos[name])
self.logs[f"read_follower_{name}_pos_dt_s"] = time.perf_counter() - before_fread_t
# Create state by concatenating follower current position
state = []
for name in self.follower_arms:
if name in follower_pos:
state.append(follower_pos[name])
state = torch.cat(state)
# Create action by concatenating follower goal position
action = []
for name in self.follower_arms:
if name in follower_goal_pos:
action.append(follower_goal_pos[name])
action = torch.cat(action)
# Capture images from cameras
images = {}
for name in self.cameras:
before_camread_t = time.perf_counter()
images[name] = self.cameras[name].async_read()
images[name] = torch.from_numpy(images[name])
self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
# Populate output dictionaries
obs_dict, action_dict = {}, {}
obs_dict["observation.state"] = state
action_dict["action"] = action
for name in self.cameras:
obs_dict[f"observation.images.{name}"] = images[name]
return obs_dict, action_dict
def capture_observation(self):
"""The returned observations do not have a batch dimension."""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
"ManipulatorRobot is not connected. You need to run `robot.connect()`."
)
# Read follower position
follower_pos = {}
for name in self.follower_arms:
before_fread_t = time.perf_counter()
follower_pos[name] = self.follower_arms[name].read("Present_Position")
follower_pos[name] = torch.from_numpy(follower_pos[name])
self.logs[f"read_follower_{name}_pos_dt_s"] = time.perf_counter() - before_fread_t
# Create state by concatenating follower current position
state = []
for name in self.follower_arms:
if name in follower_pos:
state.append(follower_pos[name])
state = torch.cat(state)
# Capture images from cameras
images = {}
for name in self.cameras:
before_camread_t = time.perf_counter()
images[name] = self.cameras[name].async_read()
images[name] = torch.from_numpy(images[name])
self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
# Populate output dictionaries and format to pytorch
obs_dict = {}
obs_dict["observation.state"] = state
for name in self.cameras:
obs_dict[f"observation.images.{name}"] = images[name]
return obs_dict
def send_action(self, action: torch.Tensor) -> torch.Tensor:
"""Command the follower arms to move to a target joint configuration.
The relative action magnitude may be clipped depending on the configuration parameter
`max_relative_target`. In that case, the action sent differs from the original action.
For this reason, this function always returns the action that was actually sent.
Args:
action: tensor containing the concatenated goal positions for the follower arms.
"""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
"ManipulatorRobot is not connected. You need to run `robot.connect()`."
)
from_idx = 0
to_idx = 0
action_sent = []
for name in self.follower_arms:
# Get goal position of each follower arm by splitting the action vector
to_idx += len(self.follower_arms[name].motor_names)
goal_pos = action[from_idx:to_idx]
from_idx = to_idx
# Cap goal position when too far away from present position.
# Slower fps expected due to reading from the follower.
if self.config.max_relative_target is not None:
present_pos = self.follower_arms[name].read("Present_Position")
present_pos = torch.from_numpy(present_pos)
goal_pos = ensure_safe_goal_position(goal_pos, present_pos, self.config.max_relative_target)
# Save tensor to concat and return
action_sent.append(goal_pos)
# Send goal position to each follower
goal_pos = goal_pos.numpy().astype(np.int32)
self.follower_arms[name].write("Goal_Position", goal_pos)
return torch.cat(action_sent)
def disconnect(self):
if not self.is_connected:
raise RobotDeviceNotConnectedError(
"ManipulatorRobot is not connected. You need to run `robot.connect()` before disconnecting."
)
for name in self.follower_arms:
self.follower_arms[name].disconnect()
for name in self.leader_arms:
self.leader_arms[name].disconnect()
for name in self.cameras:
self.cameras[name].disconnect()
self.is_connected = False
def __del__(self):
if getattr(self, "is_connected", False):
self.disconnect()
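
Both `teleop_step` and `send_action` above delegate the safety clipping to `ensure_safe_goal_position`, which is imported from another module and not shown in this compare view. Below is a minimal sketch of such a clamp, assuming `max_relative_target` is a scalar or a per-motor list in the same units as the positions; this is an illustration, not the repository's implementation.

import torch

def clamp_goal_position_sketch(
    goal_pos: torch.Tensor,
    present_pos: torch.Tensor,
    max_relative_target: float | list[float],
) -> torch.Tensor:
    # Hypothetical stand-in for `ensure_safe_goal_position`: limit how far the commanded
    # goal may deviate from the present position, element-wise per motor.
    max_delta = torch.as_tensor(max_relative_target, dtype=goal_pos.dtype)
    safe_delta = torch.clamp(goal_pos - present_pos, -max_delta, max_delta)
    return present_pos + safe_delta

With `max_relative_target: 5` as in the Aloha config further below, each motor's commanded change is capped at 5 units per request.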

View File

@@ -1,6 +1,13 @@
from typing import Protocol
def get_arm_id(name, arm_type):
"""Returns the string identifier of a robot arm. For instance, for a bimanual manipulator
like Aloha, it could be left_follower, right_follower, left_leader, or right_leader.
"""
return f"{name}_{arm_type}"
class Robot(Protocol):
def init_teleop(self): ...
def run_calibration(self): ...
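
For reference, `get_arm_id` is what the updated `calibrate()` command further below uses to derive one calibration file per arm. A small usage example, with the path taken from the Aloha config in this changeset:

from pathlib import Path

arm_id = get_arm_id("left", "follower")  # -> "left_follower"

# calibrate() removes and recreates one JSON file per arm id, e.g.:
calibration_dir = Path(".cache/calibration/aloha_default")
arm_calib_path = calibration_dir / f"{arm_id}.json"
print(arm_calib_path)  # .cache/calibration/aloha_default/left_follower.json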

View File

@@ -1,3 +1,21 @@
import platform
import time
def busy_wait(seconds):
if platform.system() == "Darwin":
# On Mac, `time.sleep` is not accurate and we need to use this busy-wait loop trick,
# but it consumes CPU cycles.
# TODO(rcadene): find an alternative: starting with Python 3.11, time.sleep is precise
end_time = time.perf_counter() + seconds
while time.perf_counter() < end_time:
pass
else:
# On Linux time.sleep is accurate
if seconds > 0:
time.sleep(seconds)
class RobotDeviceNotConnectedError(Exception):
"""Exception raised when the robot device is not connected."""

View File

@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import os.path as osp
import random
from contextlib import contextmanager
@@ -27,6 +28,12 @@ import torch
from omegaconf import DictConfig
def inside_slurm():
"""Check whether the python process was launched through slurm"""
# TODO(rcadene): return False for interactive mode `--pty bash`
return "SLURM_JOB_ID" in os.environ
def get_safe_torch_device(cfg_device: str, log: bool = False) -> torch.device:
"""Given a string, return a torch.device with checks on whether the device is available."""
match cfg_device:
@@ -158,7 +165,6 @@ def init_hydra_config(config_path: str, overrides: list[str] | None = None) -> D
version_base="1.2",
)
cfg = hydra.compose(Path(config_path).stem, overrides)
return cfg

View File

@@ -120,7 +120,7 @@ eval:
# `batch_size` specifies the number of environments to use in a gym.vector.VectorEnv.
batch_size: 1
# `use_async_envs` specifies whether to use asynchronous environments (multiprocessing).
use_async_envs: false
use_async_envs: true
wandb:
enable: false

View File

@@ -2,6 +2,11 @@
fps: 50
eval:
# `use_async_envs` specifies whether to use asynchronous environments (multiprocessing).
# set it to false to avoid issues with the aloha env
use_async_envs: false
env:
name: aloha
task: AlohaInsertion-v0

View File

@@ -2,6 +2,11 @@
fps: 15
eval:
# `use_async_envs` specifies whether to use asynchronous environments (multiprocessing).
# set it to false to avoid issues with the aloha env
use_async_envs: false
env:
name: xarm
task: XarmLift-v0

View File

@@ -12,6 +12,7 @@ training:
grad_clip_norm: 10.0
lr: 3e-4
save_freq: 10000
eval_freq: 5000
log_freq: 100

View File

@@ -0,0 +1,115 @@
# Aloha: A Low-Cost Hardware for Bimanual Teleoperation
# https://aloha-2.github.io
# https://www.trossenrobotics.com/aloha-stationary
# Requires installing extras packages
# With pip: `pip install -e ".[dynamixel intelrealsense]"`
# With poetry: `poetry install --sync --extras "dynamixel intelrealsense"`
_target_: lerobot.common.robot_devices.robots.manipulator.ManipulatorRobot
robot_type: aloha
# Specific to Aloha, LeRobot comes with default calibration files. Assuming the motors have been
# properly assembled, no manual calibration step is expected. If you need to run manual calibration,
# simply update this path to ".cache/calibration/aloha"
calibration_dir: .cache/calibration/aloha_default
# /!\ FOR SAFETY, READ THIS /!\
# `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
# Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
# the number of motors in your follower arms.
# For Aloha, for every goal position request, motor rotations are capped at 5 degrees by default.
# When you feel more confident with teleoperation or running the policy, you can extend
# this safety limit and even remove it by setting it to `null`.
# Also, everything is expected to work safely out-of-the-box, but we highly advise
# first teleoperating the grippers only (by commenting out the rest of the motors in this yaml),
# then gradually adding more motors (by uncommenting them), until you can teleoperate both arms fully
max_relative_target: 5
leader_arms:
left:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
port: /dev/ttyDXL_leader_left
motors: # window_x
# name: (index, model)
waist: [1, xm430-w350]
shoulder: [2, xm430-w350]
shoulder_shadow: [3, xm430-w350]
elbow: [4, xm430-w350]
elbow_shadow: [5, xm430-w350]
forearm_roll: [6, xm430-w350]
wrist_angle: [7, xm430-w350]
wrist_rotate: [8, xl430-w250]
gripper: [9, xc430-w150]
right:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
port: /dev/ttyDXL_leader_right
motors: # window_x
# name: (index, model)
waist: [1, xm430-w350]
shoulder: [2, xm430-w350]
shoulder_shadow: [3, xm430-w350]
elbow: [4, xm430-w350]
elbow_shadow: [5, xm430-w350]
forearm_roll: [6, xm430-w350]
wrist_angle: [7, xm430-w350]
wrist_rotate: [8, xl430-w250]
gripper: [9, xc430-w150]
follower_arms:
left:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
port: /dev/ttyDXL_follower_left
motors:
# name: [index, model]
waist: [1, xm540-w270]
shoulder: [2, xm540-w270]
shoulder_shadow: [3, xm540-w270]
elbow: [4, xm540-w270]
elbow_shadow: [5, xm540-w270]
forearm_roll: [6, xm540-w270]
wrist_angle: [7, xm540-w270]
wrist_rotate: [8, xm430-w350]
gripper: [9, xm430-w350]
right:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
port: /dev/ttyDXL_follower_right
motors:
# name: [index, model]
waist: [1, xm540-w270]
shoulder: [2, xm540-w270]
shoulder_shadow: [3, xm540-w270]
elbow: [4, xm540-w270]
elbow_shadow: [5, xm540-w270]
forearm_roll: [6, xm540-w270]
wrist_angle: [7, xm540-w270]
wrist_rotate: [8, xm430-w350]
gripper: [9, xm430-w350]
# Troubleshooting: If one of your IntelRealSense cameras freeze during
# data recording due to bandwidth limit, you might need to plug the camera
# on another USB hub or PCIe card.
cameras:
cam_high:
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
camera_index: 128422271347
fps: 30
width: 640
height: 480
cam_low:
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
camera_index: 130322270656
fps: 30
width: 640
height: 480
cam_left_wrist:
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
camera_index: 218622272670
fps: 30
width: 640
height: 480
cam_right_wrist:
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
camera_index: 130322272300
fps: 30
width: 640
height: 480
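
The `is_robot_available` fixture later in this changeset shows how such a YAML is turned into a robot object: compose the config with `init_hydra_config` and hand it to `make_robot`. A rough sketch, assuming this file lives at `lerobot/configs/robot/aloha.yaml`:

from lerobot.common.robot_devices.robots.factory import make_robot
from lerobot.common.utils.utils import init_hydra_config

# Assumed path of the Aloha config shown above.
robot_cfg = init_hydra_config("lerobot/configs/robot/aloha.yaml")
robot = make_robot(robot_cfg)  # instantiates the `_target_` arms and cameras
robot.connect()  # calibration runs automatically if the calibration files are missing
try:
    robot.teleop_step()
finally:
    robot.disconnect()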

View File

@@ -1,5 +1,12 @@
_target_: lerobot.common.robot_devices.robots.koch.KochRobot
calibration_path: .cache/calibration/koch.pkl
_target_: lerobot.common.robot_devices.robots.manipulator.ManipulatorRobot
robot_type: koch
calibration_dir: .cache/calibration/koch
# `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
# Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
# the number of motors in your follower arms.
max_relative_target: null
leader_arms:
main:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
@@ -12,6 +19,7 @@ leader_arms:
wrist_flex: [4, "xl330-m077"]
wrist_roll: [5, "xl330-m077"]
gripper: [6, "xl330-m077"]
follower_arms:
main:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
@@ -24,6 +32,7 @@ follower_arms:
wrist_flex: [4, "xl330-m288"]
wrist_roll: [5, "xl330-m288"]
gripper: [6, "xl330-m288"]
cameras:
laptop:
_target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
@@ -37,3 +46,8 @@ cameras:
fps: 30
width: 640
height: 480
# ~ Koch specific settings ~
# Sets the leader arm in torque mode with the gripper motor set to this angle. This makes it possible
# to squeeze the gripper and have it spring back to an open position on its own.
gripper_open_degree: 35.156

View File

@@ -0,0 +1,75 @@
_target_: lerobot.common.robot_devices.robots.manipulator.ManipulatorRobot
robot_type: koch
calibration_dir: .cache/calibration/koch_bimanual
# `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
# Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
# the number of motors in your follower arms.
max_relative_target: null
leader_arms:
left:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
port: /dev/tty.usbmodem585A0085511
motors:
# name: (index, model)
shoulder_pan: [1, "xl330-m077"]
shoulder_lift: [2, "xl330-m077"]
elbow_flex: [3, "xl330-m077"]
wrist_flex: [4, "xl330-m077"]
wrist_roll: [5, "xl330-m077"]
gripper: [6, "xl330-m077"]
right:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
port: /dev/tty.usbmodem575E0031751
motors:
# name: (index, model)
shoulder_pan: [1, "xl330-m077"]
shoulder_lift: [2, "xl330-m077"]
elbow_flex: [3, "xl330-m077"]
wrist_flex: [4, "xl330-m077"]
wrist_roll: [5, "xl330-m077"]
gripper: [6, "xl330-m077"]
follower_arms:
left:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
port: /dev/tty.usbmodem585A0076891
motors:
# name: (index, model)
shoulder_pan: [1, "xl430-w250"]
shoulder_lift: [2, "xl430-w250"]
elbow_flex: [3, "xl330-m288"]
wrist_flex: [4, "xl330-m288"]
wrist_roll: [5, "xl330-m288"]
gripper: [6, "xl330-m288"]
right:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
port: /dev/tty.usbmodem575E0032081
motors:
# name: (index, model)
shoulder_pan: [1, "xl430-w250"]
shoulder_lift: [2, "xl430-w250"]
elbow_flex: [3, "xl330-m288"]
wrist_flex: [4, "xl330-m288"]
wrist_roll: [5, "xl330-m288"]
gripper: [6, "xl330-m288"]
cameras:
laptop:
_target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
camera_index: 0
fps: 30
width: 640
height: 480
phone:
_target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
camera_index: 1
fps: 30
width: 640
height: 480
# ~ Koch specific settings ~
# Sets the leader arm in torque mode with the gripper motor set to this angle. This makes it possible
# to squeeze the gripper and have it spring back to an open position on its own.
gripper_open_degree: 35.156

View File

@@ -127,7 +127,8 @@ from lerobot.common.datasets.utils import calculate_episode_data_index, create_b
from lerobot.common.datasets.video_utils import encode_video_frames
from lerobot.common.policies.factory import make_policy
from lerobot.common.robot_devices.robots.factory import make_robot
from lerobot.common.robot_devices.robots.utils import Robot
from lerobot.common.robot_devices.robots.utils import Robot, get_arm_id
from lerobot.common.robot_devices.utils import busy_wait
from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed
from lerobot.scripts.eval import get_pretrained_policy_path
from lerobot.scripts.push_dataset_to_hub import (
@@ -169,15 +170,6 @@ def save_image(img_tensor, key, frame_index, episode_index, videos_dir):
img.save(str(path), quality=100)
def busy_wait(seconds):
# Significantly more accurate than `time.sleep`, and mandatory for our use case,
# but it consumes CPU cycles.
# TODO(rcadene): find an alternative: from python 11, time.sleep is precise
end_time = time.perf_counter() + seconds
while time.perf_counter() < end_time:
pass
def none_or_int(value):
if value == "None":
return None
@@ -249,10 +241,38 @@ def is_headless():
########################################################################################
def calibrate(robot: Robot):
if robot.calibration_path.exists():
print(f"Removing '{robot.calibration_path}'")
robot.calibration_path.unlink()
def calibrate(robot: Robot, arms: list[str] | None):
available_arms = []
for name in robot.follower_arms:
arm_id = get_arm_id(name, "follower")
available_arms.append(arm_id)
for name in robot.leader_arms:
arm_id = get_arm_id(name, "leader")
available_arms.append(arm_id)
available_arms_str = " ".join(available_arms)
# Validate `arms` before iterating over it (it is None when `--arms` is not provided).
if arms is None or len(arms) == 0:
raise ValueError(
"No arm provided. Use `--arms` as argument with one or more available arms.\n"
f"For instance, to recalibrate all arms add: `--arms {available_arms_str}`"
)
unknown_arms = [arm_id for arm_id in arms if arm_id not in available_arms]
unknown_arms_str = " ".join(unknown_arms)
if len(unknown_arms) > 0:
raise ValueError(
f"Unknown arms provided ('{unknown_arms_str}'). Available arms are `{available_arms_str}`."
)
for arm_id in arms:
arm_calib_path = robot.calibration_dir / f"{arm_id}.json"
if arm_calib_path.exists():
print(f"Removing '{arm_calib_path}'")
arm_calib_path.unlink()
else:
print(f"Calibration file not found '{arm_calib_path}'")
if robot.is_connected:
robot.disconnect()
@@ -260,6 +280,8 @@ def calibrate(robot: Robot):
# Calling `connect` automatically runs calibration
# when the calibration file is missing
robot.connect()
robot.disconnect()
print("Calibration is done! You can now teleoperate and record datasets!")
def teleoperate(robot: Robot, fps: int | None = None, teleop_time_s: float | None = None):
@@ -486,8 +508,11 @@ def record(
action = action.to("cpu")
# Order the robot to move
robot.send_action(action)
action = {"action": action}
action_sent = robot.send_action(action)
# Action can eventually be clipped using `max_relative_target`,
# so action actually sent is saved in the dataset.
action = {"action": action_sent}
for key in action:
if key not in ep_dict:
@@ -712,6 +737,12 @@ if __name__ == "__main__":
)
parser_calib = subparsers.add_parser("calibrate", parents=[base_parser])
parser_calib.add_argument(
"--arms",
type=str,
nargs="*",
help="List of arms to calibrate (e.g. `--arms left_follower right_follower left_leader`)",
)
parser_teleop = subparsers.add_parser("teleoperate", parents=[base_parser])
parser_teleop.add_argument(

View File

@@ -70,7 +70,13 @@ from lerobot.common.policies.factory import make_policy
from lerobot.common.policies.policy_protocol import Policy
from lerobot.common.policies.utils import get_device_from_parameters
from lerobot.common.utils.io_utils import write_video
from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed
from lerobot.common.utils.utils import (
get_safe_torch_device,
init_hydra_config,
init_logging,
inside_slurm,
set_global_seed,
)
def rollout(
@@ -79,7 +85,6 @@ def rollout(
seeds: list[int] | None = None,
return_observations: bool = False,
render_callback: Callable[[gym.vector.VectorEnv], None] | None = None,
enable_progbar: bool = False,
) -> dict:
"""Run a batched policy rollout once through a batch of environments.
@@ -109,7 +114,6 @@ def rollout(
are returned optionally because they typically take more memory to cache. Defaults to False.
render_callback: Optional rendering callback to be used after the environments are reset, and after
every step.
enable_progbar: Enable a progress bar over rollout steps.
Returns:
The dictionary described above.
"""
@@ -136,7 +140,7 @@ def rollout(
progbar = trange(
max_steps,
desc=f"Running rollout with at most {max_steps} steps",
disable=not enable_progbar,
disable=inside_slurm(),  # we don't want a progress bar when running on slurm, since it clutters the logs
leave=False,
)
while not np.all(done):
@@ -210,8 +214,6 @@ def eval_policy(
videos_dir: Path | None = None,
return_episode_data: bool = False,
start_seed: int | None = None,
enable_progbar: bool = False,
enable_inner_progbar: bool = False,
) -> dict:
"""
Args:
@@ -224,8 +226,6 @@ def eval_policy(
the "episodes" key of the returned dictionary.
start_seed: The first seed to use for the first individual rollout. For all subsequent rollouts the
seed is incremented by 1. If not provided, the environments are not manually seeded.
enable_progbar: Enable progress bar over batches.
enable_inner_progbar: Enable progress bar over steps in each batch.
Returns:
Dictionary with metrics and data regarding the rollouts.
"""
@@ -266,7 +266,8 @@ def eval_policy(
if return_episode_data:
episode_data: dict | None = None
progbar = trange(n_batches, desc="Stepping through eval batches", disable=not enable_progbar)
# we don't want a progress bar when running on slurm, since it clutters the logs
progbar = trange(n_batches, desc="Stepping through eval batches", disable=inside_slurm())
for batch_ix in progbar:
# Cache frames for rendering videos. Each item will be (b, h, w, c), and the list indexes the rollout
# step.
@@ -285,7 +286,6 @@ def eval_policy(
seeds=list(seeds) if seeds else None,
return_observations=return_episode_data,
render_callback=render_frame if max_episodes_rendered > 0 else None,
enable_progbar=enable_inner_progbar,
)
# Figure out where in each rollout sequence the first done condition was encountered (results after
@@ -454,6 +454,16 @@ def main(
else:
hydra_cfg = init_hydra_config(hydra_cfg_path, config_overrides)
if hydra_cfg.eval.batch_size > hydra_cfg.eval.n_episodes:
raise ValueError(
"The eval batch size is greater than the number of eval episodes "
f"({hydra_cfg.eval.batch_size} > {hydra_cfg.eval.n_episodes}). As a result, {hydra_cfg.eval.batch_size} "
f"eval environments will be instantiated, but only {hydra_cfg.eval.n_episodes} will be used. "
"This might significantly slow down evaluation. To fix this, you should update your command "
f"to increase the number of episodes to match the batch size (e.g. `eval.n_episodes={hydra_cfg.eval.batch_size}`), "
f"or lower the batch size (e.g. `eval.batch_size={hydra_cfg.eval.n_episodes}`)."
)
if out_dir is None:
out_dir = f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}"
@@ -487,8 +497,6 @@ def main(
max_episodes_rendered=10,
videos_dir=Path(out_dir) / "videos",
start_seed=hydra_cfg.seed,
enable_progbar=True,
enable_inner_progbar=True,
)
print(info["aggregated"])

View File

@@ -66,6 +66,8 @@ def get_from_raw_to_lerobot_format_fn(raw_format: str):
from lerobot.common.datasets.push_dataset_to_hub.umi_zarr_format import from_raw_to_lerobot_format
elif raw_format == "aloha_hdf5":
from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import from_raw_to_lerobot_format
elif "openx_rlds" in raw_format:
from lerobot.common.datasets.push_dataset_to_hub.openx_rlds_format import from_raw_to_lerobot_format
elif raw_format == "dora_parquet":
from lerobot.common.datasets.push_dataset_to_hub.dora_parquet_format import from_raw_to_lerobot_format
elif raw_format == "xarm_pkl":
@@ -197,9 +199,25 @@ def push_dataset_to_hub(
# convert dataset from original raw format to LeRobot format
from_raw_to_lerobot_format = get_from_raw_to_lerobot_format_fn(raw_format)
hf_dataset, episode_data_index, info = from_raw_to_lerobot_format(
raw_dir, videos_dir, fps, video, episodes, encoding
)
fmt_kwgs = {
"raw_dir": raw_dir,
"videos_dir": videos_dir,
"fps": fps,
"video": video,
"episodes": episodes,
"encoding": encoding,
}
if "openx_rlds." in raw_format:
# Support for an official OXE dataset name inside `raw_format`.
# For instance, `raw_format="openx_rlds"` uses the default formatting (TODO what does that mean?),
# and `raw_format="openx_rlds.bridge_orig"` uses the bridge_orig formatting
_, openx_dataset_name = raw_format.split(".")
print(f"Converting dataset [{openx_dataset_name}] from 'openx_rlds' to LeRobot format.")
fmt_kwgs["openx_dataset_name"] = openx_dataset_name
hf_dataset, episode_data_index, info = from_raw_to_lerobot_format(**fmt_kwgs)
lerobot_dataset = LeRobotDataset.from_preloaded(
repo_id=repo_id,
@@ -268,7 +286,7 @@ def main():
"--raw-format",
type=str,
required=True,
help="Dataset type (e.g. `pusht_zarr`, `umi_zarr`, `aloha_hdf5`, `xarm_pkl`, `dora_parquet`).",
help="Dataset type (e.g. `pusht_zarr`, `umi_zarr`, `aloha_hdf5`, `xarm_pkl`, `dora_parquet`, `openx_rlds`).",
)
parser.add_argument(
"--repo-id",
@@ -328,6 +346,13 @@ def main():
default=0,
help="When set to 1, resumes a previous run.",
)
parser.add_argument(
"--cache-dir",
type=Path,
required=False,
default="/tmp",
help="Directory to store the temporary videos and images generated while creating the dataset.",
)
parser.add_argument(
"--tests-data-dir",
type=Path,

View File

@@ -241,6 +241,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
raise NotImplementedError()
init_logging()
logging.info(pformat(OmegaConf.to_container(cfg)))
if cfg.training.online_steps > 0 and isinstance(cfg.dataset_repo_id, ListConfig):
raise NotImplementedError("Online training with LeRobotMultiDataset is not implemented.")
@@ -287,6 +288,16 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
"you meant to resume training, please use `resume=true` in your command or yaml configuration."
)
if cfg.eval.batch_size > cfg.eval.n_episodes:
raise ValueError(
"The eval batch size is greater than the number of eval episodes "
f"({cfg.eval.batch_size} > {cfg.eval.n_episodes}). As a result, {cfg.eval.batch_size} "
f"eval environments will be instantiated, but only {cfg.eval.n_episodes} will be used. "
"This might significantly slow down evaluation. To fix this, you should update your command "
f"to increase the number of episodes to match the batch size (e.g. `eval.n_episodes={cfg.eval.batch_size}`), "
f"or lower the batch size (e.g. `eval.batch_size={cfg.eval.n_episodes}`)."
)
# log metrics to terminal and wandb
logger = Logger(cfg, out_dir, wandb_job_name=job_name)

View File

@@ -57,7 +57,6 @@ import logging
import shutil
from pathlib import Path
import torch
import tqdm
from flask import Flask, redirect, render_template, url_for
@@ -65,19 +64,6 @@ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.utils.utils import init_logging
class EpisodeSampler(torch.utils.data.Sampler):
def __init__(self, dataset, episode_index):
from_idx = dataset.episode_data_index["from"][episode_index].item()
to_idx = dataset.episode_data_index["to"][episode_index].item()
self.frame_ids = range(from_idx, to_idx)
def __iter__(self):
return iter(self.frame_ids)
def __len__(self):
return len(self.frame_ids)
def run_server(
dataset: LeRobotDataset,
episodes: list[int],
@@ -112,10 +98,14 @@ def run_server(
"fps": dataset.fps,
}
video_paths = get_episode_video_paths(dataset, episode_id)
language_instruction = get_episode_language_instruction(dataset, episode_id)
videos_info = [
{"url": url_for("static", filename=video_path), "filename": Path(video_path).name}
for video_path in video_paths
]
if language_instruction:
videos_info[0]["language_instruction"] = language_instruction
ep_csv_url = url_for("static", filename=get_ep_csv_fname(episode_id))
return render_template(
"visualize_dataset_template.html",
@@ -186,6 +176,20 @@ def get_episode_video_paths(dataset: LeRobotDataset, ep_index: int) -> list[str]
]
def get_episode_language_instruction(dataset: LeRobotDataset, ep_index: int) -> str | None:
# check if the dataset has language instructions
if "language_instruction" not in dataset.hf_dataset.features:
return None
# get first frame index
first_frame_idx = dataset.episode_data_index["from"][ep_index].item()
language_instruction = dataset.hf_dataset[first_frame_idx]["language_instruction"]
# TODO (michel-aractingi): hack to extract the sentence; some strings in openx are badly stored,
# with the tf.Tensor repr appearing inside the string
return language_instruction.removeprefix("tf.Tensor(b'").removesuffix("', shape=(), dtype=string)")
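# For example (hypothetical value), a raw OpenX string stored as
# "tf.Tensor(b'pick up the cube', shape=(), dtype=string)" is returned as "pick up the cube".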
def visualize_dataset_html(
repo_id: str,
root: Path | None = None,

View File

@@ -14,7 +14,7 @@
<!-- Use [Alpin.js](https://alpinejs.dev), a lightweight and easy to learn JS framework -->
<!-- Use [tailwindcss](https://tailwindcss.com/), CSS classes for styling html -->
<!-- Use [dygraphs](https://dygraphs.com/), a lightweight JS charting library -->
<body class="flex h-screen max-h-screen bg-slate-950 text-gray-200" x-data="createAlpineData()" @keydown.window="(e) => {
<body class="flex flex-col md:flex-row h-screen max-h-screen bg-slate-950 text-gray-200" x-data="createAlpineData()" @keydown.window="(e) => {
// Use the space bar to play and pause, instead of default action (e.g. scrolling)
const { keyCode, key } = e;
if (keyCode === 32 || key === ' ') {
@@ -30,7 +30,7 @@
}
}">
<!-- Sidebar -->
<div x-ref="sidebar" class="w-60 bg-slate-900 p-5 break-words max-h-screen overflow-y-auto">
<div x-ref="sidebar" class="bg-slate-900 p-5 break-words overflow-y-auto shrink-0 md:shrink md:w-60 md:max-h-screen">
<h1 class="mb-4 text-xl font-semibold">{{ dataset_info.repo_id }}</h1>
<ul>
@@ -46,7 +46,8 @@
</ul>
<p>Episodes:</p>
<ul class="ml-2">
<!-- episodes menu for medium & large screens -->
<ul class="ml-2 hidden md:block">
{% for episode in episodes %}
<li class="font-mono text-sm mt-0.5">
<a href="episode_{{ episode }}" class="underline {% if episode_id == episode %}font-bold -ml-1{% endif %}">
@@ -56,26 +57,38 @@
{% endfor %}
</ul>
<!-- episodes menu for small screens -->
<div class="flex overflow-x-auto md:hidden">
{% for episode in episodes %}
<p class="font-mono text-sm mt-0.5 border-r last:border-r-0 px-2 {% if episode_id == episode %}font-bold{% endif %}">
<a href="episode_{{ episode }}" class="">
{{ episode }}
</a>
</p>
{% endfor %}
</div>
</div>
<!-- Toggle sidebar button -->
<button class="flex items-center opacity-50 hover:opacity-100 mx-1"
<button class="flex items-center opacity-50 hover:opacity-100 mx-1 hidden md:block"
@click="() => ($refs.sidebar.classList.toggle('hidden'))" title="Toggle sidebar">
<div class="bg-slate-500 w-2 h-10 rounded-full"></div>
</button>
<!-- Content -->
<div class="flex-1 max-h-screen flex flex-col gap-4 overflow-y-auto">
<div class="max-h-screen flex flex-col gap-4 overflow-y-auto md:flex-1">
<h1 class="text-xl font-bold mt-4 font-mono">
Episode {{ episode_id }}
</h1>
<!-- Videos -->
<div class="flex flex-wrap gap-1">
<p x-show="videoCodecError" class="font-medium text-orange-700">Videos could NOT play because <a href="https://en.wikipedia.org/wiki/AV1" target="_blank" class="underline">AV1</a> decoding is not available on your browser. Learn more about <a href="https://huggingface.co/blog/video-encoding" target="_blank" class="underline">LeRobot video encoding</a>.</p>
{% for video_info in videos_info %}
<div class="max-w-96">
<div x-show="!videoCodecError" class="max-w-96">
<p class="text-sm text-gray-300 bg-gray-800 px-2 rounded-t-xl truncate">{{ video_info.filename }}</p>
<video autoplay muted loop type="video/mp4" class="min-w-64" @timeupdate="() => {
<video muted loop type="video/mp4" class="min-w-64" @canplaythrough="videoCanPlay" @timeupdate="() => {
if (video.duration) {
const time = video.currentTime;
const pc = (100 / video.duration) * time;
@@ -100,6 +113,13 @@
{% endfor %}
</div>
<!-- Language instruction -->
{% if videos_info[0].language_instruction %}
<p class="font-medium mt-2">
Language Instruction: <span class="italic">{{ videos_info[0].language_instruction }}</span>
</p>
{% endif %}
<!-- Shortcuts info -->
<div class="text-sm hidden md:block">
Hotkeys: <span class="font-mono">Space</span> to pause/unpause, <span class="font-mono">Arrow Down</span> to go to next episode, <span class="font-mono">Arrow Up</span> to go to previous episode.
@@ -107,12 +127,12 @@
<!-- Controllers -->
<div class="flex gap-1 text-3xl items-center">
<button x-ref="btnPlay" class="-rotate-90 hidden" class="-rotate-90" title="Play. Toggle with Space" @click="() => {
<button x-ref="btnPlay" class="-rotate-90" class="-rotate-90" title="Play. Toggle with Space" @click="() => {
videos.forEach(video => video.play());
$refs.btnPlay.classList.toggle('hidden');
$refs.btnPause.classList.toggle('hidden');
}">🔽</button>
<button x-ref="btnPause" title="Pause. Toggle with Space" @click="() => {
<button x-ref="btnPause" class="hidden" title="Pause. Toggle with Space" @click="() => {
videos.forEach(video => video.pause());
$refs.btnPlay.classList.toggle('hidden');
$refs.btnPause.classList.toggle('hidden');
@@ -125,7 +145,6 @@
@click="() => (videos.forEach(video => (video.currentTime = 0.0)))">↩️</button>
<input x-ref="slider" max="100" min="0" step="1" type="range" value="0" class="w-80 mx-2" @input="() => {
const sliderValue = $refs.slider.value;
$refs.btnPause.click();
videos.forEach(video => {
const time = (video.duration * sliderValue) / 100;
video.currentTime = time;
@@ -177,9 +196,9 @@
</td>
<template x-for="(cell, colIndex) in row">
<td x-show="cell" class="border border-slate-700">
<div class="flex gap-x-2 w-24 justify-between px-2">
<div class="flex gap-x-2 w-24 justify-between px-2" :class="{ 'hidden': cell.isNull }">
<input type="checkbox" x-model="cell.checked" @change="updateTableValues()">
<span x-text="`${cell.value.toFixed(2)}`"
<span x-text="`${!cell.isNull ? cell.value.toFixed(2) : null}`"
:style="`color: ${cell.color}`"></span>
</div>
</td>
@@ -201,16 +220,29 @@
dygraph: null,
currentFrameData: null,
columnNames: ["state", "action", "pred action"],
nColumns: {% if has_policy %}3{% else %}2{% endif %},
nColumns: 2,
nStates: 0,
nActions: 0,
checked: [],
dygraphTime: 0.0,
dygraphIndex: 0,
videos: null,
video: null,
colors: null,
nVideos: {{ videos_info | length }},
nVideoReadyToPlay: 0,
videoCodecError: false,
// alpine initialization
init() {
// check if videos can play
const dummyVideo = document.createElement('video');
const canPlayVideos = dummyVideo.canPlayType('video/mp4; codecs="av01.0.05M.08"'); // codec source: https://huggingface.co/blog/video-encoding#results
if(!canPlayVideos){
this.videoCodecError = true;
}
// process CSV data
this.videos = document.querySelectorAll('video');
this.video = this.videos[0];
this.dygraph = new Dygraph(document.getElementById("graph"), '{{ ep_csv_url }}', {
@@ -235,17 +267,19 @@
this.checked = Array(this.colors.length).fill(true);
const seriesNames = this.dygraph.getLabels().slice(1);
this.nStates = seriesNames.findIndex(item => item.startsWith('action_'));
this.nActions = seriesNames.length - this.nStates;
const colors = [];
const LIGHTNESS = [30, 65, 85]; // state_lightness, action_lightness, pred_action_lightness
let lightnessIdx = 0;
const chunkSize = Math.ceil(seriesNames.length / this.nColumns);
for (let i = 0; i < seriesNames.length; i += chunkSize) {
const lightness = LIGHTNESS[lightnessIdx];
for (let hue = 0; hue < 360; hue += parseInt(360/chunkSize)) {
const color = `hsl(${hue}, 100%, ${lightness}%)`;
colors.push(color);
}
lightnessIdx += 1;
// colors for "state" lines
for (let hue = 0; hue < 360; hue += parseInt(360/this.nStates)) {
const color = `hsl(${hue}, 100%, ${LIGHTNESS[0]}%)`;
colors.push(color);
}
// colors for "action" lines
for (let hue = 0; hue < 360; hue += parseInt(360/this.nActions)) {
const color = `hsl(${hue}, 100%, ${LIGHTNESS[1]}%)`;
colors.push(color);
}
this.dygraph.updateOptions({ colors });
this.colors = colors;
@@ -272,37 +306,40 @@
if (!this.currentFrameData) {
return [];
}
const columnSize = Math.ceil(this.currentFrameData.length / this.nColumns);
return Array.from({
length: columnSize
}, (_, rowIndex) => {
const row = [
this.currentFrameData[rowIndex] || null,
this.currentFrameData[rowIndex + columnSize] || null,
];
if (this.nColumns === 3) {
row.push(this.currentFrameData[rowIndex + 2 * columnSize] || null)
}
return row;
});
const rows = [];
const nRows = Math.max(this.nStates, this.nActions);
let rowIndex = 0;
while(rowIndex < nRows){
const row = [];
// number of states may NOT match number of actions. In this case, we null-pad the 2D array to make a fully rectangular 2d array
const nullCell = { isNull: true };
const stateValueIdx = rowIndex;
const actionValueIdx = stateValueIdx + this.nStates; // because this.currentFrameData = [state0, state1, ..., stateN, action0, action1, ..., actionN]
// row consists of [state value, action value]
row.push(rowIndex < this.nStates ? this.currentFrameData[stateValueIdx] : nullCell); // push "state value" to row
row.push(rowIndex < this.nActions ? this.currentFrameData[actionValueIdx] : nullCell); // push "action value" to row
rowIndex += 1;
rows.push(row);
}
return rows;
},
isRowChecked(rowIndex) {
return this.rows[rowIndex].every(cell => cell && cell.checked);
return this.rows[rowIndex].every(cell => cell && (cell.isNull || cell.checked));
},
isColumnChecked(colIndex) {
return this.rows.every(row => row[colIndex] && row[colIndex].checked);
return this.rows.every(row => row[colIndex] && (row[colIndex].isNull || row[colIndex].checked));
},
toggleRow(rowIndex) {
const newState = !this.isRowChecked(rowIndex);
this.rows[rowIndex].forEach(cell => {
if (cell) cell.checked = newState;
if (cell && !cell.isNull) cell.checked = newState;
});
this.updateTableValues();
},
toggleColumn(colIndex) {
const newState = !this.isColumnChecked(colIndex);
this.rows.forEach(row => {
if (row[colIndex]) row[colIndex].checked = newState;
if (row[colIndex] && !row[colIndex].isNull) row[colIndex].checked = newState;
});
this.updateTableValues();
},
@@ -343,7 +380,6 @@
window.history.replaceState({}, '', url.toString());
},
formatTime(time) {
var hours = Math.floor(time / 3600);
var minutes = Math.floor((time % 3600) / 60);
@@ -351,6 +387,14 @@
return (hours > 0 ? hours + ':' : '') + (minutes < 10 ? '0' + minutes : minutes) + ':' + (seconds <
10 ?
'0' + seconds : seconds);
},
videoCanPlay() {
this.nVideoReadyToPlay += 1;
if(this.nVideoReadyToPlay == this.nVideos) {
// start autoplay all videos in sync
this.$refs.btnPlay.click();
}
}
};
}

[6 binary image files added (not shown): 370 KiB, 391 KiB, 387 KiB, 479 KiB, 474 KiB, 471 KiB]

poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "absl-py"
@@ -2406,6 +2406,7 @@ description = "Nvidia JIT LTO Library"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_aarch64.whl", hash = "sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27"},
{file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f9b37bc5c8cf7509665cb6ada5aaa0ce65618f2332b7d3e78e9790511f111212"},
{file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-win_amd64.whl", hash = "sha256:e782564d705ff0bf61ac3e1bf730166da66dd2fe9012f111ede5fc49b64ae697"},
]
@@ -3193,6 +3194,29 @@ files = [
[package.extras]
diagrams = ["jinja2", "railroad-diagrams"]
[[package]]
name = "pyrealsense2"
version = "2.55.1.6486"
description = "Python Wrapper for Intel Realsense SDK 2.0."
optional = true
python-versions = "*"
files = [
{file = "pyrealsense2-2.55.1.6486-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:882613808289c602b23f2e19bf1fbadd63fb3af9be9c2997cc4ea74741a65136"},
{file = "pyrealsense2-2.55.1.6486-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:686320811ef30c162c7240cb619e9b152420c0a32337a137139276c87f213336"},
{file = "pyrealsense2-2.55.1.6486-cp310-cp310-win_amd64.whl", hash = "sha256:600e7c691c7c50043a2c930471f873da33badce9c5b8c75a8bad499389ac10a4"},
{file = "pyrealsense2-2.55.1.6486-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:178f92caec6e8e212a7ced66c678dec47462b0b77e929fa576d02eea297bb177"},
{file = "pyrealsense2-2.55.1.6486-cp311-cp311-win_amd64.whl", hash = "sha256:e01939a63bac3e1a4da742f7e1dbc618a4ec03ee0f7b3690ae5d1ad0c983aca8"},
{file = "pyrealsense2-2.55.1.6486-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5a40216386b49a520b5817afe97efa9a53471747b765e8b4e6ca549678945c04"},
{file = "pyrealsense2-2.55.1.6486-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:affb80b0cad7a732db4e450e9d4a8f7193499e3b35c0ce0b3e67fde5b1e9cf64"},
{file = "pyrealsense2-2.55.1.6486-cp37-cp37m-win_amd64.whl", hash = "sha256:93f81f0955037a325529d93059138e5036fc5e51d5fda4b3c88ae4287fa0b3ed"},
{file = "pyrealsense2-2.55.1.6486-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0f51350ea100215dedc757ea7872ec23342a1d84015e87583911912d882c8ce2"},
{file = "pyrealsense2-2.55.1.6486-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:3695ae04d423d6404db4d0a756c66a0f122f9e0858c91d3dcee132adbef35b62"},
{file = "pyrealsense2-2.55.1.6486-cp38-cp38-win_amd64.whl", hash = "sha256:f06ea7adadcdcb7d3334b8f067e4f7a361f6421a763988897d52602937c716de"},
{file = "pyrealsense2-2.55.1.6486-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:18b5650b1ffcc2c2a42c9f72870d291509afc5819db757f5f365c42a8aae4129"},
{file = "pyrealsense2-2.55.1.6486-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:aac4fb7f9a36ff6ecaa3bf0565f3baa9327b02dd843f14933eece8a4455c6c79"},
{file = "pyrealsense2-2.55.1.6486-cp39-cp39-win_amd64.whl", hash = "sha256:5cbede3cd35946f3051ae6df42619ea01419c58379533c596bbad5dbf648c25b"},
]
[[package]]
name = "pyserial"
version = "3.5"
@@ -4551,7 +4575,8 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
aloha = ["gym-aloha"]
dev = ["debugpy", "pre-commit"]
dora = ["gym-dora"]
koch = ["dynamixel-sdk", "pynput"]
dynamixel = ["dynamixel-sdk", "pynput"]
intelrealsense = ["pyrealsense2"]
pusht = ["gym-pusht"]
test = ["pytest", "pytest-cov"]
umi = ["imagecodecs"]
@@ -4561,4 +4586,4 @@ xarm = ["gym-xarm"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "a340f2ed23db2f3c371c494cbc9a33392e122ed6713e6098277a87b3fb805f2b"
content-hash = "06a8a1941b75c3ec78ade6f8b2c3ad7b5d2f1516b590fa3d5a773add73f6dbec"

View File

@@ -66,7 +66,7 @@ dynamixel-sdk = {version = ">=3.7.31", optional = true}
pynput = {version = ">=1.7.7", optional = true}
# TODO(rcadene, salibert): 71.0.1 has a bug
setuptools = {version = "!=71.0.1", optional = true}
pyrealsense2 = {version = ">=2.55.1.6486", markers = "sys_platform != 'darwin'", optional = true}
[tool.poetry.extras]
@@ -78,7 +78,8 @@ dev = ["pre-commit", "debugpy"]
test = ["pytest", "pytest-cov"]
umi = ["imagecodecs"]
video_benchmark = ["scikit-image", "pandas"]
koch = ["dynamixel-sdk", "pynput"]
dynamixel = ["dynamixel-sdk", "pynput"]
intelrealsense = ["pyrealsense2"]
[tool.ruff]
line-length = 110

View File

@@ -13,28 +13,31 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import traceback
import pytest
from lerobot.common.utils.utils import init_hydra_config
from .utils import DEVICE, KOCH_ROBOT_CONFIG_PATH
from .utils import DEVICE, ROBOT_CONFIG_PATH_TEMPLATE
def pytest_collection_finish():
print(f"\nTesting with {DEVICE=}")
@pytest.fixture(scope="session")
def is_koch_available():
@pytest.fixture
def is_robot_available(robot_type):
try:
from lerobot.common.robot_devices.robots.factory import make_robot
robot_cfg = init_hydra_config(KOCH_ROBOT_CONFIG_PATH)
config_path = ROBOT_CONFIG_PATH_TEMPLATE.format(robot=robot_type)
robot_cfg = init_hydra_config(config_path)
robot = make_robot(robot_cfg)
robot.connect()
del robot
return True
except Exception as e:
print("A koch robot is not available.")
print(e)
except Exception:
traceback.print_exc()
print(f"\nA {robot_type} robot is not available.")
return False

View File

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

View File

@@ -1,3 +1,61 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8b7fbedfdb3d536847bc6fadf2cbabb9f2b5492edf3e2c274a3e8ffb447105e8
size 1166
{
"citation": "",
"description": "",
"features": {
"observation.images.cam_high": {
"_type": "VideoFrame"
},
"observation.images.cam_left_wrist": {
"_type": "VideoFrame"
},
"observation.images.cam_right_wrist": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"observation.effort": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

View File

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:34ece24fb6b302db0b68987858509f31713fb299faa9a9d34b8fd68f10bc3100
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "3e76021c95d21c4d",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

View File

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

View File

@@ -1,3 +1,61 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8b7fbedfdb3d536847bc6fadf2cbabb9f2b5492edf3e2c274a3e8ffb447105e8
size 1166
{
"citation": "",
"description": "",
"features": {
"observation.images.cam_high": {
"_type": "VideoFrame"
},
"observation.images.cam_left_wrist": {
"_type": "VideoFrame"
},
"observation.images.cam_right_wrist": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"observation.effort": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

View File

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:38cf4116a65cb92a5c43f9b9da7a7b81cfa9168b17605c8c456f7d3a3a23b77a
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "872117944c4ecdff",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

View File

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

View File

@@ -1,3 +1,61 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8b7fbedfdb3d536847bc6fadf2cbabb9f2b5492edf3e2c274a3e8ffb447105e8
size 1166
{
"citation": "",
"description": "",
"features": {
"observation.images.cam_high": {
"_type": "VideoFrame"
},
"observation.images.cam_left_wrist": {
"_type": "VideoFrame"
},
"observation.images.cam_right_wrist": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"observation.effort": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

View File

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d394d451929b805f2d94f9fc5b12d15c31cfc494df76d7d642b63378b8ba0131
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "ba1d9dc6ea5a9717",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

View File

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

View File

@@ -1,3 +1,61 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8b7fbedfdb3d536847bc6fadf2cbabb9f2b5492edf3e2c274a3e8ffb447105e8
size 1166
{
"citation": "",
"description": "",
"features": {
"observation.images.cam_high": {
"_type": "VideoFrame"
},
"observation.images.cam_left_wrist": {
"_type": "VideoFrame"
},
"observation.images.cam_right_wrist": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"observation.effort": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

View File

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:69435f30146a309c8d7d0eb01216555bf0547095db1fc9c20218d481d6fe62c8
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "d95a3a7eae59566d",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

View File

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

View File

@@ -1,3 +1,61 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8b7fbedfdb3d536847bc6fadf2cbabb9f2b5492edf3e2c274a3e8ffb447105e8
size 1166
{
"citation": "",
"description": "",
"features": {
"observation.images.cam_high": {
"_type": "VideoFrame"
},
"observation.images.cam_left_wrist": {
"_type": "VideoFrame"
},
"observation.images.cam_right_wrist": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"observation.effort": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

View File

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:921505133c62906bd53034a613a827996994875d84c8b26d69d188df9a7ffeba
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "09ba9b66b7f468bc",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

View File

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

View File

@@ -1,3 +1,61 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8b7fbedfdb3d536847bc6fadf2cbabb9f2b5492edf3e2c274a3e8ffb447105e8
size 1166
{
"citation": "",
"description": "",
"features": {
"observation.images.cam_high": {
"_type": "VideoFrame"
},
"observation.images.cam_left_wrist": {
"_type": "VideoFrame"
},
"observation.images.cam_right_wrist": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"observation.effort": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

View File

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:505a42c408d56c8a7d3e2367280b41e27667b58334f32e84c937c44c38217bd6
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "5a33376149b4e966",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

View File

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

View File

@@ -1,3 +1,47 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3f44d13de5d5a417263bbd4984942ed42ed3fa0633405aa14d9a969a45274944
size 842
{
"citation": "",
"description": "",
"features": {
"observation.images.top": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

View File

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c0b18566cbf59e399ea40f1630df12ffbbb9f73bbc733d1d4eba62d675b1fda5
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "3aa08798f073758b",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

View File

@@ -1,3 +1,5 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7ab5c2bd7d176d4d7902a600240318c2828b7d75f4a888d0887327e4eff089d
size 65
{
"codebase_version": "v1.6",
"fps": 50,
"video": 0
}

View File

@@ -1,3 +1,47 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c9545525dc1f4d550591bd5efb63b55c15b983ae0510fefda5a16d77c78b6ef
size 837
{
"citation": "",
"description": "",
"features": {
"observation.images.top": {
"_type": "Image"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d7aa033603dc90582516dbcdf3e71e4d3113b70ad49098535def0b282135b5f3
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "9fe3a4bf575a8a67",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

@@ -1,3 +1,47 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3f44d13de5d5a417263bbd4984942ed42ed3fa0633405aa14d9a969a45274944
size 842
{
"citation": "",
"description": "",
"features": {
"observation.images.top": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5fd5fe80657788d044cdc8a1baf1456c7695cc951049347a469165002a83c6c7
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "e7db591f2ec3eeec",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

@@ -1,3 +1,5 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7ab5c2bd7d176d4d7902a600240318c2828b7d75f4a888d0887327e4eff089d
size 65
{
"codebase_version": "v1.6",
"fps": 50,
"video": 0
}

@@ -1,3 +1,47 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c9545525dc1f4d550591bd5efb63b55c15b983ae0510fefda5a16d77c78b6ef
size 837
{
"citation": "",
"description": "",
"features": {
"observation.images.top": {
"_type": "Image"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:752660d8fd884b33b7302a4a42ec7c680de2a3e5022d7d007586f4c6337ce08a
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "9be74ce70441e055",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

@@ -1,3 +1,47 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3f44d13de5d5a417263bbd4984942ed42ed3fa0633405aa14d9a969a45274944
size 842
{
"citation": "",
"description": "",
"features": {
"observation.images.top": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf10e41f2df8c5dc1c19ba8e2d2256ec9d81bdbe28b95079d28039fe2a28504c
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "fe4ace534342bfad",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

@@ -1,3 +1,5 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7ab5c2bd7d176d4d7902a600240318c2828b7d75f4a888d0887327e4eff089d
size 65
{
"codebase_version": "v1.6",
"fps": 50,
"video": 0
}

@@ -1,3 +1,47 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c9545525dc1f4d550591bd5efb63b55c15b983ae0510fefda5a16d77c78b6ef
size 837
{
"citation": "",
"description": "",
"features": {
"observation.images.top": {
"_type": "Image"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ec153065a4e52f7d55a7f026d804c57a3ce05dc1faa255a1947369f83c70f1e7
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "d7e89e41d66a244c",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

@@ -1,3 +1,47 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3f44d13de5d5a417263bbd4984942ed42ed3fa0633405aa14d9a969a45274944
size 842
{
"citation": "",
"description": "",
"features": {
"observation.images.top": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8f3dc6a1e423b8cd007396a0261664f364d330d350e569a6a0958b834ee1619b
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "88443018205d3133",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

@@ -1,3 +1,5 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7ab5c2bd7d176d4d7902a600240318c2828b7d75f4a888d0887327e4eff089d
size 65
{
"codebase_version": "v1.6",
"fps": 50,
"video": 0
}

@@ -1,3 +1,47 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c9545525dc1f4d550591bd5efb63b55c15b983ae0510fefda5a16d77c78b6ef
size 837
{
"citation": "",
"description": "",
"features": {
"observation.images.top": {
"_type": "Image"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fda1fe75c9f987c065d4244594e4f6456b7ac6efd7fae2a7952fb48b044dbd30
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "b9455593efc2e547",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

@@ -1,3 +1,11 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50e40e4c2bb523fca0b54e9a9635281312e9c6f9d757db03c06a0865c5508f29
size 188
{
"codebase_version": "v1.6",
"fps": 50,
"video": true,
"encoding": {
"vcodec": "libsvtav1",
"pix_fmt": "yuv420p",
"g": 2,
"crf": 30
}
}

@@ -1,3 +1,56 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e2e066afefdee57f3bc534085ab7af54e62d3ab2736d42863a89deb743cd0d04
size 1075
{
"citation": "",
"description": "",
"features": {
"observation.images.cam_high": {
"_type": "VideoFrame"
},
"observation.images.cam_left_wrist": {
"_type": "VideoFrame"
},
"observation.images.cam_low": {
"_type": "VideoFrame"
},
"observation.images.cam_right_wrist": {
"_type": "VideoFrame"
},
"observation.state": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"action": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"length": 14,
"_type": "Sequence"
},
"episode_index": {
"dtype": "int64",
"_type": "Value"
},
"frame_index": {
"dtype": "int64",
"_type": "Value"
},
"timestamp": {
"dtype": "float32",
"_type": "Value"
},
"next.done": {
"dtype": "bool",
"_type": "Value"
},
"index": {
"dtype": "int64",
"_type": "Value"
}
},
"homepage": "",
"license": ""
}

@@ -1,3 +1,13 @@
version https://git-lfs.github.com/spec/v1
oid sha256:722e0ba30e6f4ddce2d31166bfca753bf3c45c507a0f911e1635f0ec2521569e
size 247
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "a10d9c386f0a87b8",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

Some files were not shown because too many files have changed in this diff.