From bbf617fd9219967a7a995cc08dbc47d62000f327 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Tue, 2 Jul 2024 21:15:48 +0200 Subject: [PATCH] WIP --- README.md | 55 +- examples/real_robot_example/README.md | 89 ++ .../convert_original_act_checkpoint.ipynb | 840 ++++++++++++++++++ .../gym_real_world/__init__.py | 8 + .../gym_real_world/dynamixel.py | 363 ++++++++ .../gym_real_world/gym_environment.py | 192 ++++ .../gym_real_world/robot.py | 173 ++++ .../record_training_data.py | 237 +++++ examples/real_robot_example/run_policy.py | 60 ++ .../train_config/policy/act_real_world.yaml | 103 +++ .../policy/act_real_world_debug.yaml | 103 +++ lerobot/common/datasets/factory.py | 2 +- .../push_dataset_to_hub/aloha_hdf5_format.py | 13 +- .../datasets/push_dataset_to_hub/utils.py | 9 +- lerobot/common/envs/utils.py | 6 +- .../common/policies/act/configuration_act.py | 2 + lerobot/common/policies/act/modeling_act.py | 28 +- .../robot_devices/cameras/intelrealsense.py | 325 +++++++ .../common/robot_devices/motors/feetech.py | 713 +++++++++++++++ lerobot/common/robot_devices/robots/aloha.py | 204 +++++ lerobot/common/utils/io_utils.py | 39 + lerobot/configs/default.yaml | 2 + lerobot/configs/env/aloha.yaml | 1 + lerobot/configs/env/dora_aloha_real.yaml | 1 + lerobot/configs/env/pusht.yaml | 1 + lerobot/configs/env/xarm.yaml | 1 + lerobot/scripts/eval.py | 9 +- .../scripts/robot_controls/koch_configure.py | 211 +++++ .../scripts/robot_controls/record_dataset.py | 20 + .../scripts/robot_controls/replay_dataset.py | 19 + lerobot/scripts/robot_controls/run_policy.py | 47 + lerobot/scripts/robot_controls/teleoperate.py | 20 + .../scripts/robot_controls/teleoperate_cv2.py | 320 +++++++ .../teleoperate_intelrealsense.py | 338 +++++++ lerobot/scripts/test.py | 10 + lerobot/scripts/train.py | 3 +- tests/test_examples.py | 23 +- 37 files changed, 4562 insertions(+), 28 deletions(-) create mode 100644 examples/real_robot_example/README.md create mode 100644 examples/real_robot_example/convert_original_act_checkpoint.ipynb create mode 100644 examples/real_robot_example/gym_real_world/__init__.py create mode 100644 examples/real_robot_example/gym_real_world/dynamixel.py create mode 100644 examples/real_robot_example/gym_real_world/gym_environment.py create mode 100644 examples/real_robot_example/gym_real_world/robot.py create mode 100644 examples/real_robot_example/record_training_data.py create mode 100644 examples/real_robot_example/run_policy.py create mode 100644 examples/real_robot_example/train_config/policy/act_real_world.yaml create mode 100644 examples/real_robot_example/train_config/policy/act_real_world_debug.yaml create mode 100644 lerobot/common/robot_devices/cameras/intelrealsense.py create mode 100644 lerobot/common/robot_devices/motors/feetech.py create mode 100644 lerobot/common/robot_devices/robots/aloha.py create mode 100644 lerobot/scripts/robot_controls/koch_configure.py create mode 100644 lerobot/scripts/robot_controls/record_dataset.py create mode 100644 lerobot/scripts/robot_controls/replay_dataset.py create mode 100644 lerobot/scripts/robot_controls/run_policy.py create mode 100644 lerobot/scripts/robot_controls/teleoperate.py create mode 100644 lerobot/scripts/robot_controls/teleoperate_cv2.py create mode 100644 lerobot/scripts/robot_controls/teleoperate_intelrealsense.py create mode 100644 lerobot/scripts/test.py diff --git a/README.md b/README.md index 12ebe8d0..ee0e06e1 100644 --- a/README.md +++ b/README.md @@ -127,13 +127,21 @@ wandb login Check out [example 
1](./examples/1_load_lerobot_dataset.py) that illustrates how to use our dataset class which automatically download data from the Hugging Face hub.

-You can also locally visualize episodes from a dataset by executing our script from the command line:
+You can also locally visualize episodes from a dataset on the hub by executing our script from the command line:
```bash
python lerobot/scripts/visualize_dataset.py \
    --repo-id lerobot/pusht \
    --episode-index 0
```

+or from a dataset in a local folder with the root `DATA_DIR` environment variable:
+```bash
+DATA_DIR='./my_local_data_dir' python lerobot/scripts/visualize_dataset.py \
+    --repo-id lerobot/pusht \
+    --episode-index 0
+```
+
+
It will open `rerun.io` and display the camera streams, robot states and actions, like this:

https://github-production-user-asset-6210df.s3.amazonaws.com/4681518/328035972-fd46b787-b532-47e2-bb6f-fd536a55a7ed.mov?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20240505%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240505T172924Z&X-Amz-Expires=300&X-Amz-Signature=d680b26c532eeaf80740f08af3320d22ad0b8a4e4da1bcc4f33142c15b509eda&X-Amz-SignedHeaders=host&actor_id=24889239&key_id=0&repo_id=748713144
@@ -141,6 +149,51 @@ https://github-production-user-asset-6210df.s3.amazonaws.com/4681518/328035972-f
Our script can also visualize datasets stored on a distant server. See `python lerobot/scripts/visualize_dataset.py --help` for more instructions.

+### The `LeRobotDataset` format
+
+A dataset in `LeRobotDataset` format is very simple to use. It can be loaded from a repository on the Hugging Face hub or from a local folder, e.g. with `dataset = LeRobotDataset("lerobot/aloha_static_coffee")`, and can be indexed into like any Hugging Face and PyTorch dataset. For instance, `dataset[0]` will retrieve a single sample of observations and actions as PyTorch tensors, ready to be fed to a model.
+
+A specificity of `LeRobotDataset` is that we can retrieve several frames for one sample query. By setting `delta_timestamps` to a list of delta timestamps, e.g. `delta_timestamps = {"observation.image": [-1, -0.5, -0.2, 0]}`, one can retrieve, for each query, 4 images: one at -1 second before the current time step, two others at -0.5 and -0.2 seconds, and the final one at the current time step (0 second). See example [1_load_lerobot_dataset.py](examples/1_load_lerobot_dataset.py) for more details on `delta_timestamps`.
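+
+For example, a minimal sketch of loading a dataset with a small image history might look like this (the repo id and camera key below are only illustrative):
+
+```python
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+# Ask for 4 frames per query: 1 s, 0.5 s and 0.2 s before the current step, plus the current step.
+delta_timestamps = {"observation.image": [-1, -0.5, -0.2, 0]}
+dataset = LeRobotDataset("lerobot/pusht", delta_timestamps=delta_timestamps)
+
+item = dataset[0]
+print(item["observation.image"].shape)  # (4, c, h, w): one frame per requested timestamp
+print(item["action"].shape)
+```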
+
+Under the hood, the `LeRobotDataset` format makes use of several ways to serialize data, which can be useful to understand if you plan to work more closely with this format. We tried to make a flexible yet simple dataset format that covers most types of features and specificities present in reinforcement learning and robotics, in simulation and in the real world, with a focus on cameras and robot states.
+
+Here are the important details and internal structure organization of a typical `LeRobotDataset` instantiated with `dataset = LeRobotDataset("lerobot/aloha_static_coffee")`. The exact features will change from dataset to dataset but not the main aspects:
+
+```
+dataset attributes:
+  ├ hf_dataset: a Hugging Face dataset (backed by Arrow/parquet). Typical features example:
+  │  ├ observation.images.cam_high: VideoFrame
+  │  │   VideoFrame = {'path': path to a mp4 video, 'timestamp': float32 timestamp in the video}
+  │  ├ observation.state: List of float32: positions of the arm joints (for instance)
+  │  ... (more observations)
+  │  ├ action: List of float32
+  │  ├ episode_index: int64: index of the episode for this sample
+  │  ├ frame_index: int64: index of the frame for this sample in the episode ; starts at 0 for each episode
+  │  ├ timestamp: float32: timestamp in the episode
+  │  ├ next.done: bool: indicates the end of an episode ; True for the last frame in each episode
+  │  └ index: int64: general index in the whole dataset
+  ├ episode_data_index: contains 2 tensors with the start and end indices of each episode
+  │  ├ from: 1D int64 tensor of first frame index for each episode: shape (num episodes,) ; starts with 0
+  │  └ to: 1D int64 tensor of last frame index for each episode: shape (num episodes,)
+  ├ stats: a dictionary of statistics (max, mean, min, std) for each feature in the dataset, for instance
+  │  ├ observation.images.cam_high: {'max': tensor with same number of dimensions (e.g. `(c, 1, 1)` for images, `(c,)` for states), etc.}
+  │  ...
+  ├ info: a dictionary of metadata on the dataset
+  │  ├ fps: float - frames per second the dataset is recorded/synchronized to
+  │  └ video: bool - indicates if frames are encoded in mp4 video files to save space or stored as png files
+  ├ videos_dir: path to where the mp4 videos or png images are stored/accessed
+  └ camera_keys: List of string: the keys to access camera features in the item returned by the dataset (e.g. `["observation.images.cam_high", ...]`)
+```
+
+A `LeRobotDataset` is serialized using several widespread file formats for each of its parts, namely:
+- hf_dataset stored using the Hugging Face datasets library serialization to parquet
+- videos stored in mp4 format to save space, or as png files
+- episode_data_index saved using the `safetensors` tensor serialization format
+- stats saved using the `safetensors` tensor serialization format
+- info saved using JSON
+
+Datasets can be uploaded/downloaded from the Hugging Face hub seamlessly. To work on a local dataset, you can set the `DATA_DIR` environment variable to your root dataset folder, as illustrated in the above section on dataset visualization.
+
### Evaluate a pretrained policy

Check out [example 2](./examples/2_evaluate_pretrained_policy.py) that illustrates how to download a pretrained policy from Hugging Face hub, and run an evaluation on its corresponding environment.
diff --git a/examples/real_robot_example/README.md b/examples/real_robot_example/README.md
new file mode 100644
index 00000000..90b953b0
--- /dev/null
+++ b/examples/real_robot_example/README.md
@@ -0,0 +1,89 @@
+# Using `lerobot` on a real-world arm
+
+
+In this example, we'll be using `lerobot` on a real-world arm to:
+- record a dataset in the `lerobot` format
+- (soon) train a policy on it
+- (soon) run the policy in the real world
+
+## Which robotic arm to use
+
+In this example, we're using the [open-source low-cost arm from Alexander Koch](https://github.com/AlexanderKoch-Koch/low_cost_robot) in the specific setup of:
+- having 6 servos per arm, i.e. using the elbow-to-wrist extension
+- adding two cameras around it, one on top and one in the front
+- having a teleoperation arm as well (build the leader and the follower arms in A. Koch's repo, both with elbow-to-wrist extensions)
+
+I'm using these cameras (but the setup should not be sensitive to the exact cameras you're using):
+- C922 Pro Stream Webcam
+- Intel(R) RealSense D455 (using only the RGB input)
+
+
+In general, this example should be easily extendable to any type of arm using Dynamixel servos with at least one camera, by changing a couple of configurations in the gym env.
+
+## Install the example
+
+Follow these steps:
+- install `lerobot`
+- install the Dynamixel SDK: `pip install dynamixel-sdk`
+
+## Usage
+
+### 0 - Record examples
+
+Run the `record_training_data.py` example, selecting the duration and number of episodes you want to record, e.g.:
+```
+DATA_DIR='./data' python record_training_data.py \
+    --repo-id=thomwolf/blue_red_sort \
+    --num-episodes=50 \
+    --num-frames=400
+```
+
+TODO:
+- supporting episodes of various lengths
+- being able to drop episodes
+- checking uploading to the hub
+
+### 1 - Visualize the dataset
+
+Use the standard dataset visualization script, pointing it to the right folder:
+```
+DATA_DIR='./data' python ../../lerobot/scripts/visualize_dataset.py \
+    --repo-id thomwolf/blue_red_sort \
+    --episode-index 0
+```
+
+### 2 - Train a policy
+
+From the example directory, let's run this command to train a model using ACT:
+
+```
+DATA_DIR='./data' python ../../lerobot/scripts/train.py \
+    device=cuda \
+    hydra.searchpath=[file://./train_config/] \
+    hydra.run.dir=./outputs/train/blue_red_sort \
+    dataset_repo_id=thomwolf/blue_red_sort \
+    env=gym_real_world \
+    policy=act_real_world \
+    wandb.enable=false
+```
+
+### 3 - Evaluate the policy in the real world
+
+From the example directory, let's run this command to evaluate our policy.
+The configuration for running the policy is in the checkpoint of the model.
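+
+For reference, such a `pretrained_model/` checkpoint directory contains the policy weights (`model.safetensors`) together with its configuration (`config.json` for the policy, `config.yaml` for the full training config). It can also be loaded directly in Python; here is a minimal sketch (the local path is just an example):
+
+```python
+from lerobot.common.policies.act.modeling_act import ACTPolicy
+
+# Hypothetical local path; any directory saved by the training script should work.
+pretrained_path = "./outputs/train/blue_red_sort/checkpoints/last/pretrained_model"
+policy = ACTPolicy.from_pretrained(pretrained_path)
+policy.eval()
+```
+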
+You can override parameters as follow: + +``` +python run_policy.py \ + -p ./outputs/train/blue_red_sort/checkpoints/last/pretrained_model/ + env.episode_length=1000 +``` + + +## Convert a hdf5 dataset recorded with the original ACT repo + +You can convert a dataset from the raw data format of HDF5 files like in: https://github.com/tonyzhaozh/act with the following command: + +``` +python ./lerobot/scripts/push_dataset_to_hub.py +``` diff --git a/examples/real_robot_example/convert_original_act_checkpoint.ipynb b/examples/real_robot_example/convert_original_act_checkpoint.ipynb new file mode 100644 index 00000000..92306b86 --- /dev/null +++ b/examples/real_robot_example/convert_original_act_checkpoint.ipynb @@ -0,0 +1,840 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from safetensors.torch import load_file, save_file\n", + "from pprint import pprint" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "original_ckpt_path = \"/home/thomwolf/Documents/Github/ACT/checkpoints/blue_red_sort/policy_last.ckpt\"\n", + "converted_ckpt_path = \"/home/thomwolf/Documents/Github/ACT/checkpoints/blue_red_sort/model.safetensors\"\n", + "\n", + "comparison_main_path = \"/home/thomwolf/Documents/Github/lerobot/examples/real_robot_example/outputs/train/blue_red_debug_no_masking/checkpoints/last/pretrained_model/\"\n", + "comparison_safetensor_path = comparison_main_path + \"model.safetensors\"\n", + "comparison_config_json_path = comparison_main_path + \"config.json\"\n", + "comparison_config_yaml_path = comparison_main_path + \"config.yaml\"" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "a = torch.load(original_ckpt_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "b = load_file(comparison_safetensor_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['model.action_head.bias',\n", + " 'model.action_head.weight',\n", + " 'model.backbone.bn1.bias',\n", + " 'model.backbone.bn1.running_mean',\n", + " 'model.backbone.bn1.running_var',\n", + " 'model.backbone.bn1.weight',\n", + " 'model.backbone.conv1.weight',\n", + " 'model.backbone.layer1.0.bn1.bias',\n", + " 'model.backbone.layer1.0.bn1.running_mean',\n", + " 'model.backbone.layer1.0.bn1.running_var',\n", + " 'model.backbone.layer1.0.bn1.weight',\n", + " 'model.backbone.layer1.0.bn2.bias',\n", + " 'model.backbone.layer1.0.bn2.running_mean',\n", + " 'model.backbone.layer1.0.bn2.running_var',\n", + " 'model.backbone.layer1.0.bn2.weight',\n", + " 'model.backbone.layer1.0.conv1.weight',\n", + " 'model.backbone.layer1.0.conv2.weight',\n", + " 'model.backbone.layer1.1.bn1.bias',\n", + " 'model.backbone.layer1.1.bn1.running_mean',\n", + " 'model.backbone.layer1.1.bn1.running_var',\n", + " 'model.backbone.layer1.1.bn1.weight',\n", + " 'model.backbone.layer1.1.bn2.bias',\n", + " 'model.backbone.layer1.1.bn2.running_mean',\n", + " 'model.backbone.layer1.1.bn2.running_var',\n", + " 'model.backbone.layer1.1.bn2.weight',\n", + " 'model.backbone.layer1.1.conv1.weight',\n", + " 'model.backbone.layer1.1.conv2.weight',\n", + " 'model.backbone.layer2.0.bn1.bias',\n", + " 'model.backbone.layer2.0.bn1.running_mean',\n", + " 'model.backbone.layer2.0.bn1.running_var',\n", + " 
'model.backbone.layer2.0.bn1.weight',\n", + " 'model.backbone.layer2.0.bn2.bias',\n", + " 'model.backbone.layer2.0.bn2.running_mean',\n", + " 'model.backbone.layer2.0.bn2.running_var',\n", + " 'model.backbone.layer2.0.bn2.weight',\n", + " 'model.backbone.layer2.0.conv1.weight',\n", + " 'model.backbone.layer2.0.conv2.weight',\n", + " 'model.backbone.layer2.0.downsample.0.weight',\n", + " 'model.backbone.layer2.0.downsample.1.bias',\n", + " 'model.backbone.layer2.0.downsample.1.running_mean',\n", + " 'model.backbone.layer2.0.downsample.1.running_var',\n", + " 'model.backbone.layer2.0.downsample.1.weight',\n", + " 'model.backbone.layer2.1.bn1.bias',\n", + " 'model.backbone.layer2.1.bn1.running_mean',\n", + " 'model.backbone.layer2.1.bn1.running_var',\n", + " 'model.backbone.layer2.1.bn1.weight',\n", + " 'model.backbone.layer2.1.bn2.bias',\n", + " 'model.backbone.layer2.1.bn2.running_mean',\n", + " 'model.backbone.layer2.1.bn2.running_var',\n", + " 'model.backbone.layer2.1.bn2.weight',\n", + " 'model.backbone.layer2.1.conv1.weight',\n", + " 'model.backbone.layer2.1.conv2.weight',\n", + " 'model.backbone.layer3.0.bn1.bias',\n", + " 'model.backbone.layer3.0.bn1.running_mean',\n", + " 'model.backbone.layer3.0.bn1.running_var',\n", + " 'model.backbone.layer3.0.bn1.weight',\n", + " 'model.backbone.layer3.0.bn2.bias',\n", + " 'model.backbone.layer3.0.bn2.running_mean',\n", + " 'model.backbone.layer3.0.bn2.running_var',\n", + " 'model.backbone.layer3.0.bn2.weight',\n", + " 'model.backbone.layer3.0.conv1.weight',\n", + " 'model.backbone.layer3.0.conv2.weight',\n", + " 'model.backbone.layer3.0.downsample.0.weight',\n", + " 'model.backbone.layer3.0.downsample.1.bias',\n", + " 'model.backbone.layer3.0.downsample.1.running_mean',\n", + " 'model.backbone.layer3.0.downsample.1.running_var',\n", + " 'model.backbone.layer3.0.downsample.1.weight',\n", + " 'model.backbone.layer3.1.bn1.bias',\n", + " 'model.backbone.layer3.1.bn1.running_mean',\n", + " 'model.backbone.layer3.1.bn1.running_var',\n", + " 'model.backbone.layer3.1.bn1.weight',\n", + " 'model.backbone.layer3.1.bn2.bias',\n", + " 'model.backbone.layer3.1.bn2.running_mean',\n", + " 'model.backbone.layer3.1.bn2.running_var',\n", + " 'model.backbone.layer3.1.bn2.weight',\n", + " 'model.backbone.layer3.1.conv1.weight',\n", + " 'model.backbone.layer3.1.conv2.weight',\n", + " 'model.backbone.layer4.0.bn1.bias',\n", + " 'model.backbone.layer4.0.bn1.running_mean',\n", + " 'model.backbone.layer4.0.bn1.running_var',\n", + " 'model.backbone.layer4.0.bn1.weight',\n", + " 'model.backbone.layer4.0.bn2.bias',\n", + " 'model.backbone.layer4.0.bn2.running_mean',\n", + " 'model.backbone.layer4.0.bn2.running_var',\n", + " 'model.backbone.layer4.0.bn2.weight',\n", + " 'model.backbone.layer4.0.conv1.weight',\n", + " 'model.backbone.layer4.0.conv2.weight',\n", + " 'model.backbone.layer4.0.downsample.0.weight',\n", + " 'model.backbone.layer4.0.downsample.1.bias',\n", + " 'model.backbone.layer4.0.downsample.1.running_mean',\n", + " 'model.backbone.layer4.0.downsample.1.running_var',\n", + " 'model.backbone.layer4.0.downsample.1.weight',\n", + " 'model.backbone.layer4.1.bn1.bias',\n", + " 'model.backbone.layer4.1.bn1.running_mean',\n", + " 'model.backbone.layer4.1.bn1.running_var',\n", + " 'model.backbone.layer4.1.bn1.weight',\n", + " 'model.backbone.layer4.1.bn2.bias',\n", + " 'model.backbone.layer4.1.bn2.running_mean',\n", + " 'model.backbone.layer4.1.bn2.running_var',\n", + " 'model.backbone.layer4.1.bn2.weight',\n", + " 'model.backbone.layer4.1.conv1.weight',\n", + " 
'model.backbone.layer4.1.conv2.weight',\n", + " 'model.decoder.layers.0.linear1.bias',\n", + " 'model.decoder.layers.0.linear1.weight',\n", + " 'model.decoder.layers.0.linear2.bias',\n", + " 'model.decoder.layers.0.linear2.weight',\n", + " 'model.decoder.layers.0.multihead_attn.in_proj_bias',\n", + " 'model.decoder.layers.0.multihead_attn.in_proj_weight',\n", + " 'model.decoder.layers.0.multihead_attn.out_proj.bias',\n", + " 'model.decoder.layers.0.multihead_attn.out_proj.weight',\n", + " 'model.decoder.layers.0.norm1.bias',\n", + " 'model.decoder.layers.0.norm1.weight',\n", + " 'model.decoder.layers.0.norm2.bias',\n", + " 'model.decoder.layers.0.norm2.weight',\n", + " 'model.decoder.layers.0.norm3.bias',\n", + " 'model.decoder.layers.0.norm3.weight',\n", + " 'model.decoder.layers.0.self_attn.in_proj_bias',\n", + " 'model.decoder.layers.0.self_attn.in_proj_weight',\n", + " 'model.decoder.layers.0.self_attn.out_proj.bias',\n", + " 'model.decoder.layers.0.self_attn.out_proj.weight',\n", + " 'model.decoder_pos_embed.weight',\n", + " 'model.encoder.layers.0.linear1.bias',\n", + " 'model.encoder.layers.0.linear1.weight',\n", + " 'model.encoder.layers.0.linear2.bias',\n", + " 'model.encoder.layers.0.linear2.weight',\n", + " 'model.encoder.layers.0.norm1.bias',\n", + " 'model.encoder.layers.0.norm1.weight',\n", + " 'model.encoder.layers.0.norm2.bias',\n", + " 'model.encoder.layers.0.norm2.weight',\n", + " 'model.encoder.layers.0.self_attn.in_proj_bias',\n", + " 'model.encoder.layers.0.self_attn.in_proj_weight',\n", + " 'model.encoder.layers.0.self_attn.out_proj.bias',\n", + " 'model.encoder.layers.0.self_attn.out_proj.weight',\n", + " 'model.encoder.layers.1.linear1.bias',\n", + " 'model.encoder.layers.1.linear1.weight',\n", + " 'model.encoder.layers.1.linear2.bias',\n", + " 'model.encoder.layers.1.linear2.weight',\n", + " 'model.encoder.layers.1.norm1.bias',\n", + " 'model.encoder.layers.1.norm1.weight',\n", + " 'model.encoder.layers.1.norm2.bias',\n", + " 'model.encoder.layers.1.norm2.weight',\n", + " 'model.encoder.layers.1.self_attn.in_proj_bias',\n", + " 'model.encoder.layers.1.self_attn.in_proj_weight',\n", + " 'model.encoder.layers.1.self_attn.out_proj.bias',\n", + " 'model.encoder.layers.1.self_attn.out_proj.weight',\n", + " 'model.encoder.layers.2.linear1.bias',\n", + " 'model.encoder.layers.2.linear1.weight',\n", + " 'model.encoder.layers.2.linear2.bias',\n", + " 'model.encoder.layers.2.linear2.weight',\n", + " 'model.encoder.layers.2.norm1.bias',\n", + " 'model.encoder.layers.2.norm1.weight',\n", + " 'model.encoder.layers.2.norm2.bias',\n", + " 'model.encoder.layers.2.norm2.weight',\n", + " 'model.encoder.layers.2.self_attn.in_proj_bias',\n", + " 'model.encoder.layers.2.self_attn.in_proj_weight',\n", + " 'model.encoder.layers.2.self_attn.out_proj.bias',\n", + " 'model.encoder.layers.2.self_attn.out_proj.weight',\n", + " 'model.encoder.layers.3.linear1.bias',\n", + " 'model.encoder.layers.3.linear1.weight',\n", + " 'model.encoder.layers.3.linear2.bias',\n", + " 'model.encoder.layers.3.linear2.weight',\n", + " 'model.encoder.layers.3.norm1.bias',\n", + " 'model.encoder.layers.3.norm1.weight',\n", + " 'model.encoder.layers.3.norm2.bias',\n", + " 'model.encoder.layers.3.norm2.weight',\n", + " 'model.encoder.layers.3.self_attn.in_proj_bias',\n", + " 'model.encoder.layers.3.self_attn.in_proj_weight',\n", + " 'model.encoder.layers.3.self_attn.out_proj.bias',\n", + " 'model.encoder.layers.3.self_attn.out_proj.weight',\n", + " 'model.encoder_img_feat_input_proj.bias',\n", + " 
'model.encoder_img_feat_input_proj.weight',\n", + " 'model.encoder_latent_input_proj.bias',\n", + " 'model.encoder_latent_input_proj.weight',\n", + " 'model.encoder_robot_and_latent_pos_embed.weight',\n", + " 'model.encoder_robot_state_input_proj.bias',\n", + " 'model.encoder_robot_state_input_proj.weight',\n", + " 'model.vae_encoder.layers.0.linear1.bias',\n", + " 'model.vae_encoder.layers.0.linear1.weight',\n", + " 'model.vae_encoder.layers.0.linear2.bias',\n", + " 'model.vae_encoder.layers.0.linear2.weight',\n", + " 'model.vae_encoder.layers.0.norm1.bias',\n", + " 'model.vae_encoder.layers.0.norm1.weight',\n", + " 'model.vae_encoder.layers.0.norm2.bias',\n", + " 'model.vae_encoder.layers.0.norm2.weight',\n", + " 'model.vae_encoder.layers.0.self_attn.in_proj_bias',\n", + " 'model.vae_encoder.layers.0.self_attn.in_proj_weight',\n", + " 'model.vae_encoder.layers.0.self_attn.out_proj.bias',\n", + " 'model.vae_encoder.layers.0.self_attn.out_proj.weight',\n", + " 'model.vae_encoder.layers.1.linear1.bias',\n", + " 'model.vae_encoder.layers.1.linear1.weight',\n", + " 'model.vae_encoder.layers.1.linear2.bias',\n", + " 'model.vae_encoder.layers.1.linear2.weight',\n", + " 'model.vae_encoder.layers.1.norm1.bias',\n", + " 'model.vae_encoder.layers.1.norm1.weight',\n", + " 'model.vae_encoder.layers.1.norm2.bias',\n", + " 'model.vae_encoder.layers.1.norm2.weight',\n", + " 'model.vae_encoder.layers.1.self_attn.in_proj_bias',\n", + " 'model.vae_encoder.layers.1.self_attn.in_proj_weight',\n", + " 'model.vae_encoder.layers.1.self_attn.out_proj.bias',\n", + " 'model.vae_encoder.layers.1.self_attn.out_proj.weight',\n", + " 'model.vae_encoder.layers.2.linear1.bias',\n", + " 'model.vae_encoder.layers.2.linear1.weight',\n", + " 'model.vae_encoder.layers.2.linear2.bias',\n", + " 'model.vae_encoder.layers.2.linear2.weight',\n", + " 'model.vae_encoder.layers.2.norm1.bias',\n", + " 'model.vae_encoder.layers.2.norm1.weight',\n", + " 'model.vae_encoder.layers.2.norm2.bias',\n", + " 'model.vae_encoder.layers.2.norm2.weight',\n", + " 'model.vae_encoder.layers.2.self_attn.in_proj_bias',\n", + " 'model.vae_encoder.layers.2.self_attn.in_proj_weight',\n", + " 'model.vae_encoder.layers.2.self_attn.out_proj.bias',\n", + " 'model.vae_encoder.layers.2.self_attn.out_proj.weight',\n", + " 'model.vae_encoder.layers.3.linear1.bias',\n", + " 'model.vae_encoder.layers.3.linear1.weight',\n", + " 'model.vae_encoder.layers.3.linear2.bias',\n", + " 'model.vae_encoder.layers.3.linear2.weight',\n", + " 'model.vae_encoder.layers.3.norm1.bias',\n", + " 'model.vae_encoder.layers.3.norm1.weight',\n", + " 'model.vae_encoder.layers.3.norm2.bias',\n", + " 'model.vae_encoder.layers.3.norm2.weight',\n", + " 'model.vae_encoder.layers.3.self_attn.in_proj_bias',\n", + " 'model.vae_encoder.layers.3.self_attn.in_proj_weight',\n", + " 'model.vae_encoder.layers.3.self_attn.out_proj.bias',\n", + " 'model.vae_encoder.layers.3.self_attn.out_proj.weight',\n", + " 'model.vae_encoder_action_input_proj.bias',\n", + " 'model.vae_encoder_action_input_proj.weight',\n", + " 'model.vae_encoder_cls_embed.weight',\n", + " 'model.vae_encoder_latent_output_proj.bias',\n", + " 'model.vae_encoder_latent_output_proj.weight',\n", + " 'model.vae_encoder_pos_enc',\n", + " 'model.vae_encoder_robot_state_input_proj.bias',\n", + " 'model.vae_encoder_robot_state_input_proj.weight',\n", + " 'normalize_inputs.buffer_observation_images_front.mean',\n", + " 'normalize_inputs.buffer_observation_images_front.std',\n", + " 'normalize_inputs.buffer_observation_images_top.mean',\n", + " 
'normalize_inputs.buffer_observation_images_top.std',\n", + " 'normalize_inputs.buffer_observation_state.mean',\n", + " 'normalize_inputs.buffer_observation_state.std',\n", + " 'normalize_targets.buffer_action.mean',\n", + " 'normalize_targets.buffer_action.std',\n", + " 'unnormalize_outputs.buffer_action.mean',\n", + " 'unnormalize_outputs.buffer_action.std']\n" + ] + } + ], + "source": [ + "dest = list(b.keys())\n", + "pprint(dest)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['model.pos_table',\n", + " 'model.transformer.encoder.layers.0.self_attn.in_proj_weight',\n", + " 'model.transformer.encoder.layers.0.self_attn.in_proj_bias',\n", + " 'model.transformer.encoder.layers.0.self_attn.out_proj.weight',\n", + " 'model.transformer.encoder.layers.0.self_attn.out_proj.bias',\n", + " 'model.transformer.encoder.layers.0.linear1.weight',\n", + " 'model.transformer.encoder.layers.0.linear1.bias',\n", + " 'model.transformer.encoder.layers.0.linear2.weight',\n", + " 'model.transformer.encoder.layers.0.linear2.bias',\n", + " 'model.transformer.encoder.layers.0.norm1.weight',\n", + " 'model.transformer.encoder.layers.0.norm1.bias',\n", + " 'model.transformer.encoder.layers.0.norm2.weight',\n", + " 'model.transformer.encoder.layers.0.norm2.bias',\n", + " 'model.transformer.encoder.layers.1.self_attn.in_proj_weight',\n", + " 'model.transformer.encoder.layers.1.self_attn.in_proj_bias',\n", + " 'model.transformer.encoder.layers.1.self_attn.out_proj.weight',\n", + " 'model.transformer.encoder.layers.1.self_attn.out_proj.bias',\n", + " 'model.transformer.encoder.layers.1.linear1.weight',\n", + " 'model.transformer.encoder.layers.1.linear1.bias',\n", + " 'model.transformer.encoder.layers.1.linear2.weight',\n", + " 'model.transformer.encoder.layers.1.linear2.bias',\n", + " 'model.transformer.encoder.layers.1.norm1.weight',\n", + " 'model.transformer.encoder.layers.1.norm1.bias',\n", + " 'model.transformer.encoder.layers.1.norm2.weight',\n", + " 'model.transformer.encoder.layers.1.norm2.bias',\n", + " 'model.transformer.encoder.layers.2.self_attn.in_proj_weight',\n", + " 'model.transformer.encoder.layers.2.self_attn.in_proj_bias',\n", + " 'model.transformer.encoder.layers.2.self_attn.out_proj.weight',\n", + " 'model.transformer.encoder.layers.2.self_attn.out_proj.bias',\n", + " 'model.transformer.encoder.layers.2.linear1.weight',\n", + " 'model.transformer.encoder.layers.2.linear1.bias',\n", + " 'model.transformer.encoder.layers.2.linear2.weight',\n", + " 'model.transformer.encoder.layers.2.linear2.bias',\n", + " 'model.transformer.encoder.layers.2.norm1.weight',\n", + " 'model.transformer.encoder.layers.2.norm1.bias',\n", + " 'model.transformer.encoder.layers.2.norm2.weight',\n", + " 'model.transformer.encoder.layers.2.norm2.bias',\n", + " 'model.transformer.encoder.layers.3.self_attn.in_proj_weight',\n", + " 'model.transformer.encoder.layers.3.self_attn.in_proj_bias',\n", + " 'model.transformer.encoder.layers.3.self_attn.out_proj.weight',\n", + " 'model.transformer.encoder.layers.3.self_attn.out_proj.bias',\n", + " 'model.transformer.encoder.layers.3.linear1.weight',\n", + " 'model.transformer.encoder.layers.3.linear1.bias',\n", + " 'model.transformer.encoder.layers.3.linear2.weight',\n", + " 'model.transformer.encoder.layers.3.linear2.bias',\n", + " 'model.transformer.encoder.layers.3.norm1.weight',\n", + " 'model.transformer.encoder.layers.3.norm1.bias',\n", + " 
'model.transformer.encoder.layers.3.norm2.weight',\n", + " 'model.transformer.encoder.layers.3.norm2.bias',\n", + " 'model.transformer.decoder.layers.0.self_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.0.self_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.0.self_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.0.self_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.0.multihead_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.0.multihead_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.0.multihead_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.0.multihead_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.0.linear1.weight',\n", + " 'model.transformer.decoder.layers.0.linear1.bias',\n", + " 'model.transformer.decoder.layers.0.linear2.weight',\n", + " 'model.transformer.decoder.layers.0.linear2.bias',\n", + " 'model.transformer.decoder.layers.0.norm1.weight',\n", + " 'model.transformer.decoder.layers.0.norm1.bias',\n", + " 'model.transformer.decoder.layers.0.norm2.weight',\n", + " 'model.transformer.decoder.layers.0.norm2.bias',\n", + " 'model.transformer.decoder.layers.0.norm3.weight',\n", + " 'model.transformer.decoder.layers.0.norm3.bias',\n", + " 'model.transformer.decoder.layers.1.self_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.1.self_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.1.self_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.1.self_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.1.multihead_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.1.multihead_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.1.multihead_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.1.multihead_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.1.linear1.weight',\n", + " 'model.transformer.decoder.layers.1.linear1.bias',\n", + " 'model.transformer.decoder.layers.1.linear2.weight',\n", + " 'model.transformer.decoder.layers.1.linear2.bias',\n", + " 'model.transformer.decoder.layers.1.norm1.weight',\n", + " 'model.transformer.decoder.layers.1.norm1.bias',\n", + " 'model.transformer.decoder.layers.1.norm2.weight',\n", + " 'model.transformer.decoder.layers.1.norm2.bias',\n", + " 'model.transformer.decoder.layers.1.norm3.weight',\n", + " 'model.transformer.decoder.layers.1.norm3.bias',\n", + " 'model.transformer.decoder.layers.2.self_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.2.self_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.2.self_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.2.self_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.2.multihead_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.2.multihead_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.2.multihead_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.2.multihead_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.2.linear1.weight',\n", + " 'model.transformer.decoder.layers.2.linear1.bias',\n", + " 'model.transformer.decoder.layers.2.linear2.weight',\n", + " 'model.transformer.decoder.layers.2.linear2.bias',\n", + " 'model.transformer.decoder.layers.2.norm1.weight',\n", + " 'model.transformer.decoder.layers.2.norm1.bias',\n", + " 'model.transformer.decoder.layers.2.norm2.weight',\n", + " 'model.transformer.decoder.layers.2.norm2.bias',\n", + " 
'model.transformer.decoder.layers.2.norm3.weight',\n", + " 'model.transformer.decoder.layers.2.norm3.bias',\n", + " 'model.transformer.decoder.layers.3.self_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.3.self_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.3.self_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.3.self_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.3.multihead_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.3.multihead_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.3.multihead_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.3.multihead_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.3.linear1.weight',\n", + " 'model.transformer.decoder.layers.3.linear1.bias',\n", + " 'model.transformer.decoder.layers.3.linear2.weight',\n", + " 'model.transformer.decoder.layers.3.linear2.bias',\n", + " 'model.transformer.decoder.layers.3.norm1.weight',\n", + " 'model.transformer.decoder.layers.3.norm1.bias',\n", + " 'model.transformer.decoder.layers.3.norm2.weight',\n", + " 'model.transformer.decoder.layers.3.norm2.bias',\n", + " 'model.transformer.decoder.layers.3.norm3.weight',\n", + " 'model.transformer.decoder.layers.3.norm3.bias',\n", + " 'model.transformer.decoder.layers.4.self_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.4.self_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.4.self_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.4.self_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.4.multihead_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.4.multihead_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.4.multihead_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.4.multihead_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.4.linear1.weight',\n", + " 'model.transformer.decoder.layers.4.linear1.bias',\n", + " 'model.transformer.decoder.layers.4.linear2.weight',\n", + " 'model.transformer.decoder.layers.4.linear2.bias',\n", + " 'model.transformer.decoder.layers.4.norm1.weight',\n", + " 'model.transformer.decoder.layers.4.norm1.bias',\n", + " 'model.transformer.decoder.layers.4.norm2.weight',\n", + " 'model.transformer.decoder.layers.4.norm2.bias',\n", + " 'model.transformer.decoder.layers.4.norm3.weight',\n", + " 'model.transformer.decoder.layers.4.norm3.bias',\n", + " 'model.transformer.decoder.layers.5.self_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.5.self_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.5.self_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.5.self_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.5.multihead_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.5.multihead_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.5.multihead_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.5.multihead_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.5.linear1.weight',\n", + " 'model.transformer.decoder.layers.5.linear1.bias',\n", + " 'model.transformer.decoder.layers.5.linear2.weight',\n", + " 'model.transformer.decoder.layers.5.linear2.bias',\n", + " 'model.transformer.decoder.layers.5.norm1.weight',\n", + " 'model.transformer.decoder.layers.5.norm1.bias',\n", + " 'model.transformer.decoder.layers.5.norm2.weight',\n", + " 'model.transformer.decoder.layers.5.norm2.bias',\n", + " 
'model.transformer.decoder.layers.5.norm3.weight',\n", + " 'model.transformer.decoder.layers.5.norm3.bias',\n", + " 'model.transformer.decoder.layers.6.self_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.6.self_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.6.self_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.6.self_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.6.multihead_attn.in_proj_weight',\n", + " 'model.transformer.decoder.layers.6.multihead_attn.in_proj_bias',\n", + " 'model.transformer.decoder.layers.6.multihead_attn.out_proj.weight',\n", + " 'model.transformer.decoder.layers.6.multihead_attn.out_proj.bias',\n", + " 'model.transformer.decoder.layers.6.linear1.weight',\n", + " 'model.transformer.decoder.layers.6.linear1.bias',\n", + " 'model.transformer.decoder.layers.6.linear2.weight',\n", + " 'model.transformer.decoder.layers.6.linear2.bias',\n", + " 'model.transformer.decoder.layers.6.norm1.weight',\n", + " 'model.transformer.decoder.layers.6.norm1.bias',\n", + " 'model.transformer.decoder.layers.6.norm2.weight',\n", + " 'model.transformer.decoder.layers.6.norm2.bias',\n", + " 'model.transformer.decoder.layers.6.norm3.weight',\n", + " 'model.transformer.decoder.layers.6.norm3.bias',\n", + " 'model.transformer.decoder.norm.weight',\n", + " 'model.transformer.decoder.norm.bias',\n", + " 'model.encoder.layers.0.self_attn.in_proj_weight',\n", + " 'model.encoder.layers.0.self_attn.in_proj_bias',\n", + " 'model.encoder.layers.0.self_attn.out_proj.weight',\n", + " 'model.encoder.layers.0.self_attn.out_proj.bias',\n", + " 'model.encoder.layers.0.linear1.weight',\n", + " 'model.encoder.layers.0.linear1.bias',\n", + " 'model.encoder.layers.0.linear2.weight',\n", + " 'model.encoder.layers.0.linear2.bias',\n", + " 'model.encoder.layers.0.norm1.weight',\n", + " 'model.encoder.layers.0.norm1.bias',\n", + " 'model.encoder.layers.0.norm2.weight',\n", + " 'model.encoder.layers.0.norm2.bias',\n", + " 'model.encoder.layers.1.self_attn.in_proj_weight',\n", + " 'model.encoder.layers.1.self_attn.in_proj_bias',\n", + " 'model.encoder.layers.1.self_attn.out_proj.weight',\n", + " 'model.encoder.layers.1.self_attn.out_proj.bias',\n", + " 'model.encoder.layers.1.linear1.weight',\n", + " 'model.encoder.layers.1.linear1.bias',\n", + " 'model.encoder.layers.1.linear2.weight',\n", + " 'model.encoder.layers.1.linear2.bias',\n", + " 'model.encoder.layers.1.norm1.weight',\n", + " 'model.encoder.layers.1.norm1.bias',\n", + " 'model.encoder.layers.1.norm2.weight',\n", + " 'model.encoder.layers.1.norm2.bias',\n", + " 'model.encoder.layers.2.self_attn.in_proj_weight',\n", + " 'model.encoder.layers.2.self_attn.in_proj_bias',\n", + " 'model.encoder.layers.2.self_attn.out_proj.weight',\n", + " 'model.encoder.layers.2.self_attn.out_proj.bias',\n", + " 'model.encoder.layers.2.linear1.weight',\n", + " 'model.encoder.layers.2.linear1.bias',\n", + " 'model.encoder.layers.2.linear2.weight',\n", + " 'model.encoder.layers.2.linear2.bias',\n", + " 'model.encoder.layers.2.norm1.weight',\n", + " 'model.encoder.layers.2.norm1.bias',\n", + " 'model.encoder.layers.2.norm2.weight',\n", + " 'model.encoder.layers.2.norm2.bias',\n", + " 'model.encoder.layers.3.self_attn.in_proj_weight',\n", + " 'model.encoder.layers.3.self_attn.in_proj_bias',\n", + " 'model.encoder.layers.3.self_attn.out_proj.weight',\n", + " 'model.encoder.layers.3.self_attn.out_proj.bias',\n", + " 'model.encoder.layers.3.linear1.weight',\n", + " 'model.encoder.layers.3.linear1.bias',\n", + " 
'model.encoder.layers.3.linear2.weight',\n", + " 'model.encoder.layers.3.linear2.bias',\n", + " 'model.encoder.layers.3.norm1.weight',\n", + " 'model.encoder.layers.3.norm1.bias',\n", + " 'model.encoder.layers.3.norm2.weight',\n", + " 'model.encoder.layers.3.norm2.bias',\n", + " 'model.action_head.weight',\n", + " 'model.action_head.bias',\n", + " 'model.is_pad_head.weight',\n", + " 'model.is_pad_head.bias',\n", + " 'model.query_embed.weight',\n", + " 'model.input_proj.weight',\n", + " 'model.input_proj.bias',\n", + " 'model.backbones.0.0.body.conv1.weight',\n", + " 'model.backbones.0.0.body.bn1.weight',\n", + " 'model.backbones.0.0.body.bn1.bias',\n", + " 'model.backbones.0.0.body.bn1.running_mean',\n", + " 'model.backbones.0.0.body.bn1.running_var',\n", + " 'model.backbones.0.0.body.bn1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer1.0.conv1.weight',\n", + " 'model.backbones.0.0.body.layer1.0.bn1.weight',\n", + " 'model.backbones.0.0.body.layer1.0.bn1.bias',\n", + " 'model.backbones.0.0.body.layer1.0.bn1.running_mean',\n", + " 'model.backbones.0.0.body.layer1.0.bn1.running_var',\n", + " 'model.backbones.0.0.body.layer1.0.bn1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer1.0.conv2.weight',\n", + " 'model.backbones.0.0.body.layer1.0.bn2.weight',\n", + " 'model.backbones.0.0.body.layer1.0.bn2.bias',\n", + " 'model.backbones.0.0.body.layer1.0.bn2.running_mean',\n", + " 'model.backbones.0.0.body.layer1.0.bn2.running_var',\n", + " 'model.backbones.0.0.body.layer1.0.bn2.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer1.1.conv1.weight',\n", + " 'model.backbones.0.0.body.layer1.1.bn1.weight',\n", + " 'model.backbones.0.0.body.layer1.1.bn1.bias',\n", + " 'model.backbones.0.0.body.layer1.1.bn1.running_mean',\n", + " 'model.backbones.0.0.body.layer1.1.bn1.running_var',\n", + " 'model.backbones.0.0.body.layer1.1.bn1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer1.1.conv2.weight',\n", + " 'model.backbones.0.0.body.layer1.1.bn2.weight',\n", + " 'model.backbones.0.0.body.layer1.1.bn2.bias',\n", + " 'model.backbones.0.0.body.layer1.1.bn2.running_mean',\n", + " 'model.backbones.0.0.body.layer1.1.bn2.running_var',\n", + " 'model.backbones.0.0.body.layer1.1.bn2.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer2.0.conv1.weight',\n", + " 'model.backbones.0.0.body.layer2.0.bn1.weight',\n", + " 'model.backbones.0.0.body.layer2.0.bn1.bias',\n", + " 'model.backbones.0.0.body.layer2.0.bn1.running_mean',\n", + " 'model.backbones.0.0.body.layer2.0.bn1.running_var',\n", + " 'model.backbones.0.0.body.layer2.0.bn1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer2.0.conv2.weight',\n", + " 'model.backbones.0.0.body.layer2.0.bn2.weight',\n", + " 'model.backbones.0.0.body.layer2.0.bn2.bias',\n", + " 'model.backbones.0.0.body.layer2.0.bn2.running_mean',\n", + " 'model.backbones.0.0.body.layer2.0.bn2.running_var',\n", + " 'model.backbones.0.0.body.layer2.0.bn2.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer2.0.downsample.0.weight',\n", + " 'model.backbones.0.0.body.layer2.0.downsample.1.weight',\n", + " 'model.backbones.0.0.body.layer2.0.downsample.1.bias',\n", + " 'model.backbones.0.0.body.layer2.0.downsample.1.running_mean',\n", + " 'model.backbones.0.0.body.layer2.0.downsample.1.running_var',\n", + " 'model.backbones.0.0.body.layer2.0.downsample.1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer2.1.conv1.weight',\n", + " 'model.backbones.0.0.body.layer2.1.bn1.weight',\n", + " 
'model.backbones.0.0.body.layer2.1.bn1.bias',\n", + " 'model.backbones.0.0.body.layer2.1.bn1.running_mean',\n", + " 'model.backbones.0.0.body.layer2.1.bn1.running_var',\n", + " 'model.backbones.0.0.body.layer2.1.bn1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer2.1.conv2.weight',\n", + " 'model.backbones.0.0.body.layer2.1.bn2.weight',\n", + " 'model.backbones.0.0.body.layer2.1.bn2.bias',\n", + " 'model.backbones.0.0.body.layer2.1.bn2.running_mean',\n", + " 'model.backbones.0.0.body.layer2.1.bn2.running_var',\n", + " 'model.backbones.0.0.body.layer2.1.bn2.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer3.0.conv1.weight',\n", + " 'model.backbones.0.0.body.layer3.0.bn1.weight',\n", + " 'model.backbones.0.0.body.layer3.0.bn1.bias',\n", + " 'model.backbones.0.0.body.layer3.0.bn1.running_mean',\n", + " 'model.backbones.0.0.body.layer3.0.bn1.running_var',\n", + " 'model.backbones.0.0.body.layer3.0.bn1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer3.0.conv2.weight',\n", + " 'model.backbones.0.0.body.layer3.0.bn2.weight',\n", + " 'model.backbones.0.0.body.layer3.0.bn2.bias',\n", + " 'model.backbones.0.0.body.layer3.0.bn2.running_mean',\n", + " 'model.backbones.0.0.body.layer3.0.bn2.running_var',\n", + " 'model.backbones.0.0.body.layer3.0.bn2.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer3.0.downsample.0.weight',\n", + " 'model.backbones.0.0.body.layer3.0.downsample.1.weight',\n", + " 'model.backbones.0.0.body.layer3.0.downsample.1.bias',\n", + " 'model.backbones.0.0.body.layer3.0.downsample.1.running_mean',\n", + " 'model.backbones.0.0.body.layer3.0.downsample.1.running_var',\n", + " 'model.backbones.0.0.body.layer3.0.downsample.1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer3.1.conv1.weight',\n", + " 'model.backbones.0.0.body.layer3.1.bn1.weight',\n", + " 'model.backbones.0.0.body.layer3.1.bn1.bias',\n", + " 'model.backbones.0.0.body.layer3.1.bn1.running_mean',\n", + " 'model.backbones.0.0.body.layer3.1.bn1.running_var',\n", + " 'model.backbones.0.0.body.layer3.1.bn1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer3.1.conv2.weight',\n", + " 'model.backbones.0.0.body.layer3.1.bn2.weight',\n", + " 'model.backbones.0.0.body.layer3.1.bn2.bias',\n", + " 'model.backbones.0.0.body.layer3.1.bn2.running_mean',\n", + " 'model.backbones.0.0.body.layer3.1.bn2.running_var',\n", + " 'model.backbones.0.0.body.layer3.1.bn2.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer4.0.conv1.weight',\n", + " 'model.backbones.0.0.body.layer4.0.bn1.weight',\n", + " 'model.backbones.0.0.body.layer4.0.bn1.bias',\n", + " 'model.backbones.0.0.body.layer4.0.bn1.running_mean',\n", + " 'model.backbones.0.0.body.layer4.0.bn1.running_var',\n", + " 'model.backbones.0.0.body.layer4.0.bn1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer4.0.conv2.weight',\n", + " 'model.backbones.0.0.body.layer4.0.bn2.weight',\n", + " 'model.backbones.0.0.body.layer4.0.bn2.bias',\n", + " 'model.backbones.0.0.body.layer4.0.bn2.running_mean',\n", + " 'model.backbones.0.0.body.layer4.0.bn2.running_var',\n", + " 'model.backbones.0.0.body.layer4.0.bn2.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer4.0.downsample.0.weight',\n", + " 'model.backbones.0.0.body.layer4.0.downsample.1.weight',\n", + " 'model.backbones.0.0.body.layer4.0.downsample.1.bias',\n", + " 'model.backbones.0.0.body.layer4.0.downsample.1.running_mean',\n", + " 'model.backbones.0.0.body.layer4.0.downsample.1.running_var',\n", + " 
'model.backbones.0.0.body.layer4.0.downsample.1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer4.1.conv1.weight',\n", + " 'model.backbones.0.0.body.layer4.1.bn1.weight',\n", + " 'model.backbones.0.0.body.layer4.1.bn1.bias',\n", + " 'model.backbones.0.0.body.layer4.1.bn1.running_mean',\n", + " 'model.backbones.0.0.body.layer4.1.bn1.running_var',\n", + " 'model.backbones.0.0.body.layer4.1.bn1.num_batches_tracked',\n", + " 'model.backbones.0.0.body.layer4.1.conv2.weight',\n", + " 'model.backbones.0.0.body.layer4.1.bn2.weight',\n", + " 'model.backbones.0.0.body.layer4.1.bn2.bias',\n", + " 'model.backbones.0.0.body.layer4.1.bn2.running_mean',\n", + " 'model.backbones.0.0.body.layer4.1.bn2.running_var',\n", + " 'model.backbones.0.0.body.layer4.1.bn2.num_batches_tracked',\n", + " 'model.input_proj_robot_state.weight',\n", + " 'model.input_proj_robot_state.bias',\n", + " 'model.cls_embed.weight',\n", + " 'model.encoder_action_proj.weight',\n", + " 'model.encoder_action_proj.bias',\n", + " 'model.encoder_joint_proj.weight',\n", + " 'model.encoder_joint_proj.bias',\n", + " 'model.latent_proj.weight',\n", + " 'model.latent_proj.bias',\n", + " 'model.latent_out_proj.weight',\n", + " 'model.latent_out_proj.bias',\n", + " 'model.additional_pos_embed.weight']\n" + ] + } + ], + "source": [ + "orig = list(a.keys())\n", + "pprint(orig)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "a = torch.load(original_ckpt_path)\n", + "\n", + "to_remove_startswith = ['model.transformer.decoder.layers.1.',\n", + " 'model.transformer.decoder.layers.2.',\n", + " 'model.transformer.decoder.layers.3.',\n", + " 'model.transformer.decoder.layers.4.',\n", + " 'model.transformer.decoder.layers.5.',\n", + " 'model.transformer.decoder.layers.6.',\n", + " 'model.transformer.decoder.norm.',\n", + " 'model.is_pad_head']\n", + "\n", + "to_remove_in = ['num_batches_tracked',]\n", + "\n", + "conv = {}\n", + "\n", + "keys = list(a.keys())\n", + "for k in keys:\n", + " if any(k.startswith(tr) for tr in to_remove_startswith):\n", + " a.pop(k)\n", + " continue\n", + " if any(tr in k for tr in to_remove_in):\n", + " a.pop(k)\n", + " continue\n", + " if k.startswith('model.transformer.encoder.layers.'):\n", + " conv[k.replace('transformer.', '')] = a.pop(k)\n", + " if k.startswith('model.transformer.decoder.layers.0.'):\n", + " conv[k.replace('transformer.', '')] = a.pop(k)\n", + " if k.startswith('model.encoder.layers.'):\n", + " conv[k.replace('encoder.', 'vae_encoder.')] = a.pop(k)\n", + " if k.startswith('model.action_head.'):\n", + " conv[k] = a.pop(k)\n", + " if k.startswith('model.pos_table'):\n", + " conv[k.replace('pos_table', 'vae_encoder_pos_enc')] = a.pop(k)\n", + " if k.startswith('model.query_embed.'):\n", + " conv[k.replace('query_embed', 'decoder_pos_embed')] = a.pop(k)\n", + " if k.startswith('model.input_proj.'):\n", + " conv[k.replace('input_proj.', 'encoder_img_feat_input_proj.')] = a.pop(k)\n", + " if k.startswith('model.input_proj_robot_state.'):\n", + " conv[k.replace('input_proj_robot_state.', 'encoder_robot_state_input_proj.')] = a.pop(k)\n", + " if k.startswith('model.backbones.0.0.body.'):\n", + " conv[k.replace('backbones.0.0.body', 'backbone')] = a.pop(k)\n", + " if k.startswith('model.cls_embed.'):\n", + " conv[k.replace('cls_embed', 'vae_encoder_cls_embed')] = a.pop(k)\n", + " if k.startswith('model.encoder_action_proj.'):\n", + " conv[k.replace('encoder_action_proj', 'vae_encoder_action_input_proj')] = a.pop(k)\n", + " if 
k.startswith('model.encoder_joint_proj.'):\n", + " conv[k.replace('encoder_joint_proj', 'vae_encoder_robot_state_input_proj')] = a.pop(k)\n", + " if k.startswith('model.latent_proj.'):\n", + " conv[k.replace('latent_proj', 'vae_encoder_latent_output_proj')] = a.pop(k)\n", + " if k.startswith('model.latent_out_proj.'):\n", + " conv[k.replace('latent_out_proj', 'encoder_latent_input_proj')] = a.pop(k)\n", + " if k.startswith('model.additional_pos_embed.'):\n", + " conv[k.replace('additional_pos_embed', 'encoder_robot_and_latent_pos_embed')] = a.pop(k)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "OrderedDict()" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "for k, v in conv.items():\n", + " assert b[k].shape == v.shape\n", + " b[k] = v" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "save_file(b, converted_ckpt_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/thomwolf/Documents/Github/ACT/checkpoints/blue_red_sort/config.yaml'" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Now also copy the config files\n", + "import shutil\n", + "shutil.copy(comparison_config_json_path, converted_ckpt_path.replace('model.safetensors', 'config.json'))\n", + "shutil.copy(comparison_config_yaml_path, converted_ckpt_path.replace('model.safetensors', 'config.yaml'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "lerobot", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/real_robot_example/gym_real_world/__init__.py b/examples/real_robot_example/gym_real_world/__init__.py new file mode 100644 index 00000000..713d5c8a --- /dev/null +++ b/examples/real_robot_example/gym_real_world/__init__.py @@ -0,0 +1,8 @@ +from gymnasium.envs.registration import register + +register( + id="gym_real_world/RealEnv-v0", + entry_point="gym_real_world.gym_environment:RealEnv", + max_episode_steps=300, + nondeterministic=True, +) diff --git a/examples/real_robot_example/gym_real_world/dynamixel.py b/examples/real_robot_example/gym_real_world/dynamixel.py new file mode 100644 index 00000000..7bc9730f --- /dev/null +++ b/examples/real_robot_example/gym_real_world/dynamixel.py @@ -0,0 +1,363 @@ +# ruff: noqa +"""From Alexander Koch low_cost_robot codebase at https://github.com/AlexanderKoch-Koch/low_cost_robot +Dynamixel class to control the dynamixel servos +""" + +from __future__ import annotations + +import enum +import math +import os +from dataclasses import dataclass + +import numpy as np +from dynamixel_sdk import * # Uses Dynamixel SDK library + + +def pos2pwm(pos: np.ndarray) -> np.ndarray: + """ + :param pos: numpy array of joint positions in range [-pi, pi] + :return: numpy array of pwm values 
in range [0, 4096] + """ + return ((pos / 3.14 + 1.0) * 2048).astype(np.int64) + + +def pwm2pos(pwm: np.ndarray) -> np.ndarray: + """ + :param pwm: numpy array of pwm values in range [0, 4096] + :return: numpy array of joint positions in range [-pi, pi] + """ + return (pwm / 2048 - 1) * 3.14 + + +def pwm2vel(pwm: np.ndarray) -> np.ndarray: + """ + :param pwm: numpy array of pwm/s joint velocities + :return: numpy array of rad/s joint velocities + """ + return pwm * 3.14 / 2048 + + +def vel2pwm(vel: np.ndarray) -> np.ndarray: + """ + :param vel: numpy array of rad/s joint velocities + :return: numpy array of pwm/s joint velocities + """ + return (vel * 2048 / 3.14).astype(np.int64) + + +class ReadAttribute(enum.Enum): + TEMPERATURE = 146 + VOLTAGE = 145 + VELOCITY = 128 + POSITION = 132 + CURRENT = 126 + PWM = 124 + HARDWARE_ERROR_STATUS = 70 + HOMING_OFFSET = 20 + BAUDRATE = 8 + + +class OperatingMode(enum.Enum): + VELOCITY = 1 + POSITION = 3 + CURRENT_CONTROLLED_POSITION = 5 + PWM = 16 + UNKNOWN = -1 + + +class Dynamixel: + ADDR_TORQUE_ENABLE = 64 + ADDR_GOAL_POSITION = 116 + ADDR_VELOCITY_LIMIT = 44 + ADDR_GOAL_PWM = 100 + OPERATING_MODE_ADDR = 11 + POSITION_I = 82 + POSITION_P = 84 + ADDR_ID = 7 + + @dataclass + class Config: + def instantiate(self): + return Dynamixel(self) + + baudrate: int = 57600 + protocol_version: float = 2.0 + device_name: str = "" # /dev/tty.usbserial-1120' + dynamixel_id: int = 1 + + def __init__(self, config: Config): + self.config = config + self.connect() + + def connect(self): + if self.config.device_name == "": + for port_name in os.listdir("/dev"): + if "ttyUSB" in port_name or "ttyACM" in port_name: + self.config.device_name = "/dev/" + port_name + print(f"using device {self.config.device_name}") + self.portHandler = PortHandler(self.config.device_name) + # self.portHandler.LA + self.packetHandler = PacketHandler(self.config.protocol_version) + if not self.portHandler.openPort(): + raise Exception(f"Failed to open port {self.config.device_name}") + + if not self.portHandler.setBaudRate(self.config.baudrate): + raise Exception(f"failed to set baudrate to {self.config.baudrate}") + + # self.operating_mode = OperatingMode.UNKNOWN + # self.torque_enabled = False + # self._disable_torque() + + self.operating_modes = [None for _ in range(32)] + self.torque_enabled = [None for _ in range(32)] + return True + + def disconnect(self): + self.portHandler.closePort() + + def set_goal_position(self, motor_id, goal_position): + # if self.operating_modes[motor_id] is not OperatingMode.POSITION: + # self._disable_torque(motor_id) + # self.set_operating_mode(motor_id, OperatingMode.POSITION) + + # if not self.torque_enabled[motor_id]: + # self._enable_torque(motor_id) + + # self._enable_torque(motor_id) + dxl_comm_result, dxl_error = self.packetHandler.write4ByteTxRx( + self.portHandler, motor_id, self.ADDR_GOAL_POSITION, goal_position + ) + # self._process_response(dxl_comm_result, dxl_error) + # print(f'set position of motor {motor_id} to {goal_position}') + + def set_pwm_value(self, motor_id: int, pwm_value, tries=3): + if self.operating_modes[motor_id] is not OperatingMode.PWM: + self._disable_torque(motor_id) + self.set_operating_mode(motor_id, OperatingMode.PWM) + + if not self.torque_enabled[motor_id]: + self._enable_torque(motor_id) + # print(f'enabling torque') + dxl_comm_result, dxl_error = self.packetHandler.write2ByteTxRx( + self.portHandler, motor_id, self.ADDR_GOAL_PWM, pwm_value + ) + # self._process_response(dxl_comm_result, dxl_error) + # print(f'set pwm 
of motor {motor_id} to {pwm_value}') + if dxl_comm_result != COMM_SUCCESS: + if tries <= 1: + raise ConnectionError(f"dxl_comm_result: {self.packetHandler.getTxRxResult(dxl_comm_result)}") + else: + print(f"dynamixel pwm setting failure trying again with {tries - 1} tries") + self.set_pwm_value(motor_id, pwm_value, tries=tries - 1) + elif dxl_error != 0: + print(f"dxl error {dxl_error}") + raise ConnectionError(f"dynamixel error: {self.packetHandler.getTxRxResult(dxl_error)}") + + def read_temperature(self, motor_id: int): + return self._read_value(motor_id, ReadAttribute.TEMPERATURE, 1) + + def read_velocity(self, motor_id: int): + pos = self._read_value(motor_id, ReadAttribute.VELOCITY, 4) + if pos > 2**31: + pos -= 2**32 + # print(f'read position {pos} for motor {motor_id}') + return pos + + def read_position(self, motor_id: int): + pos = self._read_value(motor_id, ReadAttribute.POSITION, 4) + if pos > 2**31: + pos -= 2**32 + # print(f'read position {pos} for motor {motor_id}') + return pos + + def read_position_degrees(self, motor_id: int) -> float: + return (self.read_position(motor_id) / 4096) * 360 + + def read_position_radians(self, motor_id: int) -> float: + return (self.read_position(motor_id) / 4096) * 2 * math.pi + + def read_current(self, motor_id: int): + current = self._read_value(motor_id, ReadAttribute.CURRENT, 2) + if current > 2**15: + current -= 2**16 + return current + + def read_present_pwm(self, motor_id: int): + return self._read_value(motor_id, ReadAttribute.PWM, 2) + + def read_hardware_error_status(self, motor_id: int): + return self._read_value(motor_id, ReadAttribute.HARDWARE_ERROR_STATUS, 1) + + def disconnect(self): + self.portHandler.closePort() + + def set_id(self, old_id, new_id, use_broadcast_id: bool = False): + """ + sets the id of the dynamixel servo + @param old_id: current id of the servo + @param new_id: new id + @param use_broadcast_id: set ids of all connected dynamixels if True. 
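The sign handling in `read_position` and `read_velocity` above is a plain two's-complement reinterpretation of the unsigned 4-byte register returned by the SDK. A standalone sketch of that conversion, for reference:

```python
def to_signed_32(value: int) -> int:
    # The SDK returns registers as unsigned ints; positions and velocities are signed
    # 32-bit values, so anything above 2**31 wraps around to a negative number.
    return value - 2**32 if value > 2**31 else value

assert to_signed_32(5) == 5
assert to_signed_32(2**32 - 10) == -10  # unsigned encoding of -10
```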
+ If False, change only servo with self.config.id + @return: + """ + if use_broadcast_id: + current_id = 254 + else: + current_id = old_id + dxl_comm_result, dxl_error = self.packetHandler.write1ByteTxRx( + self.portHandler, current_id, self.ADDR_ID, new_id + ) + self._process_response(dxl_comm_result, dxl_error, old_id) + self.config.id = id + + def _enable_torque(self, motor_id): + dxl_comm_result, dxl_error = self.packetHandler.write1ByteTxRx( + self.portHandler, motor_id, self.ADDR_TORQUE_ENABLE, 1 + ) + self._process_response(dxl_comm_result, dxl_error, motor_id) + self.torque_enabled[motor_id] = True + + def _disable_torque(self, motor_id): + dxl_comm_result, dxl_error = self.packetHandler.write1ByteTxRx( + self.portHandler, motor_id, self.ADDR_TORQUE_ENABLE, 0 + ) + self._process_response(dxl_comm_result, dxl_error, motor_id) + self.torque_enabled[motor_id] = False + + def _process_response(self, dxl_comm_result: int, dxl_error: int, motor_id: int): + if dxl_comm_result != COMM_SUCCESS: + raise ConnectionError( + f"dxl_comm_result for motor {motor_id}: {self.packetHandler.getTxRxResult(dxl_comm_result)}" + ) + elif dxl_error != 0: + print(f"dxl error {dxl_error}") + raise ConnectionError( + f"dynamixel error for motor {motor_id}: {self.packetHandler.getTxRxResult(dxl_error)}" + ) + + def set_operating_mode(self, motor_id: int, operating_mode: OperatingMode): + dxl_comm_result, dxl_error = self.packetHandler.write2ByteTxRx( + self.portHandler, motor_id, self.OPERATING_MODE_ADDR, operating_mode.value + ) + self._process_response(dxl_comm_result, dxl_error, motor_id) + self.operating_modes[motor_id] = operating_mode + + def set_pwm_limit(self, motor_id: int, limit: int): + dxl_comm_result, dxl_error = self.packetHandler.write2ByteTxRx(self.portHandler, motor_id, 36, limit) + self._process_response(dxl_comm_result, dxl_error, motor_id) + + def set_velocity_limit(self, motor_id: int, velocity_limit): + dxl_comm_result, dxl_error = self.packetHandler.write4ByteTxRx( + self.portHandler, motor_id, self.ADDR_VELOCITY_LIMIT, velocity_limit + ) + self._process_response(dxl_comm_result, dxl_error, motor_id) + + def set_P(self, motor_id: int, P: int): + dxl_comm_result, dxl_error = self.packetHandler.write2ByteTxRx( + self.portHandler, motor_id, self.POSITION_P, P + ) + self._process_response(dxl_comm_result, dxl_error, motor_id) + + def set_I(self, motor_id: int, I: int): + dxl_comm_result, dxl_error = self.packetHandler.write2ByteTxRx( + self.portHandler, motor_id, self.POSITION_I, I + ) + self._process_response(dxl_comm_result, dxl_error, motor_id) + + def read_home_offset(self, motor_id: int): + self._disable_torque(motor_id) + # dxl_comm_result, dxl_error = self.packetHandler.write4ByteTxRx(self.portHandler, motor_id, + # ReadAttribute.HOMING_OFFSET.value, home_position) + home_offset = self._read_value(motor_id, ReadAttribute.HOMING_OFFSET, 4) + # self._process_response(dxl_comm_result, dxl_error) + self._enable_torque(motor_id) + return home_offset + + def set_home_offset(self, motor_id: int, home_position: int): + self._disable_torque(motor_id) + dxl_comm_result, dxl_error = self.packetHandler.write4ByteTxRx( + self.portHandler, motor_id, ReadAttribute.HOMING_OFFSET.value, home_position + ) + self._process_response(dxl_comm_result, dxl_error, motor_id) + self._enable_torque(motor_id) + + def set_baudrate(self, motor_id: int, baudrate): + # translate baudrate into dynamixel baudrate setting id + if baudrate == 57600: + baudrate_id = 1 + elif baudrate == 1_000_000: + baudrate_id = 3 + elif 
baudrate == 2_000_000: + baudrate_id = 4 + elif baudrate == 3_000_000: + baudrate_id = 5 + elif baudrate == 4_000_000: + baudrate_id = 6 + else: + raise Exception("baudrate not implemented") + + self._disable_torque(motor_id) + dxl_comm_result, dxl_error = self.packetHandler.write1ByteTxRx( + self.portHandler, motor_id, ReadAttribute.BAUDRATE.value, baudrate_id + ) + self._process_response(dxl_comm_result, dxl_error, motor_id) + + def _read_value(self, motor_id, attribute: ReadAttribute, num_bytes: int, tries=10): + try: + if num_bytes == 1: + value, dxl_comm_result, dxl_error = self.packetHandler.read1ByteTxRx( + self.portHandler, motor_id, attribute.value + ) + elif num_bytes == 2: + value, dxl_comm_result, dxl_error = self.packetHandler.read2ByteTxRx( + self.portHandler, motor_id, attribute.value + ) + elif num_bytes == 4: + value, dxl_comm_result, dxl_error = self.packetHandler.read4ByteTxRx( + self.portHandler, motor_id, attribute.value + ) + except Exception: + if tries == 0: + raise Exception + else: + return self._read_value(motor_id, attribute, num_bytes, tries=tries - 1) + if dxl_comm_result != COMM_SUCCESS: + if tries <= 1: + # print("%s" % self.packetHandler.getTxRxResult(dxl_comm_result)) + raise ConnectionError(f"dxl_comm_result {dxl_comm_result} for servo {motor_id} value {value}") + else: + print(f"dynamixel read failure for servo {motor_id} trying again with {tries - 1} tries") + time.sleep(0.02) + return self._read_value(motor_id, attribute, num_bytes, tries=tries - 1) + elif dxl_error != 0: # # print("%s" % self.packetHandler.getRxPacketError(dxl_error)) + # raise ConnectionError(f'dxl_error {dxl_error} binary ' + "{0:b}".format(37)) + if tries == 0 and dxl_error != 128: + raise Exception(f"Failed to read value from motor {motor_id} error is {dxl_error}") + else: + return self._read_value(motor_id, attribute, num_bytes, tries=tries - 1) + return value + + def set_home_position(self, motor_id: int): + print(f"setting home position for motor {motor_id}") + self.set_home_offset(motor_id, 0) + current_position = self.read_position(motor_id) + print(f"position before {current_position}") + self.set_home_offset(motor_id, -current_position) + # dynamixel.set_home_offset(motor_id, -4096) + # dynamixel.set_home_offset(motor_id, -4294964109) + current_position = self.read_position(motor_id) + # print(f'signed position {current_position - 2** 32}') + print(f"position after {current_position}") + + +if __name__ == "__main__": + dynamixel = Dynamixel.Config(baudrate=1_000_000, device_name="/dev/tty.usbmodem57380045631").instantiate() + motor_id = 1 + pos = dynamixel.read_position(motor_id) + for i in range(10): + s = time.monotonic() + pos = dynamixel.read_position(motor_id) + delta = time.monotonic() - s + print(f"read position took {delta}") + print(f"position {pos}") diff --git a/examples/real_robot_example/gym_real_world/gym_environment.py b/examples/real_robot_example/gym_real_world/gym_environment.py new file mode 100644 index 00000000..2920da78 --- /dev/null +++ b/examples/real_robot_example/gym_real_world/gym_environment.py @@ -0,0 +1,192 @@ +import time +from unittest.mock import MagicMock + +import cv2 +import gymnasium as gym +import numpy as np +from gymnasium import spaces + +from .dynamixel import pos2pwm, pwm2pos +from .robot import Robot + +FPS = 30 + +CAMERAS_SHAPES = { + "images.high": (480, 640, 3), + "images.low": (480, 640, 3), +} + +CAMERAS_PORTS = { + "images.high": "/dev/video6", + "images.low": "/dev/video0", +} + +LEADER_PORT = "/dev/ttyACM1" +FOLLOWER_PORT 
= "/dev/ttyACM0" + +MockRobot = MagicMock() +MockRobot.read_position = MagicMock() +MockRobot.read_position.return_value = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) + +MockCamera = MagicMock() +MockCamera.isOpened = MagicMock(return_value=True) +MockCamera.read = MagicMock(return_value=(True, np.zeros((480, 640, 3), dtype=np.uint8))) + + +def capture_image(cam, cam_width, cam_height): + # Capture a single frame + _, frame = cam.read() + image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + # # Define your crop coordinates (top left corner and bottom right corner) + # x1, y1 = 400, 0 # Example starting coordinates (top left of the crop rectangle) + # x2, y2 = 1600, 900 # Example ending coordinates (bottom right of the crop rectangle) + # # Crop the image + # image = image[y1:y2, x1:x2] + # Resize the image + image = cv2.resize(image, (cam_width, cam_height), interpolation=cv2.INTER_AREA) + + return image + + +class RealEnv(gym.Env): + metadata = {} + + def __init__( + self, + record: bool = False, + num_joints: int = 6, + cameras_shapes: dict = CAMERAS_SHAPES, + cameras_ports: dict = CAMERAS_PORTS, + follower_port: str = FOLLOWER_PORT, + leader_port: str = LEADER_PORT, + warmup_steps: int = 100, + trigger_torque=70, + fps: int = FPS, + fps_tolerance: float = 0.1, + mock: bool = False, + ): + self.num_joints = num_joints + self.cameras_shapes = cameras_shapes + self.cameras_ports = cameras_ports + self.warmup_steps = warmup_steps + assert len(self.cameras_shapes) == len(self.cameras_ports), "Number of cameras and shapes must match." + + self.follower_port = follower_port + self.leader_port = leader_port + self.record = record + self.fps = fps + self.fps_tolerance = fps_tolerance + + # Initialize the robot + self.follower = Robot(device_name=self.follower_port) if not mock else MockRobot + if self.record: + self.leader = Robot(device_name=self.leader_port) if not mock else MockRobot + self.leader.set_trigger_torque(trigger_torque) + + # Initialize the cameras - sorted by camera names + self.cameras = {} + for cn, p in sorted(self.cameras_ports.items()): + self.cameras[cn] = cv2.VideoCapture(p) if not mock else MockCamera + if not self.cameras[cn].isOpened(): + raise OSError( + f"Cannot open camera port {p} for {cn}." + f" Make sure the camera is connected and the port is correct." + f"Also check you are not spinning several instances of the same environment (eval.batch_size)" + ) + + # Specify gym action and observation spaces + observation_space = {} + + if self.num_joints > 0: + observation_space["agent_pos"] = spaces.Box( + low=-1000.0, + high=1000.0, + shape=(num_joints,), + dtype=np.float64, + ) + if self.record: + observation_space["leader_pos"] = spaces.Box( + low=-1000.0, + high=1000.0, + shape=(num_joints,), + dtype=np.float64, + ) + + if self.cameras_shapes: + for cn, hwc_shape in self.cameras_shapes.items(): + # Assumes images are unsigned int8 in [0,255] + observation_space[cn] = spaces.Box( + low=0, + high=255, + # height x width x channels (e.g. 
480 x 640 x 3) + shape=hwc_shape, + dtype=np.uint8, + ) + + self.observation_space = spaces.Dict(observation_space) + self.action_space = spaces.Box(low=-1, high=1, shape=(num_joints,), dtype=np.float32) + + self._observation = {} + self._terminated = False + self.timestamps = [] + + def _get_obs(self): + qpos = self.follower.read_position() + self._observation["agent_pos"] = pwm2pos(qpos) + for cn, c in self.cameras.items(): + self._observation[cn] = capture_image(c, self.cameras_shapes[cn][1], self.cameras_shapes[cn][0]) + + if self.record: + action = self.leader.read_position() + self._observation["leader_pos"] = pwm2pos(action) + + def reset(self, seed: int | None = None): + # Reset the robot and sync the leader and follower if we are recording + for _ in range(self.warmup_steps): + self._get_obs() + if self.record: + self.follower.set_goal_pos(pos2pwm(self._observation["leader_pos"])) + self._terminated = False + info = {} + self.timestamps = [] + return self._observation, info + + def step(self, action: np.ndarray = None): + if self.timestamps: + # wait the right amount of time to stay at the desired fps + time.sleep(max(0, 1 / self.fps - (time.time() - self.timestamps[-1]))) + + self.timestamps.append(time.time()) + + # Get the observation + self._get_obs() + if self.record: + # Teleoperate the leader + self.follower.set_goal_pos(pos2pwm(self._observation["leader_pos"])) + else: + # Apply the action to the follower + self.follower.set_goal_pos(pos2pwm(action)) + + reward = 0 + terminated = truncated = self._terminated + info = {"timestamp": self.timestamps[-1] - self.timestamps[0], "fps_error": False} + + # Check if we are able to keep up with the desired fps + if len(self.timestamps) > 1 and (self.timestamps[-1] - self.timestamps[-2]) > 1 / ( + self.fps - self.fps_tolerance + ): + print( + f"Error: recording fps {1 / (self.timestamps[-1] - self.timestamps[-2]):.5f} is lower" + f" than min admited fps {(self.fps - self.fps_tolerance):.5f}" + f" at frame {len(self.timestamps)}" + ) + info["fps_error"] = True + + return self._observation, reward, terminated, truncated, info + + def render(self): ... + + def close(self): + self.follower._disable_torque() + if self.record: + self.leader._disable_torque() diff --git a/examples/real_robot_example/gym_real_world/robot.py b/examples/real_robot_example/gym_real_world/robot.py new file mode 100644 index 00000000..111c99de --- /dev/null +++ b/examples/real_robot_example/gym_real_world/robot.py @@ -0,0 +1,173 @@ +# ruff: noqa +"""From Alexander Koch low_cost_robot codebase at https://github.com/AlexanderKoch-Koch/low_cost_robot +Class to control the robot using dynamixel servos. 
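Before looking at the `Robot` class, here is a hypothetical smoke test for the `RealEnv` environment defined above. `mock=True` replaces the serial buses and cameras with `MagicMock`s, so it can run without hardware; in record mode the follower mirrors the leader, which is why `step()` is called with `action=None`:

```python
import gymnasium as gym
import gym_real_world  # noqa: F401  # registers gym_real_world/RealEnv-v0

env = gym.make("gym_real_world/RealEnv-v0", disable_env_checker=True, record=True, mock=True)
obs, info = env.reset()
for _ in range(5):
    obs, reward, terminated, truncated, info = env.step(action=None)
print(sorted(obs))  # ['agent_pos', 'images.high', 'images.low', 'leader_pos']
env.close()
```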
+""" + +from enum import Enum, auto +from typing import Union + +import numpy as np +from dynamixel_sdk import DXL_HIBYTE, DXL_HIWORD, DXL_LOBYTE, DXL_LOWORD, GroupSyncRead, GroupSyncWrite + +from .dynamixel import Dynamixel, OperatingMode, ReadAttribute + + +class MotorControlType(Enum): + PWM = auto() + POSITION_CONTROL = auto() + DISABLED = auto() + UNKNOWN = auto() + + +class Robot: + def __init__(self, device_name: str, baudrate=1_000_000, servo_ids=[1, 2, 3, 4, 5, 6]) -> None: + self.servo_ids = servo_ids + self.dynamixel = Dynamixel.Config(baudrate=baudrate, device_name=device_name).instantiate() + self._init_motors() + + def _init_motors(self): + self.position_reader = GroupSyncRead( + self.dynamixel.portHandler, self.dynamixel.packetHandler, ReadAttribute.POSITION.value, 4 + ) + for id in self.servo_ids: + self.position_reader.addParam(id) + + self.velocity_reader = GroupSyncRead( + self.dynamixel.portHandler, self.dynamixel.packetHandler, ReadAttribute.VELOCITY.value, 4 + ) + for id in self.servo_ids: + self.velocity_reader.addParam(id) + + self.pos_writer = GroupSyncWrite( + self.dynamixel.portHandler, self.dynamixel.packetHandler, self.dynamixel.ADDR_GOAL_POSITION, 4 + ) + for id in self.servo_ids: + self.pos_writer.addParam(id, [2048]) + + self.pwm_writer = GroupSyncWrite( + self.dynamixel.portHandler, self.dynamixel.packetHandler, self.dynamixel.ADDR_GOAL_PWM, 2 + ) + for id in self.servo_ids: + self.pwm_writer.addParam(id, [2048]) + # self._disable_torque() + self.motor_control_state = MotorControlType.DISABLED + + def read_position(self, tries=2): + """ + Reads the joint positions of the robot. 2048 is the center position. 0 and 4096 are 180 degrees in each direction. + :param tries: maximum number of tries to read the position + :return: list of joint positions in range [0, 4096] + """ + result = self.position_reader.txRxPacket() + if result != 0: + if tries > 0: + return self.read_position(tries=tries - 1) + else: + print("failed to read position!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") + positions = [] + for id in self.servo_ids: + position = self.position_reader.getData(id, ReadAttribute.POSITION.value, 4) + if position > 2**31: + position -= 2**32 + positions.append(position) + return np.array(positions) + + def read_velocity(self): + """ + Reads the joint velocities of the robot. + :return: list of joint velocities, + """ + self.velocity_reader.txRxPacket() + velocties = [] + for id in self.servo_ids: + velocity = self.velocity_reader.getData(id, ReadAttribute.VELOCITY.value, 4) + if velocity > 2**31: + velocity -= 2**32 + velocties.append(velocity) + return np.array(velocties) + + def set_goal_pos(self, action): + """ + :param action: list or numpy array of target joint positions in range [0, 4096[ + """ + if self.motor_control_state is not MotorControlType.POSITION_CONTROL: + self._set_position_control() + for i, motor_id in enumerate(self.servo_ids): + data_write = [ + DXL_LOBYTE(DXL_LOWORD(action[i])), + DXL_HIBYTE(DXL_LOWORD(action[i])), + DXL_LOBYTE(DXL_HIWORD(action[i])), + DXL_HIBYTE(DXL_HIWORD(action[i])), + ] + self.pos_writer.changeParam(motor_id, data_write) + + self.pos_writer.txPacket() + + def set_pwm(self, action): + """ + Sets the pwm values for the servos. 
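For reference, the 4-byte little-endian packing that `set_goal_pos` above builds for each motor can be reproduced in isolation like this (the goal value is arbitrary):

```python
from dynamixel_sdk import DXL_HIBYTE, DXL_HIWORD, DXL_LOBYTE, DXL_LOWORD

goal = 2048  # arbitrary target position in [0, 4096)
data_write = [
    DXL_LOBYTE(DXL_LOWORD(goal)),  # 0x00, least significant byte
    DXL_HIBYTE(DXL_LOWORD(goal)),  # 0x08
    DXL_LOBYTE(DXL_HIWORD(goal)),  # 0x00
    DXL_HIBYTE(DXL_HIWORD(goal)),  # 0x00, most significant byte
]
```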
+ :param action: list or numpy array of pwm values in range [0, 885] + """ + if self.motor_control_state is not MotorControlType.PWM: + self._set_pwm_control() + for i, motor_id in enumerate(self.servo_ids): + data_write = [ + DXL_LOBYTE(DXL_LOWORD(action[i])), + DXL_HIBYTE(DXL_LOWORD(action[i])), + ] + self.pwm_writer.changeParam(motor_id, data_write) + + self.pwm_writer.txPacket() + + def set_trigger_torque(self, torque: int): + """ + Sets a constant torque torque for the last servo in the chain. This is useful for the trigger of the leader arm + """ + self.dynamixel._enable_torque(self.servo_ids[-1]) + self.dynamixel.set_pwm_value(self.servo_ids[-1], torque) + + def limit_pwm(self, limit: Union[int, list, np.ndarray]): + """ + Limits the pwm values for the servos in for position control + @param limit: 0 ~ 885 + @return: + """ + if isinstance(limit, int): + limits = [ + limit, + ] * 5 + else: + limits = limit + self._disable_torque() + for motor_id, limit in zip(self.servo_ids, limits, strict=False): + self.dynamixel.set_pwm_limit(motor_id, limit) + self._enable_torque() + + def _disable_torque(self): + print(f"disabling torque for servos {self.servo_ids}") + for motor_id in self.servo_ids: + self.dynamixel._disable_torque(motor_id) + + def _enable_torque(self): + print(f"enabling torque for servos {self.servo_ids}") + for motor_id in self.servo_ids: + self.dynamixel._enable_torque(motor_id) + + def _set_pwm_control(self): + self._disable_torque() + for motor_id in self.servo_ids: + self.dynamixel.set_operating_mode(motor_id, OperatingMode.PWM) + self._enable_torque() + self.motor_control_state = MotorControlType.PWM + + def _set_position_control(self): + self._disable_torque() + for motor_id in self.servo_ids: + # TODO(rcadene): redesign + if motor_id == 9: + self.dynamixel.set_operating_mode(9, OperatingMode.CURRENT_CONTROLLED_POSITION) + else: + self.dynamixel.set_operating_mode(motor_id, OperatingMode.POSITION) + + self._enable_torque() + self.motor_control_state = MotorControlType.POSITION_CONTROL diff --git a/examples/real_robot_example/record_training_data.py b/examples/real_robot_example/record_training_data.py new file mode 100644 index 00000000..ed993fc7 --- /dev/null +++ b/examples/real_robot_example/record_training_data.py @@ -0,0 +1,237 @@ +"""This script demonstrates how to record a LeRobot dataset of training data +using a very simple gym environment (see in examples/real_robot_example/gym_real_world/gym_environment.py). 
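As a side note before the recording script: a hedged teleoperation sketch using the `Robot` class above. The device names match the defaults used by the gym environment (leader on `/dev/ttyACM1`, follower on `/dev/ttyACM0`), and the import path assumes the `gym_real_world` package is on your `PYTHONPATH`:

```python
import time

from gym_real_world.robot import Robot

leader = Robot(device_name="/dev/ttyACM1")
follower = Robot(device_name="/dev/ttyACM0")
leader.set_trigger_torque(70)  # constant PWM on the leader's trigger servo, as in RealEnv

for _ in range(300):  # ~10 seconds at roughly 30 Hz
    follower.set_goal_pos(leader.read_position())  # raw counts in [0, 4096)
    time.sleep(1 / 30)
```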
+ +""" + +import argparse +import copy +import os +from pathlib import Path + +import gym_real_world # noqa: F401 +import gymnasium as gym +import numpy as np +import torch +from datasets import Dataset, Features, Sequence, Value +from omegaconf import OmegaConf +from tqdm import tqdm + +from lerobot.common.datasets.compute_stats import compute_stats +from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, DATA_DIR, LeRobotDataset +from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently +from lerobot.common.datasets.utils import ( + hf_transform_to_torch, +) +from lerobot.common.datasets.video_utils import VideoFrame, encode_video_frames +from lerobot.scripts.push_dataset_to_hub import push_meta_data_to_hub, push_videos_to_hub, save_meta_data + +# parse the repo_id name via command line +parser = argparse.ArgumentParser() +parser.add_argument("--repo-id", type=str, default="thomwolf/blue_red_sort") +parser.add_argument("--num-episodes", type=int, default=2) +parser.add_argument("--num-frames", type=int, default=400) +parser.add_argument("--num-workers", type=int, default=16) +parser.add_argument("--keep-last", action="store_true") +parser.add_argument("--data_dir", type=str, default=None) +parser.add_argument("--push-to-hub", action="store_true") +parser.add_argument("--fps", type=int, default=30, help="Frames per second of the recording.") +parser.add_argument( + "--fps_tolerance", + type=float, + default=0.5, + help="Tolerance in fps for the recording before dropping episodes.", +) +parser.add_argument( + "--revision", type=str, default=CODEBASE_VERSION, help="Codebase version used to generate the dataset." +) +parser.add_argument("--gym-config", type=str, default=None, help="Path to the gym config file.") +parser.add_argument("--mock_robot", action="store_true") +args = parser.parse_args() + +repo_id = args.repo_id +num_episodes = args.num_episodes +num_frames = args.num_frames +revision = args.revision +fps = args.fps +fps_tolerance = args.fps_tolerance + +out_data = DATA_DIR / repo_id if args.data_dir is None else Path(args.data_dir) + +# During data collection, frames are stored as png images in `images_dir` +images_dir = out_data / "images" +# After data collection, png images of each episode are encoded into a mp4 file stored in `videos_dir` +videos_dir = out_data / "videos" +meta_data_dir = out_data / "meta_data" + +gym_config = None +if args.config is not None: + gym_config = OmegaConf.load(args.config) + +# Create image and video directories +if not os.path.exists(images_dir): + os.makedirs(images_dir, exist_ok=True) +if not os.path.exists(videos_dir): + os.makedirs(videos_dir, exist_ok=True) + +if __name__ == "__main__": + # Create the gym environment - check the kwargs in gym_real_world/gym_environment.py + gym_handle = "gym_real_world/RealEnv-v0" + gym_kwargs = {} + if gym_config is not None: + gym_kwargs = OmegaConf.to_container(gym_config.gym_kwargs) + env = gym.make( + gym_handle, disable_env_checker=True, record=True, fps=fps, fps_tolerance=fps_tolerance, mock=True + ) + + ep_dicts = [] + episode_data_index = {"from": [], "to": []} + ep_fps = [] + id_from = 0 + id_to = 0 + os.system('spd-say "gym environment created"') + + ep_idx = 0 + while ep_idx < num_episodes: + # bring the follower to the leader and start camera + env.reset() + + os.system(f'spd-say "go {ep_idx}"') + # init buffers + obs_replay = {k: [] for k in env.observation_space} + + drop_episode = False + timestamps = [] + for _ in 
tqdm(range(num_frames)): + # Apply the next action + observation, _, _, _, info = env.step(action=None) + # images_stacked = np.hstack(list(observation['pixels'].values())) + # images_stacked = cv2.cvtColor(images_stacked, cv2.COLOR_RGB2BGR) + # cv2.imshow('frame', images_stacked) + + if info["fps_error"]: + os.system(f'spd-say "Error fps too low, dropping episode {ep_idx}"') + drop_episode = True + break + + # store data + for key in observation: + obs_replay[key].append(copy.deepcopy(observation[key])) + timestamps.append(info["timestamp"]) + + # if cv2.waitKey(1) & 0xFF == ord('q'): + # break + + os.system('spd-say "stop"') + + if not drop_episode: + os.system(f'spd-say "saving episode {ep_idx}"') + ep_dict = {} + # store images in png and create the video + for img_key in env.cameras: + save_images_concurrently( + obs_replay[img_key], + images_dir / f"{img_key}_episode_{ep_idx:06d}", + args.num_workers, + ) + fname = f"{img_key}_episode_{ep_idx:06d}.mp4" + # store the reference to the video frame + ep_dict[f"observation.{img_key}"] = [ + {"path": f"videos/{fname}", "timestamp": tstp} for tstp in timestamps + ] + + state = torch.tensor(np.array(obs_replay["agent_pos"])) + action = torch.tensor(np.array(obs_replay["leader_pos"])) + next_done = torch.zeros(num_frames, dtype=torch.bool) + next_done[-1] = True + + ep_dict["observation.state"] = state + ep_dict["action"] = action + ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64) + ep_dict["frame_index"] = torch.arange(0, num_frames, 1) + ep_dict["timestamp"] = torch.tensor(timestamps) + ep_dict["next.done"] = next_done + ep_fps.append(num_frames / timestamps[-1]) + ep_dicts.append(ep_dict) + print(f"Episode {ep_idx} done, fps: {ep_fps[-1]:.2f}") + + episode_data_index["from"].append(id_from) + episode_data_index["to"].append( + id_from + num_frames if args.keep_last else id_from + num_frames - 1 + ) + + id_to = id_from + num_frames if args.keep_last else id_from + num_frames - 1 + id_from = id_to + + ep_idx += 1 + + env.close() + + os.system('spd-say "encode video frames"') + for ep_idx in range(num_episodes): + for img_key in env.cameras: + # If necessary, we may want to encode the video + # with variable frame rate: https://superuser.com/questions/1661901/encoding-video-from-vfr-still-images + encode_video_frames( + images_dir / f"{img_key}_episode_{ep_idx:06d}", + videos_dir / f"{img_key}_episode_{ep_idx:06d}.mp4", + ep_fps[ep_idx], + ) + + os.system('spd-say "concatenate episodes"') + data_dict = concatenate_episodes( + ep_dicts, drop_episodes_last_frame=not args.keep_last + ) # Since our fps varies we are sometimes off tolerance for the last frame + + features = {} + + keys = [key for key in data_dict if "observation.images." 
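To make the episode bookkeeping above concrete, here is what `episode_data_index` ends up holding for two episodes of `num_frames=400` when `--keep-last` is not passed (the last frame of each episode is dropped, so each episode contributes `num_frames - 1` rows):

```python
num_frames, keep_last = 400, False
episode_data_index = {"from": [], "to": []}
id_from = 0
for _ in range(2):  # two recorded episodes
    id_to = id_from + num_frames if keep_last else id_from + num_frames - 1
    episode_data_index["from"].append(id_from)
    episode_data_index["to"].append(id_to)
    id_from = id_to
print(episode_data_index)  # {'from': [0, 399], 'to': [399, 798]}
```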
in key] + for key in keys: + features[key] = VideoFrame() + + features["observation.state"] = Sequence( + length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None) + ) + features["action"] = Sequence( + length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None) + ) + features["episode_index"] = Value(dtype="int64", id=None) + features["frame_index"] = Value(dtype="int64", id=None) + features["timestamp"] = Value(dtype="float32", id=None) + features["next.done"] = Value(dtype="bool", id=None) + features["index"] = Value(dtype="int64", id=None) + + hf_dataset = Dataset.from_dict(data_dict, features=Features(features)) + hf_dataset.set_transform(hf_transform_to_torch) + + info = { + "fps": sum(ep_fps) / len(ep_fps), # to have a good tolerance in data processing for the slowest video + "video": 1, + } + + os.system('spd-say "from preloaded"') + lerobot_dataset = LeRobotDataset.from_preloaded( + repo_id=repo_id, + version=revision, + hf_dataset=hf_dataset, + episode_data_index=episode_data_index, + info=info, + videos_dir=videos_dir, + ) + os.system('spd-say "compute stats"') + stats = compute_stats(lerobot_dataset) + + os.system('spd-say "save to disk"') + hf_dataset = hf_dataset.with_format(None) # to remove transforms that cant be saved + hf_dataset.save_to_disk(str(out_data / "train")) + + save_meta_data(info, stats, episode_data_index, meta_data_dir) + + if args.push_to_hub: + hf_dataset.push_to_hub(repo_id, token=True, revision="main") + hf_dataset.push_to_hub(repo_id, token=True, revision=revision) + + push_meta_data_to_hub(repo_id, meta_data_dir, revision="main") + push_meta_data_to_hub(repo_id, meta_data_dir, revision=revision) + + push_videos_to_hub(repo_id, videos_dir, revision="main") + push_videos_to_hub(repo_id, videos_dir, revision=revision) diff --git a/examples/real_robot_example/run_policy.py b/examples/real_robot_example/run_policy.py new file mode 100644 index 00000000..a47fb914 --- /dev/null +++ b/examples/real_robot_example/run_policy.py @@ -0,0 +1,60 @@ +import argparse +import logging +from pathlib import Path + +import gym_real_world # noqa: F401 +import gymnasium as gym # noqa: F401 +from huggingface_hub import snapshot_download +from huggingface_hub.utils._errors import RepositoryNotFoundError +from huggingface_hub.utils._validators import HFValidationError + +from lerobot.common.utils.utils import init_logging +from lerobot.scripts.eval import eval + +if __name__ == "__main__": + init_logging() + + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument( + "-p", + "--pretrained-policy-name-or-path", + help=( + "Either the repo ID of a model hosted on the Hub or a path to a directory containing weights " + "saved using `Policy.save_pretrained`. If not provided, the policy is initialized from scratch " + "(useful for debugging). This argument is mutually exclusive with `--config`." 
+ ), + ) + parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.") + parser.add_argument( + "overrides", + nargs="*", + help="Any key=value arguments to override config values (use dots for.nested=overrides)", + ) + args = parser.parse_args() + + try: + pretrained_policy_path = Path( + snapshot_download(args.pretrained_policy_name_or_path, revision=args.revision) + ) + except (HFValidationError, RepositoryNotFoundError) as e: + if isinstance(e, HFValidationError): + error_message = ( + "The provided pretrained_policy_name_or_path is not a valid Hugging Face Hub repo ID." + ) + else: + error_message = ( + "The provided pretrained_policy_name_or_path was not found on the Hugging Face Hub." + ) + + logging.warning(f"{error_message} Treating it as a local directory.") + pretrained_policy_path = Path(args.pretrained_policy_name_or_path) + if not pretrained_policy_path.is_dir() or not pretrained_policy_path.exists(): + raise ValueError( + "The provided pretrained_policy_name_or_path is not a valid/existing Hugging Face Hub " + "repo ID, nor is it an existing local directory." + ) + + eval(pretrained_policy_path=pretrained_policy_path, config_overrides=args.overrides) diff --git a/examples/real_robot_example/train_config/policy/act_real_world.yaml b/examples/real_robot_example/train_config/policy/act_real_world.yaml new file mode 100644 index 00000000..107b7658 --- /dev/null +++ b/examples/real_robot_example/train_config/policy/act_real_world.yaml @@ -0,0 +1,103 @@ +# @package _global_ + +# Use `act_real.yaml` to train on real-world Aloha/Aloha2 datasets. +# Compared to `act.yaml`, it contains 4 cameras (i.e. right_wrist, left_wrist, images, +# low) instead of 1 camera (i.e. top). Also, `training.eval_freq` is set to -1. This config is used +# to evaluate checkpoints at a certain frequency of training steps. When it is set to -1, it deactivates evaluation. +# This is because real-world evaluation is done through [dora-lerobot](https://github.com/dora-rs/dora-lerobot). +# Look at its README for more information on how to evaluate a checkpoint in the real-world. +# +# Example of usage for training: +# ```bash +# python lerobot/scripts/train.py \ +# policy=act_real \ +# env=aloha_real +# ``` + +seed: 1000 +dataset_repo_id: ??? + +override_dataset_stats: + observation.images.high: + # stats from imagenet, since we use a pretrained vision model + mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1) + std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1) + observation.images.low: + # stats from imagenet, since we use a pretrained vision model + mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1) + std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1) + +training: + offline_steps: 1000 + online_steps: 0 + eval_freq: -1 + save_freq: 1000 + log_freq: 100 + save_checkpoint: true + + batch_size: 8 + lr: 1e-5 + lr_backbone: 1e-5 + weight_decay: 1e-4 + grad_clip_norm: 10 + online_steps_between_rollouts: 1 + + delta_timestamps: + action: "[i / ${fps} for i in range(1, ${policy.chunk_size} + 1)]" + +eval: + n_episodes: 1 + batch_size: 1 + +# See `configuration_act.py` for more details. +policy: + name: act + + # Input / output structure. + n_obs_steps: 1 + chunk_size: 100 # chunk_size + n_action_steps: 100 + + input_shapes: + # TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env? 
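For intuition, the `delta_timestamps` entry in the `training` section above resolves, once Hydra interpolates `${fps}` and `${policy.chunk_size}`, to a list of future action offsets. Assuming `fps=30` (the real-world recording rate) and `chunk_size=100`:

```python
fps, chunk_size = 30, 100
action_deltas = [i / fps for i in range(1, chunk_size + 1)]
print(action_deltas[:3])  # [0.0333..., 0.0666..., 0.1] seconds after the current frame
print(action_deltas[-1])  # 3.333... seconds, i.e. the end of the 100-step action chunk
```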
+ observation.images.high: [3, 480, 640] + observation.images.low: [3, 480, 640] + observation.state: ["${env.state_dim}"] + output_shapes: + action: ["${env.action_dim}"] + + # Normalization / Unnormalization + input_normalization_modes: + observation.images.high: mean_std + observation.images.low: mean_std + observation.state: mean_std + output_normalization_modes: + action: mean_std + + # Architecture. + # Vision backbone. + vision_backbone: resnet18 + pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1 + replace_final_stride_with_dilation: false + # Transformer layers. + pre_norm: false + dim_model: 512 + n_heads: 8 + dim_feedforward: 3200 + feedforward_activation: relu + n_encoder_layers: 4 + # Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code + # that means only the first layer is used. Here we match the original implementation by setting this to 1. + # See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521. + n_decoder_layers: 1 + # VAE. + use_vae: true + latent_dim: 32 + n_vae_encoder_layers: 4 + + # Inference. + temporal_ensemble_momentum: null + + # Training and loss computation. + dropout: 0.1 + kl_weight: 10.0 diff --git a/examples/real_robot_example/train_config/policy/act_real_world_debug.yaml b/examples/real_robot_example/train_config/policy/act_real_world_debug.yaml new file mode 100644 index 00000000..a03eacb4 --- /dev/null +++ b/examples/real_robot_example/train_config/policy/act_real_world_debug.yaml @@ -0,0 +1,103 @@ +# @package _global_ + +# Use `act_real.yaml` to train on real-world Aloha/Aloha2 datasets. +# Compared to `act.yaml`, it contains 4 cameras (i.e. right_wrist, left_wrist, images, +# front) instead of 1 camera (i.e. top). Also, `training.eval_freq` is set to -1. This config is used +# to evaluate checkpoints at a certain frequency of training steps. When it is set to -1, it deactivates evaluation. +# This is because real-world evaluation is done through [dora-lerobot](https://github.com/dora-rs/dora-lerobot). +# Look at its README for more information on how to evaluate a checkpoint in the real-world. +# +# Example of usage for training: +# ```bash +# python lerobot/scripts/train.py \ +# policy=act_real \ +# env=aloha_real +# ``` + +seed: 1000 +dataset_repo_id: ??? + +override_dataset_stats: + observation.images.top: + # stats from imagenet, since we use a pretrained vision model + mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1) + std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1) + observation.images.front: + # stats from imagenet, since we use a pretrained vision model + mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1) + std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1) + +training: + offline_steps: 1000 + online_steps: 0 + eval_freq: -1 + save_freq: 1000 + log_freq: 100 + save_checkpoint: true + + batch_size: 8 + lr: 1e-5 + lr_backbone: 1e-5 + weight_decay: 1e-4 + grad_clip_norm: 10 + online_steps_between_rollouts: 1 + + delta_timestamps: + action: "[i / ${fps} for i in range(1, ${policy.chunk_size} + 1)]" + +eval: + n_episodes: 1 + batch_size: 1 + +# See `configuration_act.py` for more details. +policy: + name: act + + # Input / output structure. + n_obs_steps: 1 + chunk_size: 100 # chunk_size + n_action_steps: 100 + + input_shapes: + # TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env? 
+ observation.images.top: [3, 480, 640] + observation.images.front: [3, 480, 640] + observation.state: ["${env.state_dim}"] + output_shapes: + action: ["${env.action_dim}"] + + # Normalization / Unnormalization + input_normalization_modes: + observation.images.top: mean_std + observation.images.front: mean_std + observation.state: mean_std + output_normalization_modes: + action: mean_std + + # Architecture. + # Vision backbone. + vision_backbone: resnet18 + pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1 + replace_final_stride_with_dilation: false + # Transformer layers. + pre_norm: false + dim_model: 512 + n_heads: 8 + dim_feedforward: 3200 + feedforward_activation: relu + n_encoder_layers: 4 + # Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code + # that means only the first layer is used. Here we match the original implementation by setting this to 1. + # See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521. + n_decoder_layers: 1 + # VAE. + use_vae: true + latent_dim: 32 + n_vae_encoder_layers: 4 + + # Inference. + temporal_ensemble_momentum: null + + # Training and loss computation. + dropout: 0.1 + kl_weight: 10.0 diff --git a/lerobot/common/datasets/factory.py b/lerobot/common/datasets/factory.py index 4732f577..bb10022c 100644 --- a/lerobot/common/datasets/factory.py +++ b/lerobot/common/datasets/factory.py @@ -56,7 +56,7 @@ def make_dataset(cfg, split: str = "train") -> LeRobotDataset | MultiLeRobotData ) # A soft check to warn if the environment matches the dataset. Don't check if we are using a real world env (dora). - if cfg.env.name != "dora": + if not cfg.env.real_world: if isinstance(cfg.dataset_repo_id, str): dataset_repo_ids = [cfg.dataset_repo_id] # single dataset else: diff --git a/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py b/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py index 1c2f066e..13cb8500 100644 --- a/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py +++ b/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py @@ -43,9 +43,6 @@ def get_cameras(hdf5_data): def check_format(raw_dir) -> bool: - # only frames from simulation are uncompressed - compressed_images = "sim" not in raw_dir.name - hdf5_paths = list(raw_dir.glob("episode_*.hdf5")) assert len(hdf5_paths) != 0 for hdf5_path in hdf5_paths: @@ -62,17 +59,15 @@ def check_format(raw_dir) -> bool: for camera in get_cameras(data): assert num_frames == data[f"/observations/images/{camera}"].shape[0] - if compressed_images: - assert data[f"/observations/images/{camera}"].ndim == 2 - else: - assert data[f"/observations/images/{camera}"].ndim == 4 + # ndim 2 when image are compressed and 4 when uncompressed + assert data[f"/observations/images/{camera}"].ndim in [2, 4] + if data[f"/observations/images/{camera}"].ndim == 4: b, h, w, c = data[f"/observations/images/{camera}"].shape assert c < h and c < w, f"Expect (h,w,c) image format but ({h=},{w=},{c=}) provided." 
def load_from_raw(raw_dir, out_dir, fps, video, debug): # only frames from simulation are uncompressed - compressed_images = "sim" not in raw_dir.name hdf5_files = list(raw_dir.glob("*.hdf5")) ep_dicts = [] @@ -99,7 +94,7 @@ def load_from_raw(raw_dir, out_dir, fps, video, debug): for camera in get_cameras(ep): img_key = f"observation.images.{camera}" - if compressed_images: + if ep[f"/observations/images/{camera}"].ndim == 2: import cv2 # load one compressed image after the other in RAM and uncompress diff --git a/lerobot/common/datasets/push_dataset_to_hub/utils.py b/lerobot/common/datasets/push_dataset_to_hub/utils.py index 4feb1dcf..adfe42a5 100644 --- a/lerobot/common/datasets/push_dataset_to_hub/utils.py +++ b/lerobot/common/datasets/push_dataset_to_hub/utils.py @@ -21,19 +21,24 @@ import PIL import torch -def concatenate_episodes(ep_dicts): +def concatenate_episodes(ep_dicts, drop_episodes_last_frame=False): data_dict = {} keys = ep_dicts[0].keys() for key in keys: if torch.is_tensor(ep_dicts[0][key][0]): - data_dict[key] = torch.cat([ep_dict[key] for ep_dict in ep_dicts]) + if drop_episodes_last_frame: + data_dict[key] = torch.cat([ep_dict[key][:-1] for ep_dict in ep_dicts]) + else: + data_dict[key] = torch.cat([ep_dict[key] for ep_dict in ep_dicts]) else: if key not in data_dict: data_dict[key] = [] for ep_dict in ep_dicts: for x in ep_dict[key]: data_dict[key].append(x) + if drop_episodes_last_frame: + data_dict[key].pop() total_frames = data_dict["frame_index"].shape[0] data_dict["index"] = torch.arange(0, total_frames, 1) diff --git a/lerobot/common/envs/utils.py b/lerobot/common/envs/utils.py index 8fce0369..507f86a2 100644 --- a/lerobot/common/envs/utils.py +++ b/lerobot/common/envs/utils.py @@ -29,10 +29,12 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten # map to expected inputs for the policy return_observations = {} - if isinstance(observations["pixels"], dict): + if "pixels" in observations and isinstance(observations["pixels"], dict): imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()} - else: + elif "pixels" in observations and isinstance(observations["pixels"], np.ndarray): imgs = {"observation.image": observations["pixels"]} + else: + imgs = {f"observation.{key}": img for key, img in observations.items() if "images" in key} for imgkey, img in imgs.items(): img = torch.from_numpy(img) diff --git a/lerobot/common/policies/act/configuration_act.py b/lerobot/common/policies/act/configuration_act.py index a4b0b7d2..49e8c70e 100644 --- a/lerobot/common/policies/act/configuration_act.py +++ b/lerobot/common/policies/act/configuration_act.py @@ -129,7 +129,9 @@ class ACTConfig: # Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code # that means only the first layer is used. Here we match the original implementation by setting this to 1. # See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521. + # As a consequence we also remove the final, unused layer normalization, by default n_decoder_layers: int = 1 + decoder_norm: bool = False # VAE. 
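The `preprocess_observation` change above now covers three observation layouts. A small illustration of the key mapping it performs (shapes and camera names are only examples):

```python
import numpy as np

img = np.zeros((480, 640, 3), dtype=np.uint8)
sim_multi_cam = {"pixels": {"top": img}}                      # -> "observation.images.top"
sim_single_cam = {"pixels": img}                              # -> "observation.image"
real_world = {"images.high": img, "agent_pos": np.zeros(6)}   # -> "observation.images.high"
```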
use_vae: bool = True latent_dim: int = 32 diff --git a/lerobot/common/policies/act/modeling_act.py b/lerobot/common/policies/act/modeling_act.py index bef59bec..bbbb512d 100644 --- a/lerobot/common/policies/act/modeling_act.py +++ b/lerobot/common/policies/act/modeling_act.py @@ -315,8 +315,14 @@ class ACT(nn.Module): pos_embed = self.vae_encoder_pos_enc.clone().detach() # (1, S+2, D) # Forward pass through VAE encoder to get the latent PDF parameters. + cls_joint_is_pad = torch.full((batch_size, 2), False).to( + batch["observation.state"].device + ) # False: not a padding + key_padding_mask = torch.cat([cls_joint_is_pad, batch["action_is_pad"]], axis=1) # (bs, seq+1) cls_token_out = self.vae_encoder( - vae_encoder_input.permute(1, 0, 2), pos_embed=pos_embed.permute(1, 0, 2) + vae_encoder_input.permute(1, 0, 2), + pos_embed=pos_embed.permute(1, 0, 2), + key_padding_mask=key_padding_mask, )[0] # select the class token, with shape (B, D) latent_pdf_params = self.vae_encoder_latent_output_proj(cls_token_out) mu = latent_pdf_params[:, : self.config.latent_dim] @@ -402,9 +408,11 @@ class ACTEncoder(nn.Module): self.layers = nn.ModuleList([ACTEncoderLayer(config) for _ in range(config.n_encoder_layers)]) self.norm = nn.LayerNorm(config.dim_model) if config.pre_norm else nn.Identity() - def forward(self, x: Tensor, pos_embed: Tensor | None = None) -> Tensor: + def forward( + self, x: Tensor, pos_embed: Tensor | None = None, key_padding_mask: Tensor | None = None + ) -> Tensor: for layer in self.layers: - x = layer(x, pos_embed=pos_embed) + x = layer(x, pos_embed=pos_embed, key_padding_mask=key_padding_mask) x = self.norm(x) return x @@ -427,12 +435,14 @@ class ACTEncoderLayer(nn.Module): self.activation = get_activation_fn(config.feedforward_activation) self.pre_norm = config.pre_norm - def forward(self, x, pos_embed: Tensor | None = None) -> Tensor: + def forward(self, x, pos_embed: Tensor | None = None, key_padding_mask: Tensor | None = None) -> Tensor: skip = x if self.pre_norm: x = self.norm1(x) q = k = x if pos_embed is None else x + pos_embed - x = self.self_attn(q, k, value=x)[0] # select just the output, not the attention weights + x = self.self_attn(q, k, value=x, key_padding_mask=key_padding_mask)[ + 0 + ] # select just the output, not the attention weights x = skip + self.dropout1(x) if self.pre_norm: skip = x @@ -452,7 +462,10 @@ class ACTDecoder(nn.Module): """Convenience module for running multiple decoder layers followed by normalization.""" super().__init__() self.layers = nn.ModuleList([ACTDecoderLayer(config) for _ in range(config.n_decoder_layers)]) - self.norm = nn.LayerNorm(config.dim_model) + if config.decoder_norm: + self.norm = nn.LayerNorm(config.dim_model) + else: + self.norm = nn.Identity() def forward( self, @@ -465,8 +478,7 @@ class ACTDecoder(nn.Module): x = layer( x, encoder_out, decoder_pos_embed=decoder_pos_embed, encoder_pos_embed=encoder_pos_embed ) - if self.norm is not None: - x = self.norm(x) + x = self.norm(x) return x diff --git a/lerobot/common/robot_devices/cameras/intelrealsense.py b/lerobot/common/robot_devices/cameras/intelrealsense.py new file mode 100644 index 00000000..f7691154 --- /dev/null +++ b/lerobot/common/robot_devices/cameras/intelrealsense.py @@ -0,0 +1,325 @@ + + +import argparse +from dataclasses import dataclass, replace +from pathlib import Path +from threading import Thread +import time +import traceback +import cv2 +import numpy as np +import pyrealsense2 as rs + +from lerobot.common.robot_devices.cameras.opencv import 
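The padding mask added to the VAE encoder above follows the usual `nn.MultiheadAttention` convention: entries set to `True` are treated as padding and ignored by attention. A minimal sketch of how the mask is assembled, with a hypothetical batch of 2 and a chunk of 4 actions:

```python
import torch

batch_size, chunk_size = 2, 4
cls_joint_is_pad = torch.full((batch_size, 2), False)  # [CLS] and joint tokens are never padding
action_is_pad = torch.tensor([[False, False, True, True],
                              [False, False, False, False]])  # e.g. padding past an episode end
key_padding_mask = torch.cat([cls_joint_is_pad, action_is_pad], dim=1)
print(key_padding_mask.shape)  # torch.Size([2, 6]) -> (batch, 2 + chunk_size)
```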
find_camera_indices +from lerobot.common.robot_devices.cameras.utils import save_color_image, save_depth_image + +SERIAL_NUMBER_INDEX = 1 + +def find_camera_indices(raise_when_empty=True): + camera_ids = [] + for device in rs.context().query_devices(): + serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX))) + camera_ids.append(serial_number) + + if raise_when_empty and len(camera_ids) == 0: + raise OSError("Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware.") + + return camera_ids + +def benchmark_cameras(cameras, out_dir=None, save_images=False): + if save_images: + out_dir = Path(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + + while True: + now = time.time() + for camera in cameras: + if camera.use_depth: + color_image, depth_image = camera.capture_image("bgr" if save_images else "rgb") + else: + color_image = camera.capture_image("bgr" if save_images else "rgb") + + if save_images: + image_path = out_dir / f"camera_{camera.camera_index:02}.png" + print(f"Write to {image_path}") + save_color_image(color_image, image_path, write_shape=True) + + if camera.use_depth: + # Apply colormap on depth image (image must be converted to 8-bit per pixel first) + depth_image_path = out_dir / f"camera_{camera.camera_index:02}_depth.png" + print(f"Write to {depth_image_path}") + save_depth_image(depth_image_path, depth_image, write_shape=True) + + dt_s = (time.time() - now) + dt_ms = dt_s * 1000 + freq = 1 / dt_s + print(f"Latency (ms): {dt_ms:.2f}\tFrequency: {freq:.2f}") + + if save_images: + break + if cv2.waitKey(1) & 0xFF == ord("q"): + break + +# Pre-defined configs that worked + +@dataclass +class IntelRealSenseCameraConfig: + """ + Example of tested options for Intel Real Sense D405: + + ```python + IntelRealSenseCameraConfig(30, 640, 480) + IntelRealSenseCameraConfig(60, 640, 480) + IntelRealSenseCameraConfig(90, 640, 480) + IntelRealSenseCameraConfig(30, 1280, 720) + + IntelRealSenseCameraConfig(30, 640, 480, use_depth=True) + IntelRealSenseCameraConfig(60, 640, 480, use_depth=True) + IntelRealSenseCameraConfig(90, 640, 480, use_depth=True) + IntelRealSenseCameraConfig(30, 1280, 720, use_depth=True) + ``` + """ + fps: int | None = None + width: int | None = None + height: int | None = None + color: str = "rgb" + use_depth: bool = False + force_hardware_reset: bool = True + + + +class IntelRealSenseCamera(): + # TODO(rcadene): improve dosctring + """ + Using this class requires: + - [installing `librealsense` and its python wrapper `pyrealsense2`](https://github.com/IntelRealSense/librealsense/blob/master/doc/distribution_linux.md) + - [updating the camera(s) firmware](https://dev.intelrealsense.com/docs/firmware-releases-d400) + + Example of getting the `camera_index` for your camera(s): + ```bash + rs-fw-update -l + + > Connected devices: + > 1) [USB] Intel RealSense D405 s/n 128422270109, update serial number: 133323070634, firmware version: 5.16.0.1 + > 2) [USB] Intel RealSense D405 s/n 128422271609, update serial number: 130523070758, firmware version: 5.16.0.1 + > 3) [USB] Intel RealSense D405 s/n 128422271614, update serial number: 133323070576, firmware version: 5.16.0.1 + > 4) [USB] Intel RealSense D405 s/n 128422271393, update serial number: 133323070271, firmware version: 5.16.0.1 + ``` + + Example of uage: + + ```python + camera = IntelRealSenseCamera(128422270109) # serial number (s/n) + color_image = camera.capture_image() + ``` + + Example of capturing 
additional depth image: + + ```python + config = IntelRealSenseCameraConfig(use_depth=True) + camera = IntelRealSenseCamera(128422270109, config) + color_image, depth_image = camera.capture_image() + ``` + """ + AVAILABLE_CAMERA_INDICES = find_camera_indices() + + def __init__(self, + camera_index: int | None = None, + config: IntelRealSenseCameraConfig | None = None, + **kwargs, + ): + if config is None: + config = IntelRealSenseCameraConfig() + # Overwrite config arguments using kwargs + config = replace(config, **kwargs) + + self.camera_index = camera_index + self.fps = config.fps + self.width = config.width + self.height = config.height + self.color = config.color + self.use_depth = config.use_depth + self.force_hardware_reset = config.force_hardware_reset + + # TODO(rcadene): move these two check in config dataclass + if self.color not in ["rgb", "bgr"]: + raise ValueError(f"Expected color values are 'rgb' or 'bgr', but {self.color} is provided.") + + if (self.fps or self.width or self.height) and not (self.fps and self.width and self.height): + raise ValueError(f"Expected all fps, width and height to be set, when one of them is set, but {self.fps=}, {self.width=}, {self.height=}.") + + if self.camera_index is None: + raise ValueError(f"`camera_index` is expected to be a serial number of one of these available cameras ({IntelRealSenseCamera.AVAILABLE_CAMERA_INDICES}), but {camera_index} is provided instead.") + + self.camera = None + self.is_connected = False + + self.t = Thread(target=self.capture_image_loop, args=()) + self.t.daemon = True + self._color_image = None + + def connect(self): + if self.is_connected: + raise ValueError(f"Camera {self.camera_index} is already connected.") + + config = rs.config() + config.enable_device(str(self.camera_index)) + + if self.fps and self.width and self.height: + # TODO(rcadene): can we set rgb8 directly? + config.enable_stream(rs.stream.color, self.width, self.height, rs.format.rgb8, self.fps) + else: + config.enable_stream(rs.stream.color) + + if self.use_depth: + if self.fps and self.width and self.height: + config.enable_stream(rs.stream.depth, self.width, self.height, rs.format.z16, self.fps) + else: + config.enable_stream(rs.stream.depth) + + self.camera = rs.pipeline() + try: + self.camera.start(config) + except RuntimeError: + # Verify that the provided `camera_index` is valid before printing the traceback + if self.camera_index not in IntelRealSenseCamera.AVAILABLE_CAMERA_INDICES: + raise ValueError(f"`camera_index` is expected to be a serial number of one of these available cameras {IntelRealSenseCamera.AVAILABLE_CAMERA_INDICES}, but {self.camera_index} is provided instead.") + traceback.print_exc() + + self.is_connected = True + self.t.start() + + def capture_image(self, temporary_color: str | None = None) -> np.ndarray | tuple[np.ndarray, np.ndarray]: + frame = self.camera.wait_for_frames() + + color_frame = frame.get_color_frame() + if not color_frame: + raise OSError(f"Can't capture color image from camera {self.camera_index}.") + color_image = np.asanyarray(color_frame.get_data()) + + if temporary_color is None: + requested_color = self.color + else: + requested_color = temporary_color + + if requested_color not in ["rgb", "bgr"]: + raise ValueError(f"Expected color values are 'rgb' or 'bgr', but {requested_color} is provided.") + + # OpenCV uses BGR format as default (blue, green red) for all operations, including displaying images. 
+ # However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks, + # so we convert the image color from BGR to RGB. + # if requested_color == "rgb": + # color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB) + + if self.use_depth: + depth_frame = frame.get_depth_frame() + if not depth_frame: + raise OSError(f"Can't capture depth image from camera {self.camera_index}.") + depth_image = np.asanyarray(depth_frame.get_data()) + + return color_image, depth_image + else: + return color_image + + def capture_image_loop(self): + while True: + self._color_image = self.capture_image() + + def read(self): + while self._color_image is None: + time.sleep(0.1) + return self._color_image + + def disconnect(self): + if getattr(self, "camera", None): + try: + self.camera.stop() + except RuntimeError as e: + if "stop() cannot be called before start()" in str(e): + # skip this runtime error + return + traceback.print_exc() + + def __del__(self): + self.disconnect() + + +def save_images_config(config, out_dir: Path): + camera_ids = IntelRealSenseCamera.AVAILABLE_CAMERA_INDICES + cameras = [] + print(f"Available camera indices: {camera_ids}") + for camera_idx in camera_ids: + camera = IntelRealSenseCamera(camera_idx, config) + cameras.append(camera) + + out_dir = out_dir.parent / f"{out_dir.name}_{config.width}x{config.height}_{config.fps}_depth_{config.use_depth}" + benchmark_cameras(cameras, out_dir, save_images=True) + +def benchmark_config(config, camera_ids: list[int]): + cameras = [IntelRealSenseCamera(idx, config) for idx in camera_ids] + benchmark_cameras(cameras) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--mode", type=str, choices=["save_images", 'benchmark'], default="save_images") + parser.add_argument("--camera-ids", type=int, nargs="*", default=[128422271609, 128422271614, 128422271393]) + parser.add_argument("--fps", type=int, default=30) + parser.add_argument("--width", type=str, default=640) + parser.add_argument("--height", type=str, default=480) + parser.add_argument("--use-depth", type=int, default=0) + parser.add_argument("--out-dir", type=Path, default="outputs/benchmark_cameras/intelrealsense/2024_06_22_1738") + args = parser.parse_args() + + config = IntelRealSenseCameraConfig(args.fps, args.width, args.height, use_depth=bool(args.use_depth)) + # config = IntelRealSenseCameraConfig() + # config = IntelRealSenseCameraConfig(60, 640, 480) + # config = IntelRealSenseCameraConfig(90, 640, 480) + # config = IntelRealSenseCameraConfig(30, 1280, 720) + + if args.mode == "save_images": + save_images_config(config, args.out_dir) + elif args.mode == "benchmark": + benchmark_config(config, args.camera_ids) + else: + raise ValueError(args.mode) + + +# if __name__ == "__main__": +# # Works well! +# # use_depth = False +# # fps = 90 +# # width = 640 +# # height = 480 + +# # # Works well! 
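A hedged usage sketch for `IntelRealSenseCamera`: connect to a single D405 by serial number and grab frames through the background capture thread via `read()`. The serial number is a placeholder taken from the docstring example, and real hardware plus `pyrealsense2` are obviously required:

```python
from lerobot.common.robot_devices.cameras.intelrealsense import (
    IntelRealSenseCamera,
    IntelRealSenseCameraConfig,
)

config = IntelRealSenseCameraConfig(fps=30, width=640, height=480)
camera = IntelRealSenseCamera(128422270109, config)  # placeholder serial number
camera.connect()          # starts the pipeline and the capture_image_loop thread
frame = camera.read()     # blocks until the thread has produced at least one frame
print(frame.shape)        # typically (480, 640, 3)
camera.disconnect()
```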
+# # use_depth = True +# # fps = 90 +# # width = 640 +# # height = 480 + +# # # Doesn't work well, latency varies too much +# # use_depth = True +# # fps = 30 +# # width = 1280 +# # height = 720 + +# # Works well +# use_depth = False +# fps = 30 +# width = 1280 +# height = 720 + +# config = IntelRealSenseCameraConfig() +# # config = IntelRealSenseCameraConfig(fps, width, height, use_depth=use_depth) +# cameras = [ +# # IntelRealSenseCamera(0, config), +# # IntelRealSenseCamera(128422270109, config), +# IntelRealSenseCamera(128422271609, config), +# IntelRealSenseCamera(128422271614, config), +# IntelRealSenseCamera(128422271393, config), +# ] + +# out_dir = "outputs/benchmark_cameras/intelrealsense/2024_06_22_1729" +# out_dir += f"{config.width}x{config.height}_{config.fps}_depth_{config.use_depth}" +# benchmark_cameras(cameras, out_dir, save_images=False) diff --git a/lerobot/common/robot_devices/motors/feetech.py b/lerobot/common/robot_devices/motors/feetech.py new file mode 100644 index 00000000..e415fa91 --- /dev/null +++ b/lerobot/common/robot_devices/motors/feetech.py @@ -0,0 +1,713 @@ +from copy import deepcopy +import enum +import numpy as np + +from scservo_sdk import PacketHandler, PortHandler, COMM_SUCCESS, GroupSyncRead, GroupSyncWrite +from scservo_sdk import SCS_HIBYTE, SCS_HIBYTE, SCS_LOBYTE, SCS_LOWORD + +PROTOCOL_VERSION = 0 +BAUD_RATE = 1_000_000 +TIMEOUT_MS = 1000 + + +def u32_to_i32(value: int | np.array) -> int | np.array: + """ + Convert an unsigned 32-bit integer array to a signed 32-bit integer array. + """ + + if isinstance(value, int): + if value > 2147483647: + value = value - 4294967296 + else: + for i in range(len(value)): + if value[i] is not None and value[i] > 2147483647: + value[i] = value[i] - 4294967296 + + return value + + +def i32_to_u32(value: int | np.array) -> int | np.array: + """ + Convert a signed 32-bit integer array to an unsigned 32-bit integer array. + """ + + if isinstance(value, int): + if value < 0: + value = value + 4294967296 + else: + for i in range(len(value)): + if value[i] is not None and value[i] < 0: + value[i] = value[i] + 4294967296 + + return value + + +def retrieve_ids_and_command(values: np.array, ids: np.array) -> (list[int], np.array): + """ + Convert the values to a chain command. Skip the None values and return the ids and values. 
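A quick sanity check for the two's-complement helpers above: `2**32 - 10` is the unsigned encoding of `-10`, and the two functions are inverses of each other.

```python
import numpy as np

from lerobot.common.robot_devices.motors.feetech import i32_to_u32, u32_to_i32

raw = np.array([10, 2**32 - 10])
signed = u32_to_i32(raw)        # -> array([10, -10])
restored = i32_to_u32(signed)   # -> array([10, 4294967286])
```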
+ """ + + non_none_values = np.array([value for value in values if value is not None]) + non_none_values_ids = [ids[i] for i, value in enumerate(values) if value is not None] + + return non_none_values_ids, non_none_values + + +class TorqueMode(enum.Enum): + ENABLED = 1 + DISABLED = 0 + + +class OperatingMode(enum.Enum): + pass + + +class DriveMode(enum.Enum): + pass + + +SCS_SERIES_CONTROL_TABLE = [ + ("Model", 3, 2), + ("ID", 5, 1), + ("Baud_Rate", 6, 1), + ("Return_Delay", 7, 1), + ("Response_Status_Level", 8, 1), + ("Min_Angle_Limit", 9, 2), + ("Max_Angle_Limit", 11, 2), + ("Max_Temperature_Limit", 13, 1), + ("Max_Voltage_Limit", 14, 1), + ("Min_Voltage_Limit", 15, 1), + ("Max_Torque_Limit", 16, 2), + ("Phase", 18, 1), + ("Unloading_Condition", 19, 1), + ("LED_Alarm_Condition", 20, 1), + ("P_Coefficient", 21, 1), + ("D_Coefficient", 22, 1), + ("I_Coefficient", 23, 1), + ("Minimum_Startup_Force", 24, 2), + ("CW_Dead_Zone", 26, 1), + ("CCW_Dead_Zone", 27, 1), + ("Protection_Current", 28, 2), + ("Angular_Resolution", 30, 1), + ("Offset", 31, 2), + ("Mode", 33, 1), + ("Protective_Torque", 34, 1), + ("Protection_Time", 35, 1), + ("Overload_Torque", 36, 1), + ("Speed_closed_loop_P_proportional_coefficient", 37, 1), + ("Over_Current_Protection_Time", 38, 1), + ("Velocity_closed_loop_I_integral_coefficient", 39, 1), + ("Torque_Enable", 40, 1), + ("Acceleration", 41, 1), + ("Goal_Position", 42, 2), + ("Goal_Time", 44, 2), + ("Goal_Speed", 46, 2), + ("Lock", 55, 1), + ("Present_Position", 56, 2), + ("Present_Speed", 58, 2), + ("Present_Load", 60, 2), + ("Present_Voltage", 62, 1), + ("Present_Temperature", 63, 1), + ("Status", 65, 1), + ("Moving", 66, 1), + ("Present_Current", 69, 2) +] + +MODEL_CONTROL_TABLE = { + "scs_series": SCS_SERIES_CONTROL_TABLE, + "sts3215": SCS_SERIES_CONTROL_TABLE, +} + + +class FeetechBus: + + def __init__(self, port: str, motor_models: dict[int, str], + extra_model_control_table: dict[str, list[tuple]] | None = None): + self.port = port + self.motor_models = motor_models + + self.model_ctrl_table = deepcopy(MODEL_CONTROL_TABLE) + if extra_model_control_table: + self.model_ctrl_table.update(extra_model_control_table) + + # Find read/write addresses and number of bytes for each motor + self.motor_ctrl = {} + for idx, model in self.motor_models.items(): + for data_name, addr, bytes in self.model_ctrl_table[model]: + if idx not in self.motor_ctrl: + self.motor_ctrl[idx] = {} + self.motor_ctrl[idx][data_name] = { + "addr": addr, + "bytes": bytes, + } + + self.port_handler = PortHandler(self.port) + self.packet_handler = PacketHandler(PROTOCOL_VERSION) + + if not self.port_handler.openPort(): + raise OSError(f"Failed to open port {self.port}") + + self.port_handler.setBaudRate(BAUD_RATE) + self.port_handler.setPacketTimeoutMillis(TIMEOUT_MS) + + self.group_readers = {} + self.group_writers = {} + + @property + def motor_ids(self) -> list[int]: + return list(self.motor_models.keys()) + + def close(self): + self.port_handler.closePort() + + def write(self, data_name, value, motor_idx: int): + + addr = self.motor_ctrl[motor_idx][data_name]["addr"] + bytes = self.motor_ctrl[motor_idx][data_name]["bytes"] + args = (self.port_handler, motor_idx, addr, value) + if bytes == 1: + comm, err = self.packet_handler.write1ByteTxRx(*args) + elif bytes == 2: + comm, err = self.packet_handler.write2ByteTxRx(*args) + elif bytes == 4: + comm, err = self.packet_handler.write4ByteTxRx(*args) + else: + raise NotImplementedError( + f"Value of the number of bytes to be sent is expected to be in 
[1, 2, 4], but {bytes} " + f"is provided instead.") + + if comm != COMM_SUCCESS: + raise ConnectionError( + f"Write failed due to communication error on port {self.port} for motor {motor_idx}: " + f"{self.packet_handler.getTxRxResult(comm)}" + ) + elif err != 0: + raise ConnectionError( + f"Write failed due to error {err} on port {self.port} for motor {motor_idx}: " + f"{self.packet_handler.getTxRxResult(err)}" + ) + + def read(self, data_name, motor_idx: int): + addr = self.motor_ctrl[motor_idx][data_name]["addr"] + bytes = self.motor_ctrl[motor_idx][data_name]["bytes"] + args = (self.port_handler, motor_idx, addr) + if bytes == 1: + value, comm, err = self.packet_handler.read1ByteTxRx(*args) + elif bytes == 2: + value, comm, err = self.packet_handler.read2ByteTxRx(*args) + elif bytes == 4: + value, comm, err = self.packet_handler.read4ByteTxRx(*args) + else: + raise NotImplementedError( + f"Value of the number of bytes to be sent is expected to be in [1, 2, 4], but " + f"{bytes} is provided instead.") + + if comm != COMM_SUCCESS: + raise ConnectionError( + f"Read failed due to communication error on port {self.port} for motor {motor_idx}: " + f"{self.packet_handler.getTxRxResult(comm)}" + ) + elif err != 0: + raise ConnectionError( + f"Read failed due to error {err} on port {self.port} for motor {motor_idx}: " + f"{self.packet_handler.getTxRxResult(err)}" + ) + + return value + + def sync_read(self, data_name, motor_ids: list[int] | None = None): + if motor_ids is None: + motor_ids = self.motor_ids + + group_key = f"{data_name}_" + "_".join([str(idx) for idx in motor_ids]) + first_motor_idx = list(self.motor_ctrl.keys())[0] + addr = self.motor_ctrl[first_motor_idx][data_name]["addr"] + bytes = self.motor_ctrl[first_motor_idx][data_name]["bytes"] + + if data_name not in self.group_readers: + self.group_readers[group_key] = GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes) + for idx in motor_ids: + self.group_readers[group_key].addParam(idx) + + comm = self.group_readers[group_key].txRxPacket() + if comm != COMM_SUCCESS: + raise ConnectionError( + f"Read failed due to communication error on port {self.port} for group_key {group_key}: " + f"{self.packet_handler.getTxRxResult(comm)}" + ) + + values = [] + for idx in motor_ids: + value = self.group_readers[group_key].getData(idx, addr, bytes) + values.append(value) + + return np.array(values) + + def sync_write(self, data_name, values: int | list[int], motor_ids: int | list[int] | None = None): + if motor_ids is None: + motor_ids = self.motor_ids + + if isinstance(motor_ids, int): + motor_ids = [motor_ids] + + if isinstance(values, (int, np.integer)): + values = [int(values)] * len(motor_ids) + + if isinstance(values, np.ndarray): + values = values.tolist() + + group_key = f"{data_name}_" + "_".join([str(idx) for idx in motor_ids]) + + first_motor_idx = list(self.motor_ctrl.keys())[0] + addr = self.motor_ctrl[first_motor_idx][data_name]["addr"] + bytes = self.motor_ctrl[first_motor_idx][data_name]["bytes"] + init_group = data_name not in self.group_readers + + if init_group: + self.group_writers[group_key] = GroupSyncWrite(self.port_handler, self.packet_handler, addr, bytes) + + for idx, value in zip(motor_ids, values): + if bytes == 1: + data = [ + SCS_LOBYTE(SCS_LOWORD(value)), + ] + elif bytes == 2: + data = [ + SCS_LOBYTE(SCS_LOWORD(value)), + SCS_HIBYTE(SCS_LOWORD(value)), + ] + elif bytes == 4: + data = [ + SCS_LOBYTE(SCS_LOWORD(value)), + SCS_HIBYTE(SCS_LOWORD(value)), + SCS_LOBYTE(SCS_HIBYTE(value)), + 
SCS_HIBYTE(SCS_HIBYTE(value)), + ] + else: + raise NotImplementedError( + f"Value of the number of bytes to be sent is expected to be in [1, 2, 4], but {bytes} " + f"is provided instead.") + + if init_group: + self.group_writers[group_key].addParam(idx, data) + else: + self.group_writers[group_key].changeParam(idx, data) + + comm = self.group_writers[group_key].txPacket() + if comm != COMM_SUCCESS: + raise ConnectionError( + f"Write failed due to communication error on port {self.port} for group_key {group_key}: " + f"{self.packet_handler.getTxRxResult(comm)}" + ) + + def read_model(self, motor_idx: int): + return self.read("Model", motor_idx) + + def sync_read_model(self, motor_ids: list[int] | None = None): + return self.sync_read("Model", motor_ids) + + def write_id(self, value, motor_idx: int): + self.write("ID", value, motor_idx) + + def read_id(self, motor_idx: int): + return self.read("ID", motor_idx) + + def sync_read_id(self, motor_ids: list[int] | None = None): + return self.sync_read("ID", motor_ids) + + def sync_write_id(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("ID", values, motor_ids) + + def write_baud_rate(self, value, motor_idx: int): + self.write("Baud_Rate", value, motor_idx) + + def read_baud_rate(self, motor_idx: int): + return self.read("Baud_Rate", motor_idx) + + def sync_read_baud_rate(self, motor_ids: list[int] | None = None): + return self.sync_read("Baud_Rate", motor_ids) + + def sync_write_baud_rate(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Baud_Rate", values, motor_ids) + + def read_return_delay(self, motor_idx: int): + return self.read("Return_Delay", motor_idx) + + def sync_read_return_delay(self, motor_ids: list[int] | None = None): + return self.sync_read("Return_Delay", motor_ids) + + def read_response_status_level(self, motor_idx: int): + return self.read("Response_Status_Level", motor_idx) + + def sync_read_response_status_level(self, motor_ids: list[int] | None = None): + return self.sync_read("Response_Status_Level", motor_ids) + + def write_min_angle_limit(self, value, motor_idx: int): + self.write("Min_Angle_Limit", value, motor_idx) + + def read_min_angle_limit(self, motor_idx: int): + return self.read("Min_Angle_Limit", motor_idx) + + def sync_read_min_angle_limit(self, motor_ids: list[int] | None = None): + return self.sync_read("Min_Angle_Limit", motor_ids) + + def sync_write_min_angle_limit(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Min_Angle_Limit", values, motor_ids) + + def write_max_angle_limit(self, value, motor_idx: int): + self.write("Max_Angle_Limit", value, motor_idx) + + def read_max_angle_limit(self, motor_idx: int): + return self.read("Max_Angle_Limit", motor_idx) + + def sync_read_max_angle_limit(self, motor_ids: list[int] | None = None): + return self.sync_read("Max_Angle_Limit", motor_ids) + + def sync_write_max_angle_limit(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Max_Angle_Limit", values, motor_ids) + + def write_max_temperature_limit(self, value, motor_idx: int): + self.write("Max_Temperature_Limit", value, motor_idx) + + def read_max_temperature_limit(self, motor_idx: int): + return self.read("Max_Temperature_Limit", motor_idx) + + def sync_read_max_temperature_limit(self, motor_ids: list[int] | None = None): + return self.sync_read("Max_Temperature_Limit", motor_ids) + + def sync_write_max_temperature_limit(self, values: int | list[int], motor_ids: 
list[int] | None = None): + self.sync_write("Max_Temperature_Limit", values, motor_ids) + + def write_max_voltage_limit(self, value, motor_idx: int): + self.write("Max_Voltage_Limit", value, motor_idx) + + def read_max_voltage_limit(self, motor_idx: int): + return self.read("Max_Voltage_Limit", motor_idx) + + def sync_read_max_voltage_limit(self, motor_ids: list[int] | None = None): + return self.sync_read("Max_Voltage_Limit", motor_ids) + + def sync_write_max_voltage_limit(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Max_Voltage_Limit", values, motor_ids) + + def write_min_voltage_limit(self, value, motor_idx: int): + self.write("Min_Voltage_Limit", value, motor_idx) + + def read_min_voltage_limit(self, motor_idx: int): + return self.read("Min_Voltage_Limit", motor_idx) + + def sync_read_min_voltage_limit(self, motor_ids: list[int] | None = None): + return self.sync_read("Min_Voltage_Limit", motor_ids) + + def sync_write_min_voltage_limit(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Min_Voltage_Limit", values, motor_ids) + + def write_max_torque_limit(self, value, motor_idx: int): + self.write("Max_Torque_Limit", value, motor_idx) + + def read_max_torque_limit(self, motor_idx: int): + return self.read("Max_Torque_Limit", motor_idx) + + def sync_read_max_torque_limit(self, motor_ids: list[int] | None = None): + return self.sync_read("Max_Torque_Limit", motor_ids) + + def sync_write_max_torque_limit(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Max_Torque_Limit", values, motor_ids) + + def write_p_coefficient(self, value, motor_idx: int): + self.write("P_Coefficient", value, motor_idx) + + def read_p_coefficient(self, motor_idx: int): + return self.read("P_Coefficient", motor_idx) + + def sync_read_p_coefficient(self, motor_ids: list[int] | None = None): + return self.sync_read("P_Coefficient", motor_ids) + + def sync_write_p_coefficient(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("P_Coefficient", values, motor_ids) + + def write_d_coefficient(self, value, motor_idx: int): + self.write("D_Coefficient", value, motor_idx) + + def read_d_coefficient(self, motor_idx: int): + return self.read("D_Coefficient", motor_idx) + + def sync_read_d_coefficient(self, motor_ids: list[int] | None = None): + return self.sync_read("D_Coefficient", motor_ids) + + def sync_write_d_coefficient(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("D_Coefficient", values, motor_ids) + + def write_i_coefficient(self, value, motor_idx: int): + self.write("I_Coefficient", value, motor_idx) + + def read_i_coefficient(self, motor_idx: int): + return self.read("I_Coefficient", motor_idx) + + def sync_read_i_coefficient(self, motor_ids: list[int] | None = None): + return self.sync_read("I_Coefficient", motor_ids) + + def sync_write_i_coefficient(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("I_Coefficient", values, motor_ids) + + def write_minimum_startup_force(self, value, motor_idx: int): + self.write("Minimum_Startup_Force", value, motor_idx) + + def read_minimum_startup_force(self, motor_idx: int): + return self.read("Minimum_Startup_Force", motor_idx) + + def sync_read_minimum_startup_force(self, motor_ids: list[int] | None = None): + return self.sync_read("Minimum_Startup_Force", motor_ids) + + def sync_write_minimum_startup_force(self, values: int | list[int], motor_ids: 
list[int] | None = None): + self.sync_write("Minimum_Startup_Force", values, motor_ids) + + def write_cw_dead_zone(self, value, motor_idx: int): + self.write("CW_Dead_Zone", value, motor_idx) + + def read_cw_dead_zone(self, motor_idx: int): + return self.read("CW_Dead_Zone", motor_idx) + + def sync_read_cw_dead_zone(self, motor_ids: list[int] | None = None): + return self.sync_read("CW_Dead_Zone", motor_ids) + + def sync_write_cw_dead_zone(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("CW_Dead_Zone", values, motor_ids) + + def write_ccw_dead_zone(self, value, motor_idx: int): + self.write("CCW_Dead_Zone", value, motor_idx) + + def read_ccw_dead_zone(self, motor_idx: int): + return self.read("CCW_Dead_Zone", motor_idx) + + def sync_read_ccw_dead_zone(self, motor_ids: list[int] | None = None): + return self.sync_read("CCW_Dead_Zone", motor_ids) + + def sync_write_ccw_dead_zone(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("CCW_Dead_Zone", values, motor_ids) + + def write_protection_current(self, value, motor_idx: int): + self.write("Protection_Current", value, motor_idx) + + def read_protection_current(self, motor_idx: int): + return self.read("Protection_Current", motor_idx) + + def sync_read_protection_current(self, motor_ids: list[int] | None = None): + return self.sync_read("Protection_Current", motor_ids) + + def sync_write_protection_current(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Protection_Current", values, motor_ids) + + def read_angular_resolution(self, motor_idx: int): + return self.read("Angular_Resolution", motor_idx) + + def sync_read_angular_resolution(self, motor_ids: list[int] | None = None): + return self.sync_read("Angular_Resolution", motor_ids) + + def write_offset(self, value, motor_idx: int): + self.write("Offset", value, motor_idx) + + def read_offset(self, motor_idx: int): + return self.read("Offset", motor_idx) + + def sync_read_offset(self, motor_ids: list[int] | None = None): + return self.sync_read("Offset", motor_ids) + + def sync_write_offset(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Offset", values, motor_ids) + + def write_mode(self, value, motor_idx: int): + self.write("Mode", value, motor_idx) + + def read_mode(self, motor_idx: int): + return self.read("Mode", motor_idx) + + def sync_read_mode(self, motor_ids: list[int] | None = None): + return self.sync_read("Mode", motor_ids) + + def sync_write_mode(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Mode", values, motor_ids) + + def write_protective_torque(self, value, motor_idx: int): + self.write("Protective_Torque", value, motor_idx) + + def read_protective_torque(self, motor_idx: int): + return self.read("Protective_Torque", motor_idx) + + def sync_read_protective_torque(self, motor_ids: list[int] | None = None): + return self.sync_read("Protective_Torque", motor_ids) + + def sync_write_protective_torque(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Protective_Torque", values, motor_ids) + + def read_protection_time(self, motor_idx: int): + return self.read("Protection_Time", motor_idx) + + def sync_read_protection_time(self, motor_ids: list[int] | None = None): + return self.sync_read("Protection_Time", motor_ids) + + def write_speed_closed_loop_p_proportional_coefficient(self, value, motor_idx: int): + 
self.write("Speed_closed_loop_P_proportional_coefficient", value, motor_idx) + + def read_speed_closed_loop_p_proportional_coefficient(self, motor_idx: int): + return self.read("Speed_closed_loop_P_proportional_coefficient", motor_idx) + + def sync_read_speed_closed_loop_p_proportional_coefficient(self, motor_ids: list[int] | None = None): + return self.sync_read("Speed_closed_loop_P_proportional_coefficient", motor_ids) + + def sync_write_speed_closed_loop_p_proportional_coefficient(self, values: int | list[int], + motor_ids: list[int] | None = None): + self.sync_write("Speed_closed_loop_P_proportional_coefficient", values, motor_ids) + + def write_over_current_protection_time(self, value, motor_idx: int): + self.write("Over_Current_Protection_Time", value, motor_idx) + + def read_over_current_protection_time(self, motor_idx: int): + return self.read("Over_Current_Protection_Time", motor_idx) + + def sync_read_over_current_protection_time(self, motor_ids: list[int] | None = None): + return self.sync_read("Over_Current_Protection_Time", motor_ids) + + def sync_write_over_current_protection_time(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Over_Current_Protection_Time", values, motor_ids) + + def write_velocity_closed_loop_i_integral_coefficient(self, value, motor_idx: int): + self.write("Velocity_closed_loop_I_integral_coefficient", value, motor_idx) + + def read_velocity_closed_loop_i_integral_coefficient(self, motor_idx: int): + return self.read("Velocity_closed_loop_I_integral_coefficient", motor_idx) + + def sync_read_velocity_closed_loop_i_integral_coefficient(self, motor_ids: list[int] | None = None): + return self.sync_read("Velocity_closed_loop_I_integral_coefficient", motor_ids) + + def sync_write_velocity_closed_loop_i_integral_coefficient(self, values: int | list[int], + motor_ids: list[int] | None = None): + self.sync_write("Velocity_closed_loop_I_integral_coefficient", values, motor_ids) + + def write_torque_enable(self, value, motor_idx: int): + self.write("Torque_Enable", value, motor_idx) + + def read_torque_enable(self, motor_idx: int): + return self.read("Torque_Enable", motor_idx) + + def sync_read_torque_enable(self, motor_ids: list[int] | None = None): + return self.sync_read("Torque_Enable", motor_ids) + + def sync_write_torque_enable(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Torque_Enable", values, motor_ids) + + def write_goal_position_u32(self, value, motor_idx: int): + self.write("Goal_Position", value, motor_idx) + + def write_goal_position_i32(self, value, motor_idx: int): + self.write("Goal_Position", i32_to_u32(value), motor_idx) + + def read_goal_position_u32(self, motor_idx: int): + return self.read("Goal_Position", motor_idx) + + def read_goal_position_i32(self, motor_idx: int): + goal_position_u32 = self.read_goal_position_u32(motor_idx) + + return u32_to_i32(goal_position_u32) + + def sync_read_goal_position_u32(self, motor_ids: list[int] | None = None): + return self.sync_read("Goal_Position", motor_ids) + + def sync_read_goal_position_i32(self, motor_ids: list[int] | None = None): + goal_position_u32 = self.sync_read_goal_position_u32(motor_ids) + + return u32_to_i32(goal_position_u32) + + def sync_write_goal_position_u32(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Goal_Position", values, motor_ids) + + def sync_write_goal_position_i32(self, values: int | list[int], motor_ids: list[int] | None = None): + 
self.sync_write("Goal_Position", i32_to_u32(values), motor_ids) + + def write_goal_time(self, value, motor_idx: int): + self.write("Goal_Time", value, motor_idx) + + def read_goal_time(self, motor_idx: int): + return self.read("Goal_Time", motor_idx) + + def sync_read_goal_time(self, motor_ids: list[int] | None = None): + return self.sync_read("Goal_Time", motor_ids) + + def sync_write_goal_time(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Goal_Time", values, motor_ids) + + def write_goal_speed(self, value, motor_idx: int): + self.write("Goal_Speed", value, motor_idx) + + def read_goal_speed(self, motor_idx: int): + return self.read("Goal_Speed", motor_idx) + + def sync_read_goal_speed(self, motor_ids: list[int] | None = None): + return self.sync_read("Goal_Speed", motor_ids) + + def sync_write_goal_speed(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Goal_Speed", values, motor_ids) + + def write_lock(self, value, motor_idx: int): + self.write("Lock", value, motor_idx) + + def read_lock(self, motor_idx: int): + return self.read("Lock", motor_idx) + + def sync_read_lock(self, motor_ids: list[int] | None = None): + return self.sync_read("Lock", motor_ids) + + def sync_write_lock(self, values: int | list[int], motor_ids: list[int] | None = None): + self.sync_write("Lock", values, motor_ids) + + def read_present_position_u32(self, motor_idx: int): + return self.read("Present_Position", motor_idx) + + def read_present_position_i32(self, motor_idx: int): + present_position_u32 = self.read_present_position_u32(motor_idx) + + return u32_to_i32(present_position_u32) + + def sync_read_present_position_u32(self, motor_ids: list[int] | None = None): + return self.sync_read("Present_Position", motor_ids) + + def sync_read_present_position_i32(self, motor_ids: list[int] | None = None): + present_position_u32 = self.sync_read_present_position_u32(motor_ids) + + return u32_to_i32(present_position_u32) + + def read_present_speed(self, motor_idx: int): + return self.read("Present_Speed", motor_idx) + + def sync_read_present_speed(self, motor_ids: list[int] | None = None): + return self.sync_read("Present_Speed", motor_ids) + + def read_present_load(self, motor_idx: int): + return self.read("Present_Load", motor_idx) + + def sync_read_present_load(self, motor_ids: list[int] | None = None): + return self.sync_read("Present_Load", motor_ids) + + def read_present_voltage(self, motor_idx: int): + return self.read("Present_Voltage", motor_idx) + + def sync_read_present_voltage(self, motor_ids: list[int] | None = None): + return self.sync_read("Present_Voltage", motor_ids) + + def read_present_temperature(self, motor_idx: int): + return self.read("Present_Temperature", motor_idx) + + def sync_read_present_temperature(self, motor_ids: list[int] | None = None): + return self.sync_read("Present_Temperature", motor_ids) + + def read_moving(self, motor_idx: int): + return self.read("Moving", motor_idx) + + def sync_read_moving(self, motor_ids: list[int] | None = None): + return self.sync_read("Moving", motor_ids) + + def read_present_current(self, motor_idx: int): + return self.read("Present_Current", motor_idx) + + def sync_read_present_current(self, motor_ids: list[int] | None = None): + return self.sync_read("Present_Current", motor_ids) diff --git a/lerobot/common/robot_devices/robots/aloha.py b/lerobot/common/robot_devices/robots/aloha.py new file mode 100644 index 00000000..69ae470a --- /dev/null +++ 
b/lerobot/common/robot_devices/robots/aloha.py @@ -0,0 +1,204 @@ +import copy +from dataclasses import dataclass, field, replace +import numpy as np +import torch +from examples.real_robot_example.gym_real_world.robot import Robot +from lerobot.common.robot_devices.cameras.opencv import OpenCVCamera +from lerobot.common.robot_devices.cameras.utils import Camera + +MAX_LEADER_GRIPPER_RAD = 0.7761942786701344 +MAX_LEADER_GRIPPER_POS = 2567 +MAX_FOLLOWER_GRIPPER_RAD = 1.6827769243105486 +MAX_FOLLOWER_GRIPPER_POS = 3100 + +MIN_LEADER_GRIPPER_RAD = -0.12732040539450828 +MIN_LEADER_GRIPPER_POS = 1984 +MIN_FOLLOWER_GRIPPER_RAD = 0.6933593161243099 +MIN_FOLLOWER_GRIPPER_POS = 2512 + +GRIPPER_INDEX = -1 + +def convert_gripper_range_from_leader_to_follower(leader_pos): + follower_goal_pos = copy.copy(leader_pos) + follower_goal_pos[GRIPPER_INDEX] = \ + (leader_pos[GRIPPER_INDEX] - MIN_LEADER_GRIPPER_POS) \ + / (MAX_LEADER_GRIPPER_POS - MIN_LEADER_GRIPPER_POS) \ + * (MAX_FOLLOWER_GRIPPER_POS - MIN_FOLLOWER_GRIPPER_POS) \ + + MIN_FOLLOWER_GRIPPER_POS + return follower_goal_pos + + + +@dataclass +class AlohaRobotConfig: + """ + Example of usage: + ```python + AlohaRobotConfig() + ``` + + Example of only using left arm: + ```python + AlohaRobotConfig( + activated_leaders=["left"], + activated_followers=["left"], + ) + ``` + """ + + # Define all the components of the robot + leader_devices: dict[str, str] = field( + default_factory=lambda: { + "right": { + #"port": "/dev/ttyDXL_master_right", + "port": "/dev/ttyDXL_master_left", + "servos": [1, 2, 3, 4, 5, 6, 7, 8, 9], + }, + "left": { + "port": "/dev/ttyDXL_master_left", + "servos": [1, 2, 3, 4, 5, 6, 7, 8, 9], + }, + } + ) + follower_devices: dict[str, str] = field( + default_factory=lambda: { + "right": { + "port": "/dev/ttyDXL_puppet_right", + "servos": [1, 2, 3, 4, 5, 6, 7, 8, 9], + }, + "left": { + "port": "/dev/ttyDXL_puppet_left", + "servos": [1, 2, 3, 4, 5, 6, 7, 8, 9], + }, + } + ) + camera_devices: dict[str, Camera] = field( + default_factory=lambda: { + # "cam_high": OpenCVCamera(16), + # "cam_low": OpenCVCamera(4), + # "cam_left_wrist": OpenCVCamera(10), + # "cam_right_wrist": OpenCVCamera(22), + } + ) + + # Allows to easily pick a subset of all devices + activated_leaders: list[str] | None = field( + default_factory=lambda: ["left", "right"] + ) + activated_followers: list[str] | None = field( + default_factory=lambda: ["left", "right"] + ) + activated_cameras: list[str] | None = field( + default_factory=lambda: ["cam_high", "cam_low", "cam_left_wrist", "cam_right_wrist"] + ) + + +class AlohaRobot(): + """ Trossen Robotics + + Example of usage: + ```python + robot = AlohaRobot() + ``` + """ + + def __init__(self, config: AlohaRobotConfig | None = None, **kwargs): + if config is None: + config = AlohaRobotConfig() + # Overwrite config arguments using kwargs + config = replace(config, **kwargs) + self.config = config + + self.leaders = {} + self.followers = {} + self.cameras = {} + + if config.activated_leaders: + for name in config.activated_leaders: + info = config.leader_devices[name] + self.leaders[name] = Robot(info["port"], servo_ids=info["servos"]) + + if config.activated_followers: + for name in config.activated_followers: + info = config.follower_devices[name] + self.followers[name] = Robot(info["port"], servo_ids=info["servos"]) + + if config.activated_cameras: + for name in config.activated_cameras: + self.cameras[name] = config.camera_devices[name] + + def init_teleop(self): + for name in self.followers: + 
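+            # Torque must be enabled on the follower arms so their servos actively hold and
+            # track the goal positions streamed during teleoperation (leader arms are left
+            # torque-free so they can be moved by hand).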
self.followers[name]._enable_torque() + for name in self.cameras: + self.cameras[name].connect() + + def teleop_step(self, record_data=False) -> None | tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: + # Prepare to assign the positions of the leader to the follower + leader_pos = {} + for name in self.leaders: + leader_pos[name] = self.leaders[name].read_position() + + # Update the position of the follower gripper to account for the different minimum and maximum range + # position in range [0, 4096[ which corresponds to 4096 bins of 360 degrees + # for all our dynamixel servors + # gripper id=8 has a different range from leader to follower + follower_goal_pos = {} + for name in self.leaders: + follower_goal_pos[name] = convert_gripper_range_from_leader_to_follower(leader_pos[name]) + + # Send action + for name in self.followers: + self.followers[name].set_goal_pos(follower_goal_pos[name]) + + # Early exit when recording data is not requested + if not record_data: + return + + # Read follower position + follower_pos = {} + for name in self.followers: + follower_pos[name] = self.followers[name].read_position() + + # Create state by concatenating follower current position + state = [] + for name in ["left", "right"]: + if name in follower_pos: + state.append(follower_pos[name]) + state = np.concatenate(state) + state = pwm2pos(state) + + # Create action by concatenating follower goal position + action = [] + for name in ["left", "right"]: + if name in follower_goal_pos: + action.append(follower_goal_pos[name]) + action = np.concatenate(action) + action = pwm2pos(action) + + # Capture images from cameras + images = {} + for name in self.cameras: + images[name] = self.cameras[name].read() + + # Populate output dictionnaries and format to pytorch + obs_dict, action_dict = {}, {} + obs_dict["observation.state"] = torch.from_numpy(state) + action_dict["action"] = torch.from_numpy(action) + for name in self.cameras: + obs_dict[f"observation.images.{name}"] = torch.from_numpy(images[name]) + + return obs_dict, action_dict + + def send_action(self, action): + from_idx = 0 + to_idx = 0 + follower_goal_pos = {} + for name in ["left", "right"]: + if name in self.followers: + to_idx += len(self.config.follower_devices[name]["servos"]) + follower_goal_pos[name] = pos2pwm(action[from_idx:to_idx].numpy()) + from_idx = to_idx + + for name in self.followers: + self.followers[name].set_goal_pos(follower_goal_pos[name]) diff --git a/lerobot/common/utils/io_utils.py b/lerobot/common/utils/io_utils.py index b85f17c7..07478ecf 100644 --- a/lerobot/common/utils/io_utils.py +++ b/lerobot/common/utils/io_utils.py @@ -25,3 +25,42 @@ def write_video(video_path, stacked_frames, fps): "ignore", "pkg_resources is deprecated as an API", category=DeprecationWarning ) imageio.mimsave(video_path, stacked_frames, fps=fps) + + +import serial +import os +import time + +def reset_usb_port(port): + try: + # Close the serial port if it's open + ser = serial.Serial(port) + ser.close() + except serial.serialutil.SerialException as e: + print(f"Exception while closing the port: {e}") + + # Find the USB device path + usb_device_path = None + for root, dirs, files in os.walk('/sys/bus/usb/drivers/usb'): + for dir_name in dirs: + if port in dir_name: + usb_device_path = os.path.join(root, dir_name) + break + + if usb_device_path: + # Unbind and rebind the USB device + try: + unbind_path = os.path.join(usb_device_path, 'unbind') + bind_path = os.path.join(usb_device_path, 'bind') + usb_id = os.path.basename(usb_device_path) + + 
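+            # Writing the device id to the driver's sysfs 'unbind'/'bind' files detaches and
+            # re-attaches the USB device, which usually requires root privileges.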
with open(unbind_path, 'w') as f: + f.write(usb_id) + time.sleep(1) # Wait for a second + with open(bind_path, 'w') as f: + f.write(usb_id) + print(f"USB port {port} has been reset.") + except Exception as e: + print(f"Exception during USB reset: {e}") + else: + print(f"Could not find USB device path for port: {port}") \ No newline at end of file diff --git a/lerobot/configs/default.yaml b/lerobot/configs/default.yaml index 85b9ceea..15acc392 100644 --- a/lerobot/configs/default.yaml +++ b/lerobot/configs/default.yaml @@ -50,6 +50,8 @@ eval: batch_size: 1 # `use_async_envs` specifies whether to use asynchronous environments (multiprocessing). use_async_envs: false + # Specify the number of episodes to render during evaluation. + max_episodes_rendered: 10 wandb: enable: false diff --git a/lerobot/configs/env/aloha.yaml b/lerobot/configs/env/aloha.yaml index 296a4481..371c32bc 100644 --- a/lerobot/configs/env/aloha.yaml +++ b/lerobot/configs/env/aloha.yaml @@ -9,6 +9,7 @@ env: action_dim: 14 fps: ${fps} episode_length: 400 + real_world: false gym: obs_type: pixels_agent_pos render_mode: rgb_array diff --git a/lerobot/configs/env/dora_aloha_real.yaml b/lerobot/configs/env/dora_aloha_real.yaml index 088781d4..6661c1d3 100644 --- a/lerobot/configs/env/dora_aloha_real.yaml +++ b/lerobot/configs/env/dora_aloha_real.yaml @@ -9,5 +9,6 @@ env: action_dim: 14 fps: ${fps} episode_length: 400 + real_world: true gym: fps: ${fps} diff --git a/lerobot/configs/env/pusht.yaml b/lerobot/configs/env/pusht.yaml index 771fbbf4..dfe69a0d 100644 --- a/lerobot/configs/env/pusht.yaml +++ b/lerobot/configs/env/pusht.yaml @@ -10,6 +10,7 @@ env: action_dim: 2 fps: ${fps} episode_length: 300 + real_world: false gym: obs_type: pixels_agent_pos render_mode: rgb_array diff --git a/lerobot/configs/env/xarm.yaml b/lerobot/configs/env/xarm.yaml index 9dbb96f5..bcee46d6 100644 --- a/lerobot/configs/env/xarm.yaml +++ b/lerobot/configs/env/xarm.yaml @@ -10,6 +10,7 @@ env: action_dim: 4 fps: ${fps} episode_length: 25 + real_world: false gym: obs_type: pixels_agent_pos render_mode: rgb_array diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py index ee78ec93..433335ac 100644 --- a/lerobot/scripts/eval.py +++ b/lerobot/scripts/eval.py @@ -44,6 +44,7 @@ https://huggingface.co/lerobot/diffusion_pusht/tree/main. import argparse import json import logging +import os import threading import time from contextlib import nullcontext @@ -164,7 +165,10 @@ def rollout( # VectorEnv stores is_success in `info["final_info"][env_index]["is_success"]`. "final_info" isn't # available of none of the envs finished. 
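Context for the change just below: with a gymnasium vector env, `info["final_info"]` holds one entry per sub-environment, unfinished envs appear as `None`, and a finished env's dict is not guaranteed to carry an `"is_success"` key, so both conditions have to be checked. A minimal sketch with made-up values:

```python
info = {"final_info": [{"is_success": True}, None, {}]}
successes = [
    i["is_success"] if (i is not None and "is_success" in i) else False
    for i in info["final_info"]
]
assert successes == [True, False, False]
```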
if "final_info" in info: - successes = [info["is_success"] if info is not None else False for info in info["final_info"]] + successes = [ + i["is_success"] if (i is not None and "is_success" in i) else False + for i in info["final_info"] + ] else: successes = [False] * env.num_envs @@ -516,6 +520,7 @@ def eval( out_dir = ( f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}" ) + os.makedirs(out_dir, exist_ok=True) if out_dir is None: raise NotImplementedError() @@ -545,7 +550,7 @@ def eval( env, policy, hydra_cfg.eval.n_episodes, - max_episodes_rendered=10, + max_episodes_rendered=hydra_cfg.eval.max_episodes_rendered, video_dir=Path(out_dir) / "eval", start_seed=hydra_cfg.seed, enable_progbar=True, diff --git a/lerobot/scripts/robot_controls/koch_configure.py b/lerobot/scripts/robot_controls/koch_configure.py new file mode 100644 index 00000000..0573d1e4 --- /dev/null +++ b/lerobot/scripts/robot_controls/koch_configure.py @@ -0,0 +1,211 @@ +""" +LCR Auto Configure: This program is used to automatically configure the Low Cost Robot (LCR) for the user. + +The program will: +1. Disable all torque motors of provided LCR. +2. Ask the user to move the LCR to the position 1 (see CONFIGURING.md for more details). +3. Record the position of the LCR. +4. Ask the user to move the LCR to the position 2 (see CONFIGURING.md for more details). +5. Record the position of the LCR. +6. Ask the user to move back the LCR to the position 1. +7. Record the position of the LCR. +8. Calculate the offset of the LCR and save it to the configuration file. + +It will also enable all appropriate operating modes for the LCR according if the LCR is a puppet or a master. +""" + +import argparse +import time + +import numpy as np + +from lerobot.common.robot_devices.motors.dynamixel import DynamixelBus, OperatingMode, DriveMode, TorqueMode + + +def pause(): + """ + Pause the program until the user presses the enter key. + """ + input("Press Enter to continue...") + + +def prepare_configuration(arm: DynamixelBus): + """ + Prepare the configuration for the LCR. + :param arm: DynamixelBus + """ + + # To be configured, all servos must be in "torque disable" mode + arm.sync_write_torque_enable(TorqueMode.DISABLED.value) + + # We need to work with 'extended position mode' (4) for all servos, because in joint mode (1) the servos can't + # rotate more than 360 degrees (from 0 to 4095) And some mistake can happen while assembling the arm, + # you could end up with a servo with a position 0 or 4095 at a crucial point See [ + # https://emanual.robotis.com/docs/en/dxl/x/x_series/#operating-mode11] + arm.sync_write_operating_mode(OperatingMode.EXTENDED_POSITION.value, [1, 2, 3, 4, 5]) + + # Gripper is always 'position control current based' (5) + arm.write_operating_mode(OperatingMode.CURRENT_CONTROLLED_POSITION.value, 6) + + # We need to reset the homing offset for all servos + arm.sync_write_homing_offset(0) + + # We need to work with 'normal drive mode' (0) for all servos + arm.sync_write_drive_mode(DriveMode.NON_INVERTED.value) + + +def invert_appropriate_positions(positions: np.array, inverted: list[bool]) -> np.array: + """ + Invert the appropriate positions. 
+ :param positions: numpy array of positions + :param inverted: list of booleans to determine if the position should be inverted + :return: numpy array of inverted positions + """ + for i, invert in enumerate(inverted): + if not invert and positions[i] is not None: + positions[i] = -positions[i] + + return positions + + +def calculate_corrections(positions: np.array, inverted: list[bool]) -> np.array: + """ + Calculate the corrections for the positions. + :param positions: numpy array of positions + :param inverted: list of booleans to determine if the position should be inverted + :return: numpy array of corrections + """ + + wanted = wanted_position_1() + + correction = invert_appropriate_positions(positions, inverted) + + for i in range(len(positions)): + if correction[i] is not None: + if inverted[i]: + correction[i] -= wanted[i] + else: + correction[i] += wanted[i] + + return correction + + +def calculate_nearest_rounded_positions(positions: np.array) -> np.array: + """ + Calculate the nearest rounded positions. + :param positions: numpy array of positions + :return: numpy array of nearest rounded positions + """ + + return np.array( + [round(positions[i] / 1024) * 1024 if positions[i] is not None else None for i in range(len(positions))]) + + +def configure_homing(arm: DynamixelBus, inverted: list[bool]) -> np.array: + """ + Configure the homing for the LCR. + :param arm: DynamixelBus + :param inverted: list of booleans to determine if the position should be inverted + """ + + # Reset homing offset for the servos + arm.sync_write_homing_offset(0) + + # Get the present positions of the servos + present_positions = arm.sync_read_present_position_i32() + + nearest_positions = calculate_nearest_rounded_positions(present_positions) + + correction = calculate_corrections(nearest_positions, inverted) + + # Write the homing offset for the servos + arm.sync_write_homing_offset(correction) + + +def configure_drive_mode(arm: DynamixelBus): + """ + Configure the drive mode for the LCR. 
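+    A joint is marked as inverted when its rounded present position does not match the
+    corresponding entry of `wanted_position_2`, and its drive mode is written accordingly.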
+ :param arm: DynamixelBus + :param homing: numpy array of homing + """ + + # Get current positions + present_positions = arm.sync_read_present_position_i32() + + nearest_positions = calculate_nearest_rounded_positions(present_positions) + + # construct 'inverted' list comparing nearest_positions and wanted_position_2 + inverted = [] + + for i in range(len(nearest_positions)): + inverted.append(nearest_positions[i] != wanted_position_2()[i]) + + # Write the drive mode for the servos + arm.sync_write_drive_mode( + [DriveMode.INVERTED.value if i else DriveMode.NON_INVERTED.value for i in inverted]) + + return inverted + + +def wanted_position_1() -> np.array: + """ + The present position wanted in position 1 for the arm + """ + return np.array([0, -1024, 1024, 0, 0, 0]) + + +def wanted_position_2() -> np.array: + """ + The present position wanted in position 2 for the arm + """ + return np.array([1024, 0, 0, 1024, 1024, -1024]) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="LCR Auto Configure: This program is used to automatically configure the Low Cost Robot (LCR) for " + "the user.") + + parser.add_argument("--port", type=str, required=True, help="The port of the LCR.") + + args = parser.parse_args() + + arm = DynamixelBus( + args.port, { + 1: "x_series", + 2: "x_series", + 3: "x_series", + 4: "x_series", + 5: "x_series", + 6: "x_series", + } + ) + + prepare_configuration(arm) + + # Ask the user to move the LCR to the position 1 + print("Please move the LCR to the position 1") + pause() + + configure_homing(arm, [False, False, False, False, False, False]) + + # Ask the user to move the LCR to the position 2 + print("Please move the LCR to the position 2") + pause() + + inverted = configure_drive_mode(arm) + + # Ask the user to move back the LCR to the position 1 + print("Please move back the LCR to the position 1") + pause() + + configure_homing(arm, inverted) + + print("Configuration done!") + print("Make sure everything is working properly:") + + while True: + positions = arm.sync_read_present_position_i32() + print(positions) + + time.sleep(1) diff --git a/lerobot/scripts/robot_controls/record_dataset.py b/lerobot/scripts/robot_controls/record_dataset.py new file mode 100644 index 00000000..451af411 --- /dev/null +++ b/lerobot/scripts/robot_controls/record_dataset.py @@ -0,0 +1,20 @@ + + +import time +from lerobot.common.robot_devices.robots.aloha import AlohaRobot +import torch + + +def record_dataset(): + robot = AlohaRobot(use_cameras=True) + robot.init_teleop() + + while True: + now = time.time() + observation, action = robot.teleop_step(record_data=True) + + dt_s = (time.time() - now) + print(f"Latency (ms): {dt_s * 1000:.2f}\tFrequency: {1 / dt_s:.2f}") + +if __name__ == "__main__": + record_dataset() \ No newline at end of file diff --git a/lerobot/scripts/robot_controls/replay_dataset.py b/lerobot/scripts/robot_controls/replay_dataset.py new file mode 100644 index 00000000..b6f0b932 --- /dev/null +++ b/lerobot/scripts/robot_controls/replay_dataset.py @@ -0,0 +1,19 @@ + +import time +from lerobot.common.robot_devices.robots.aloha import AlohaRobot +import torch + + +def record_dataset(): + robot = AlohaRobot(use_cameras=True) + robot.init_teleop() + + while True: + now = time.time() + observation, action = robot.teleop_step(record_data=True) + + dt_s = (time.time() - now) + print(f"Latency (ms): {dt_s * 1000:.2f}\tFrequency: {1 / dt_s:.2f}") + +if __name__ == "__main__": + record_dataset() \ No newline at end of file diff --git 
a/lerobot/scripts/robot_controls/run_policy.py b/lerobot/scripts/robot_controls/run_policy.py new file mode 100644 index 00000000..5ffc24e4 --- /dev/null +++ b/lerobot/scripts/robot_controls/run_policy.py @@ -0,0 +1,47 @@ + + +import time +from lerobot.common.robot_devices.robots.aloha import AlohaRobot +import torch + + + +def teleoperate(): + robot = AlohaRobot(use_cameras=False) + robot.init_teleop() + + while True: + now = time.time() + robot.teleop_step(record_data=False) + + dt_s = (time.time() - now) + print(f"Latency (ms): {dt_s * 1000:.2f}\tFrequency: {1 / dt_s:.2f}") + + +def record_teleop_data(): + robot = AlohaRobot(use_cameras=True) + robot.init_teleop() + + while True: + now = time.time() + observation, action = robot.teleop_step(record_data=True) + + dt_s = (time.time() - now) + print(f"Latency (ms): {dt_s * 1000:.2f}\tFrequency: {1 / dt_s:.2f}") + + + +def evaluate_policy(policy): + robot = AlohaRobot(use_cameras=True) + observation = robot.init_evaluate() + + while True: + now = time.time() + with torch.inference_mode(): + action = policy.select_action(observation) + + observation, action = robot.step(action, record_data=False) + + dt_s = (time.time() - now) + print(f"Latency (ms): {dt_s * 1000:.2f}\tFrequency: {1 / dt_s:.2f}") + diff --git a/lerobot/scripts/robot_controls/teleoperate.py b/lerobot/scripts/robot_controls/teleoperate.py new file mode 100644 index 00000000..e65c6cd0 --- /dev/null +++ b/lerobot/scripts/robot_controls/teleoperate.py @@ -0,0 +1,20 @@ + + +import time +from lerobot.common.robot_devices.robots.aloha import AlohaRobot + + +def teleoperate(): + robot = AlohaRobot(use_cameras=False) + robot.init_teleop() + + while True: + now = time.time() + robot.teleop_step(record_data=False) + + dt_s = (time.time() - now) + print(f"Latency (ms): {dt_s * 1000:.2f}\tFrequency: {1 / dt_s:.2f}") + + +if __name__ == "__main__": + teleoperate() \ No newline at end of file diff --git a/lerobot/scripts/robot_controls/teleoperate_cv2.py b/lerobot/scripts/robot_controls/teleoperate_cv2.py new file mode 100644 index 00000000..40f4f0ce --- /dev/null +++ b/lerobot/scripts/robot_controls/teleoperate_cv2.py @@ -0,0 +1,320 @@ +import argparse +from pathlib import Path +import time +import warnings + +import cv2 +import numpy as np +from examples.real_robot_example.gym_real_world.robot import Robot +import signal +import sys +# import pyrealsense2 as rs + +MAX_LEADER_GRIPPER_RAD = 0.7761942786701344 +MAX_LEADER_GRIPPER_POS = 2567 +MAX_FOLLOWER_GRIPPER_RAD = 1.6827769243105486 +MAX_FOLLOWER_GRIPPER_POS = 3100 + +MIN_LEADER_GRIPPER_RAD = -0.12732040539450828 +MIN_LEADER_GRIPPER_POS = 1984 +MIN_FOLLOWER_GRIPPER_RAD = 0.6933593161243099 +MIN_FOLLOWER_GRIPPER_POS = 2512 + +CAMERA_WIDTH = 640 +CAMERA_HEIGHT = 480 + +def convert_gripper_range_from_leader_to_follower(leader_gripper_pos): + follower_gripper_pos = \ + (leader_gripper_pos - MIN_LEADER_GRIPPER_POS) \ + / (MAX_LEADER_GRIPPER_POS - MIN_LEADER_GRIPPER_POS) \ + * (MAX_FOLLOWER_GRIPPER_POS - MIN_FOLLOWER_GRIPPER_POS) \ + + MIN_FOLLOWER_GRIPPER_POS + return follower_gripper_pos + +# alexander koch +# leader_port = "/dev/ttyACM1" +# follower_port = "/dev/ttyACM0" + +def disable_torque(): + leader_right_port = "/dev/ttyDXL_master_right" + leader_left_port = "/dev/ttyDXL_master_left" + follower_right_port = "/dev/ttyDXL_puppet_right" + follower_left_port = "/dev/ttyDXL_puppet_left" + # starts at 1 + all_servo_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9] + leader_right = Robot(leader_right_port, servo_ids=all_servo_ids) + leader_left = 
Robot(leader_left_port, servo_ids=all_servo_ids) + follower_right = Robot(follower_right_port, servo_ids=all_servo_ids) + follower_left = Robot(follower_left_port, servo_ids=all_servo_ids) + + leader_right._disable_torque() + leader_left._disable_torque() + follower_right._disable_torque() + follower_left._disable_torque() + + +def teleoperate(): + leader_right_port = "/dev/ttyDXL_master_right" + follower_right_port = "/dev/ttyDXL_puppet_right" + leader_left_port = "/dev/ttyDXL_master_left" + follower_left_port = "/dev/ttyDXL_puppet_left" + # starts at 1 + all_servo_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9] + leader_right = Robot(leader_right_port, servo_ids=all_servo_ids) + leader_left = Robot(leader_left_port, servo_ids=all_servo_ids) + follower_right = Robot(follower_right_port, servo_ids=all_servo_ids) + follower_left = Robot(follower_left_port, servo_ids=all_servo_ids) + + follower_right._enable_torque() + follower_left._enable_torque() + + while True: + now = time.time() + # Prepare to assign the positions of the leader to the follower + follower_right_pos = leader_right.read_position() + follower_left_pos = leader_left.read_position() + + # Update the position of the follower gripper to account for the different minimum and maximum range + # position in range [0, 4096[ which corresponds to 4096 bins of 360 degrees + # for all our dynamixel servors + # gripper id=8 has a different range from leader to follower + follower_right_pos[-1] = convert_gripper_range_from_leader_to_follower(follower_right_pos[-1]) + follower_left_pos[-1] = convert_gripper_range_from_leader_to_follower(follower_left_pos[-1]) + + # Assign + follower_right.set_goal_pos(follower_right_pos) + follower_left.set_goal_pos(follower_left_pos) + + print(f"Time to send pos: {(time.time() - now) * 1000}") + + +def capture_frame(camera: cv2.VideoCapture, output_color="rgb"): + # OpenCV acquires frames in BGR format (blue, green red) + ret, frame = camera.read() + if not ret: + raise OSError(f"Camera not found.") + + if output_color == "rgb": + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + return frame + +def find_camera_ids(out_dir="outputs/find_camera_ids/2024_06_19_cv2_1344"): + save_images = True + max_index_search_range = 60 + num_warmup_frames = 4 + + # Works well + codec = "yuyv" + fps = 30 + width = 640 + height = 480 + + # # Works well + # codec = "yuyv" + # fps = 60 + # width = 640 + # height = 480 + + # # Works well + # codec = "yuyv" + # fps = 90 + # width = 640 + # height = 480 + + # # Works well + # codec = "yuyv" + # fps = 30 + # width = 1280 + # height = 720 + + # Doesn't work well (timeout) + # codec = "mjpg" + # fps = 30 + # width = 1280 + # height = 720 + + out_dir += f"_{width}x{height}_{fps}_{codec}" + + camera_ids = [] + for camera_idx in range(max_index_search_range): + camera = cv2.VideoCapture(camera_idx) + is_open = camera.isOpened() + camera.release() + + if is_open: + print(f"Camera found at index {camera_idx}") + camera_ids.append(camera_idx) + + if len(camera_ids) == 0: + raise OSError("No camera has been found") + + # Change camera settings + cameras = [] + for camera_idx in camera_ids: + camera = cv2.VideoCapture(camera_idx) + + camera.set(cv2.CAP_PROP_FPS, fps) + camera.set(cv2.CAP_PROP_FRAME_WIDTH, width) + camera.set(cv2.CAP_PROP_FRAME_HEIGHT, height) + if codec == "mjpg": + camera.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"MJPG")) + + actual_fps = camera.get(cv2.CAP_PROP_FPS) + actual_width = camera.get(cv2.CAP_PROP_FRAME_WIDTH) + actual_height = camera.get(cv2.CAP_PROP_FRAME_HEIGHT) 
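+        # cv2.VideoCapture.set() is best-effort: the driver may silently fall back to another
+        # mode, so read the settings back and warn when they differ from the requested values.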
+ + if fps != actual_fps: + warnings.warn(f"{fps=} != {actual_fps=}", stacklevel=1) + if width != actual_width: + warnings.warn(f"{width=} != {actual_width=}", stacklevel=1) + if height != actual_height: + warnings.warn(f"{height=} != {actual_height=}", stacklevel=1) + + cameras.append(camera) + + out_dir = Path(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + + print("Capturing a few frames to warmup") + for _ in range(num_warmup_frames): + for camera_idx, camera in zip(camera_ids, cameras): + print(f"Capturing camera {camera_idx}") + try: + frame = capture_frame(camera, output_color="bgr" if save_images else "rgb") + time.sleep(0.01) + except OSError as e: + print(e) + time.sleep(0.1) + + print("Capturing frames") + try: + while True: + now = time.time() + for camera_idx, camera in zip(camera_ids, cameras): + try: + frame = capture_frame(camera, output_color="bgr" if save_images else "rgb") + except OSError as e: + print(e) + + def write_shape(frame): + height, width = frame.shape[:2] + text = f'Width: {width} Height: {height}' + + # Define the font, scale, color, and thickness + font = cv2.FONT_HERSHEY_SIMPLEX + font_scale = 1 + color = (255, 0, 0) # Blue in BGR + thickness = 2 + + position = (10, height - 10) # 10 pixels from the bottom-left corner + cv2.putText(frame, text, position, font, font_scale, color, thickness) + + if save_images: + frame_path = out_dir / f"camera_{camera_idx:02}.png" + print(f"Write to {frame_path}") + write_shape(frame) + cv2.imwrite(str(frame_path), frame) + time.sleep(0.1) + + dt_s = (time.time() - now) + dt_ms = dt_s * 1000 + freq = 1 / dt_s + print(f"Latency (ms): {dt_ms:.2f}\tFrequency: {freq:.2f}") + + if save_images: + break + if cv2.waitKey(1) & 0xFF == ord("q"): + break + finally: + # Stop streaming + for camera in cameras: + camera.release() + + return camera_ids + +def record_data(): + leader_right_port = "/dev/ttyDXL_master_right" + follower_right_port = "/dev/ttyDXL_puppet_right" + leader_left_port = "/dev/ttyDXL_master_left" + follower_left_port = "/dev/ttyDXL_puppet_left" + # starts at 1 + all_servo_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9] + leader_right = Robot(leader_right_port, servo_ids=all_servo_ids) + leader_left = Robot(leader_left_port, servo_ids=all_servo_ids) + follower_right = Robot(follower_right_port, servo_ids=all_servo_ids) + follower_left = Robot(follower_left_port, servo_ids=all_servo_ids) + + follower_right._enable_torque() + follower_left._enable_torque() + + # To get the camera_ids, run: `find_camera_ids()` + camera_high = cv2.VideoCapture(10) + camera_low = cv2.VideoCapture(22) + camera_right_wrist = cv2.VideoCapture(16) + camera_left_wrist = cv2.VideoCapture(4) + + if not camera_high.isOpened(): + raise OSError("Camera high port can't be accessed.") + if not camera_low.isOpened(): + raise OSError("Camera low port can't be accessed.") + if not camera_right_wrist.isOpened(): + raise OSError("Camera right_wrist port can't be accessed.") + if not camera_left_wrist.isOpened(): + raise OSError("Camera left_wrist port can't be accessed.") + + while True: + now = time.time() + + frame_high = capture_frame(camera_high) + frame_low = capture_frame(camera_low) + frame_right_wrist = capture_frame(camera_right_wrist) + frame_left_wrist = capture_frame(camera_left_wrist) + + # cv2.imshow("high", frame_high) + # cv2.imshow("low", frame_low) + # cv2.imshow("right_wrist", frame_right_wrist) + # cv2.imshow("left_wrist", frame_left_wrist) + + # Prepare to assign the positions of the leader to the follower + follower_right_pos = 
leader_right.read_position() + follower_left_pos = leader_left.read_position() + + # Update the position of the follower gripper to account for the different minimum and maximum range + # position in range [0, 4096[ which corresponds to 4096 bins of 360 degrees + # for all our dynamixel servors + # gripper id=8 has a different range from leader to follower + follower_right_pos[-1] = convert_gripper_range_from_leader_to_follower(follower_right_pos[-1]) + follower_left_pos[-1] = convert_gripper_range_from_leader_to_follower(follower_left_pos[-1]) + + # Assign + follower_right.set_goal_pos(follower_right_pos) + follower_left.set_goal_pos(follower_left_pos) + + dt_s = (time.time() - now) + dt_ms = dt_s * 1000 + freq = 1 / dt_s + print(f"Latency (ms): {dt_ms:.2f}\tFrequency: {freq:.2f}") + + if cv2.waitKey(1) & 0xFF == ord("q"): + break + + camera_high.release() + camera_low.release() + camera_right_wrist.release() + camera_left_wrist.release() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--mode", type=str, choices=["teleoperate", "disable_torque", "record_data", "find_camera_ids"], default="teleoperate") + args = parser.parse_args() + + if args.mode == "teleoperate": + teleoperate() + elif args.mode == "disable_torque": + disable_torque() + elif args.mode == "record_data": + record_data() + elif args.mode == "find_camera_ids": + find_camera_ids() diff --git a/lerobot/scripts/robot_controls/teleoperate_intelrealsense.py b/lerobot/scripts/robot_controls/teleoperate_intelrealsense.py new file mode 100644 index 00000000..c9d64731 --- /dev/null +++ b/lerobot/scripts/robot_controls/teleoperate_intelrealsense.py @@ -0,0 +1,338 @@ +import argparse +from pathlib import Path +import time +import traceback + +import cv2 +import numpy as np +from examples.real_robot_example.gym_real_world.robot import Robot +import signal +import sys +import pyrealsense2 as rs + +MAX_LEADER_GRIPPER_RAD = 0.7761942786701344 +MAX_LEADER_GRIPPER_POS = 2567 +MAX_FOLLOWER_GRIPPER_RAD = 1.6827769243105486 +MAX_FOLLOWER_GRIPPER_POS = 3100 + +MIN_LEADER_GRIPPER_RAD = -0.12732040539450828 +MIN_LEADER_GRIPPER_POS = 1984 +MIN_FOLLOWER_GRIPPER_RAD = 0.6933593161243099 +MIN_FOLLOWER_GRIPPER_POS = 2512 + +CAMERA_WIDTH = 640 +CAMERA_HEIGHT = 480 + +def convert_gripper_range_from_leader_to_follower(leader_gripper_pos): + follower_gripper_pos = \ + (leader_gripper_pos - MIN_LEADER_GRIPPER_POS) \ + / (MAX_LEADER_GRIPPER_POS - MIN_LEADER_GRIPPER_POS) \ + * (MAX_FOLLOWER_GRIPPER_POS - MIN_FOLLOWER_GRIPPER_POS) \ + + MIN_FOLLOWER_GRIPPER_POS + return follower_gripper_pos + +# alexander koch +# leader_port = "/dev/ttyACM1" +# follower_port = "/dev/ttyACM0" + +def disable_torque(): + leader_right_port = "/dev/ttyDXL_master_right" + leader_left_port = "/dev/ttyDXL_master_left" + follower_right_port = "/dev/ttyDXL_puppet_right" + follower_left_port = "/dev/ttyDXL_puppet_left" + # starts at 1 + all_servo_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9] + leader_right = Robot(leader_right_port, servo_ids=all_servo_ids) + leader_left = Robot(leader_left_port, servo_ids=all_servo_ids) + follower_right = Robot(follower_right_port, servo_ids=all_servo_ids) + follower_left = Robot(follower_left_port, servo_ids=all_servo_ids) + + leader_right._disable_torque() + leader_left._disable_torque() + follower_right._disable_torque() + follower_left._disable_torque() + + +def teleoperate(): + leader_right_port = "/dev/ttyDXL_master_right" + follower_right_port = "/dev/ttyDXL_puppet_right" + leader_left_port = 
"/dev/ttyDXL_master_left" + follower_left_port = "/dev/ttyDXL_puppet_left" + # starts at 1 + all_servo_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9] + leader_right = Robot(leader_right_port, servo_ids=all_servo_ids) + leader_left = Robot(leader_left_port, servo_ids=all_servo_ids) + follower_right = Robot(follower_right_port, servo_ids=all_servo_ids) + follower_left = Robot(follower_left_port, servo_ids=all_servo_ids) + + follower_right._enable_torque() + follower_left._enable_torque() + + while True: + now = time.time() + # Prepare to assign the positions of the leader to the follower + follower_right_pos = leader_right.read_position() + follower_left_pos = leader_left.read_position() + + # Update the position of the follower gripper to account for the different minimum and maximum range + # position in range [0, 4096[ which corresponds to 4096 bins of 360 degrees + # for all our dynamixel servors + # gripper id=8 has a different range from leader to follower + follower_right_pos[-1] = convert_gripper_range_from_leader_to_follower(follower_right_pos[-1]) + follower_left_pos[-1] = convert_gripper_range_from_leader_to_follower(follower_left_pos[-1]) + + # Assign + follower_right.set_goal_pos(follower_right_pos) + follower_left.set_goal_pos(follower_left_pos) + + print(f"Time to send pos: {(time.time() - now) * 1000}") + + +def capture_frame(camera: cv2.VideoCapture, width=CAMERA_WIDTH, height=CAMERA_HEIGHT, output_color="rgb"): + # OpenCV acquires frames in BGR format (blue, green red) + ret, frame = camera.read() + if not ret: + raise OSError(f"Camera not found.") + + if output_color == "rgb": + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + # # Define your crop coordinates (top left corner and bottom right corner) + # x1, y1 = 400, 0 # Example starting coordinates (top left of the crop rectangle) + # x2, y2 = 1600, 900 # Example ending coordinates (bottom right of the crop rectangle) + # # Crop the image + # image = image[y1:y2, x1:x2] + # Resize the image + frame = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA) + return frame + +def write_shape(frame): + height, width = frame.shape[:2] + text = f'Width: {width} Height: {height}' + + # Define the font, scale, color, and thickness + font = cv2.FONT_HERSHEY_SIMPLEX + font_scale = 1 + color = (255, 0, 0) # Blue in BGR + thickness = 2 + + position = (10, height - 10) # 10 pixels from the bottom-left corner + cv2.putText(frame, text, position, font, font_scale, color, thickness) + +def find_camera_ids(out_dir="outputs/find_camera_ids/2024_06_19_1039"): + """ + Install: https://github.com/IntelRealSense/librealsense/blob/master/doc/distribution_linux.md + + List cameras and make sure the firmware is up-to-date: https://dev.intelrealsense.com/docs/firmware-releases-d400 + ```bash + rs-fw-update -l + + > Connected devices: + > 1) [USB] Intel RealSense D405 s/n 128422270109, update serial number: 133323070634, firmware version: 5.16.0.1 + > 2) [USB] Intel RealSense D405 s/n 128422271609, update serial number: 130523070758, firmware version: 5.16.0.1 + > 3) [USB] Intel RealSense D405 s/n 128422271614, update serial number: 133323070576, firmware version: 5.16.0.1 + > 4) [USB] Intel RealSense D405 s/n 128422271393, update serial number: 133323070271, firmware version: 5.16.0.1 + ``` + """ + save_images = False + # enable once, if you reach "Frame didn't arrive" exception + force_hardware_reset = False + + # Works well! + # use_depth = False + # fps = 90 + # width = 640 + # height = 480 + + # # Works well! 
+ # use_depth = True + # fps = 90 + # width = 640 + # height = 480 + + # # Doesn't work well, latency varies too much + # use_depth = True + # fps = 30 + # width = 1280 + # height = 720 + + # Works well + use_depth = False + fps = 30 + width = 1280 + height = 720 + + out_dir += f"_{width}x{height}_{fps}_depth_{use_depth}" + + ctx = rs.context() + + serials = [] + cameras = [] + for device in ctx.query_devices(): + print(device) + if force_hardware_reset: + device.hardware_reset() + + SERIAL_NUMBER_INDEX = 1 + serial_number = device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX)) + + config = rs.config() + config.enable_device(serial_number) + + config.enable_stream(rs.stream.color, width, height, rs.format.bgr8, fps) + + if use_depth: + config.enable_stream(rs.stream.depth, width, height, rs.format.z16, fps) + + pipeline = rs.pipeline() + pipeline.start(config) + + serials.append(serial_number) + cameras.append(pipeline) + + out_dir = Path(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + + try: + while True: + now = time.time() + for serial, camera in zip(serials, cameras): + # Wait for a coherent pair of frames: depth and color + try: + frames = camera.wait_for_frames() + except RuntimeError as e: + if "Frame didn't arrive" in str(e): + print(f"{e}: Trying hardware_reset. If it still doesn't work, try `force_hardware_reset=True`.") + device.hardware_reset() + traceback.print_exc() + continue + + # acquire color image + color_frame = frames.get_color_frame() + if not color_frame: + print("Empty color frame") + continue + # to numpy + image = np.asanyarray(color_frame.get_data()) + + if save_images: + image_path = out_dir / f"camera_{serial:02}.png" + print(f"Write to {image_path}") + write_shape(image) + cv2.imwrite(str(image_path), image) + + if use_depth: + # acquire depth image + depth_frame = frames.get_depth_frame() + if not depth_frame: + print("Empty depth frame") + continue + # to numpy + depth = np.asanyarray(depth_frame.get_data()) + + if save_images: + # Apply colormap on depth image (image must be converted to 8-bit per pixel first) + depth_image = cv2.applyColorMap(cv2.convertScaleAbs(depth, alpha=0.03), cv2.COLORMAP_JET) + depth_image_path = out_dir / f"camera_{serial:02}_depth.png" + print(f"Write to {depth_image_path}") + write_shape(depth_image) + cv2.imwrite(str(depth_image_path), depth_image) + + dt_s = (time.time() - now) + dt_ms = dt_s * 1000 + freq = 1 / dt_s + print(f"Latency (ms): {dt_ms:.2f}\tFrequency: {freq:.2f}") + + if save_images: + break + if cv2.waitKey(1) & 0xFF == ord("q"): + break + finally: + # Stop streaming + for camera in cameras: + camera.stop() + + return serials + + +def record_data(): + leader_right_port = "/dev/ttyDXL_master_right" + follower_right_port = "/dev/ttyDXL_puppet_right" + leader_left_port = "/dev/ttyDXL_master_left" + follower_left_port = "/dev/ttyDXL_puppet_left" + # starts at 1 + all_servo_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9] + leader_right = Robot(leader_right_port, servo_ids=all_servo_ids) + leader_left = Robot(leader_left_port, servo_ids=all_servo_ids) + follower_right = Robot(follower_right_port, servo_ids=all_servo_ids) + follower_left = Robot(follower_left_port, servo_ids=all_servo_ids) + + follower_right._enable_torque() + follower_left._enable_torque() + + # To get the camera_ids, run: `find_camera_ids()` + camera_high = cv2.VideoCapture(10) + camera_low = cv2.VideoCapture(22) + camera_right_wrist = cv2.VideoCapture(16) + camera_left_wrist = cv2.VideoCapture(4) + + if not camera_high.isOpened(): + raise OSError("Camera 
high port can't be accessed.")
+    if not camera_low.isOpened():
+        raise OSError("Camera low port can't be accessed.")
+    if not camera_right_wrist.isOpened():
+        raise OSError("Camera right_wrist port can't be accessed.")
+    if not camera_left_wrist.isOpened():
+        raise OSError("Camera left_wrist port can't be accessed.")
+
+    while True:
+        now = time.time()
+
+        frame_high = capture_frame(camera_high)
+        frame_low = capture_frame(camera_low)
+        frame_right_wrist = capture_frame(camera_right_wrist)
+        frame_left_wrist = capture_frame(camera_left_wrist)
+
+        # cv2.imshow("high", frame_high)
+        # cv2.imshow("low", frame_low)
+        # cv2.imshow("right_wrist", frame_right_wrist)
+        # cv2.imshow("left_wrist", frame_left_wrist)
+
+        # Prepare to assign the positions of the leader to the follower
+        follower_right_pos = leader_right.read_position()
+        follower_left_pos = leader_left.read_position()
+
+        # Update the position of the follower gripper to account for the different minimum and maximum range
+        # position in range [0, 4096[ which corresponds to 4096 bins of 360 degrees
+        # for all our dynamixel servos
+        # gripper id=8 has a different range from leader to follower
+        follower_right_pos[-1] = convert_gripper_range_from_leader_to_follower(follower_right_pos[-1])
+        follower_left_pos[-1] = convert_gripper_range_from_leader_to_follower(follower_left_pos[-1])
+
+        # Assign
+        follower_right.set_goal_pos(follower_right_pos)
+        follower_left.set_goal_pos(follower_left_pos)
+
+        print(f"Time to send pos: {(time.time() - now) * 1000:.2f} ms")
+
+        if cv2.waitKey(1) & 0xFF == ord("q"):
+            break
+
+    camera_high.release()
+    camera_low.release()
+    camera_right_wrist.release()
+    camera_left_wrist.release()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--mode", type=str, choices=["teleoperate", "disable_torque", "record_data", "find_camera_ids"], default="teleoperate")
+    args = parser.parse_args()
+
+    if args.mode == "teleoperate":
+        teleoperate()
+    elif args.mode == "disable_torque":
+        disable_torque()
+    elif args.mode == "record_data":
+        record_data()
+    elif args.mode == "find_camera_ids":
+        find_camera_ids()
diff --git a/lerobot/scripts/test.py b/lerobot/scripts/test.py
new file mode 100644
index 00000000..0bcf7c4a
--- /dev/null
+++ b/lerobot/scripts/test.py
@@ -0,0 +1,10 @@
+import pyrealsense2 as rs
+pipe = rs.pipeline()
+profile = pipe.start()
+try:
+    for i in range(0, 100):
+        frames = pipe.wait_for_frames()
+        for f in frames:
+            print(f.profile)
+finally:
+    pipe.stop()
\ No newline at end of file
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index 860412bd..3b5b8948 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -406,7 +406,8 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
 
         step += 1
 
-    eval_env.close()
+    if cfg.training.eval_freq > 0:
+        eval_env.close()
     logging.info("End of training")
 
 
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 0a6ce422..4ecb716c 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -29,8 +29,8 @@ def _find_and_replace(text: str, finds_and_replaces: list[tuple[str, str]]) -> s
     return text
 
 
-def _run_script(path):
-    subprocess.run([sys.executable, path], check=True)
+def _run_script(path, args=None):
+    subprocess.run([sys.executable, path] + (args if args is not None else []), check=True)
 
 
 def _read_file(path):
@@ -126,3 +126,22 @@ def test_examples_basic2_basic3_advanced1():
     # Restore stdout to its original state
     sys.stdout = sys.__stdout__
     assert "Average loss on validation set" in printed_output
+
+
+def test_real_world_recording():
+    path = "examples/real_robot_example/record_training_data.py"
+    _run_script(
+        path,
+        [
+            "--data_dir",
+            "outputs/examples",
+            "--repo-id",
+            "real_world_debug",
+            "--num-episodes",
+            "2",
+            "--num-frames",
+            "10",
+            "--mock-robot",
+        ],
+    )
+    assert Path("outputs/examples/real_world_debug/video/episode_0.mp4").exists()