Compare commits

...

70 Commits

Author SHA1 Message Date
Remi Cadene
82df3feaee TOREMOVE: isolate aloha on __init__ to see if it creates the bug 2024-10-07 12:12:32 +02:00
Remi Cadene
2a8a9dc25a TOREMOVE: remove aloha from __init__ to test if this creates the bug 2024-10-07 12:11:24 +02:00
Remi Cadene
dc08c3bfa4 small 2024-10-07 12:10:46 +02:00
Remi Cadene
68fff561de Merge remote-tracking branch 'origin/main' into user/rcadene/2024_09_10_train_aloha 2024-10-04 19:08:55 +02:00
Remi Cadene
433e950348 Merge remote-tracking branch 'origin/main' into user/rcadene/2024_09_10_train_aloha 2024-10-03 17:16:59 +02:00
Remi Cadene
e58e59411a Add num_workers >=1 capabilities (default to 1) 2024-09-28 16:05:54 +02:00
Remi Cadene
3369d351a7 Fix slow fps 2024-09-28 15:41:15 +02:00
Remi Cadene
8b89d03d74 Merge remote-tracking branch 'origin/user/rcadene/2024_09_10_train_aloha' into user/rcadene/2024_09_10_train_aloha 2024-09-28 15:01:15 +02:00
Remi Cadene
77ba43d25b WIP: add multiprocess 2024-09-28 15:00:38 +02:00
Remi Cadene
9b76ee9eb0 Merge remote-tracking branch 'origin/user/rcadene/2024_09_01_mock_robot_devices' into user/rcadene/2024_09_10_train_aloha 2024-09-28 14:32:33 +02:00
Remi Cadene
48911e0cd3 Merge remote-tracking branch 'origin/main' into user/rcadene/2024_09_10_train_aloha 2024-09-28 13:25:51 +02:00
Remi Cadene
5c73bec913 Address Jess comments 2024-09-28 13:11:45 +02:00
Remi
1de04e4756 Merge branch 'main' into user/rcadene/2024_09_01_mock_robot_devices 2024-09-27 18:04:56 +02:00
Remi Cadene
83cfe60783 tests 2024-09-27 17:46:49 +02:00
Remi Cadene
0e63f7c1b5 test 2024-09-27 17:42:48 +02:00
Remi Cadene
bc479cb2d4 test 2024-09-27 17:22:51 +02:00
Remi Cadene
2c9defabdd test 2024-09-27 17:15:21 +02:00
Remi Cadene
cc5c623179 test 2024-09-27 17:12:40 +02:00
Remi Cadene
88c2ed419e fix unit tests 2024-09-27 17:03:27 +02:00
Remi Cadene
2e694fcf8f test 2024-09-27 16:56:53 +02:00
Remi Cadene
9dea00ee9e retest 2024-09-27 16:39:53 +02:00
Remi Cadene
50a979d6de Check if file exists 2024-09-27 16:33:58 +02:00
Remi Cadene
76cc47956a add 2024-09-27 16:21:27 +02:00
Remi Cadene
675d4286c8 add 2024-09-27 16:20:00 +02:00
Remi Cadene
da1888a378 revert to all tests 2024-09-27 14:59:17 +02:00
Remi Cadene
3f9f3dd027 Add pyserial 2024-09-27 14:57:32 +02:00
Remi Cadene
c704eb94c0 improve except 2024-09-27 13:54:32 +02:00
Remi Cadene
0352c61b00 Add more exception except 2024-09-27 13:44:41 +02:00
Remi Cadene
e499d60742 fix unit test 2024-09-27 12:29:58 +02:00
Remi Cadene
81f17d505e if not '~cameras' in overrides 2024-09-27 12:21:06 +02:00
Remi Cadene
bf7e906b70 add +COLOR_RGB2BGR 2024-09-27 12:11:48 +02:00
Remi Cadene
a7350d9b65 add mock=False 2024-09-27 12:02:14 +02:00
Remi Cadene
8da08935d4 move mock_motor in test_motors.py 2024-09-26 16:45:04 +02:00
Remi Cadene
7450adc72b no more require_mock_motor 2024-09-26 16:40:24 +02:00
Remi Cadene
e66900e387 mock_motor instead of require_mock_motor 2024-09-26 16:35:37 +02:00
Remi Cadene
89b2b7397e fix unit tests 2024-09-26 16:31:23 +02:00
Remi Cadene
48be576cc6 fix unit tests 2024-09-26 16:28:08 +02:00
Remi Cadene
395720a5de Revert "Remove @require_x"
This reverts commit 8a7b5c45c7.
2024-09-26 14:35:26 +02:00
Remi Cadene
8a7b5c45c7 Remove @require_x 2024-09-26 14:35:17 +02:00
Remi Cadene
b6b7fda5f8 custom pytest speedup (TOREMOVE) 2024-09-26 13:53:31 +02:00
Remi Cadene
8b36223832 fix unit tests 2024-09-26 13:51:45 +02:00
Remi Cadene
a236382590 fix unit tests 2024-09-26 13:19:29 +02:00
Remi Cadene
3cb85bcd4b Fix unit test 2024-09-26 13:09:08 +02:00
Remi Cadene
f2b1842d69 fix unit test 2024-09-26 11:48:22 +02:00
Remi Cadene
500d505bf6 Add support for video=False in record (no tested yet) 2024-09-26 11:41:32 +02:00
Remi Cadene
2c0171632f fix aloha mock 2024-09-25 15:18:21 +02:00
Remi Cadene
bded8cbbe9 Fix unit tests 2024-09-25 14:11:28 +02:00
Remi Cadene
6377d2a96c mock) 2024-09-25 12:29:53 +02:00
Remi Cadene
558420115e mock=False 2024-09-25 12:22:22 +02:00
Remi Cadene
bcf27b8c01 Skip mocking tests with minimal pytest 2024-09-25 12:11:27 +02:00
Remi
f0452c222a Merge branch 'main' into user/rcadene/2024_09_01_mock_robot_devices 2024-09-25 11:36:58 +02:00
Remi Cadene
1bf284562e pre-commit run --all-files 2024-09-25 11:36:08 +02:00
Simon Alibert
886923a890 Fix opencv segmentation fault (#442)
Co-authored-by: Remi <remi.cadene@huggingface.co>
2024-09-25 11:29:59 +02:00
Remi Cadene
adc8dc9bfb Address comments 2024-09-16 14:53:45 +02:00
Remi Cadene
624551bea9 Address comments 2024-09-16 14:52:27 +02:00
Remi Cadene
6636db5b51 Address comments 2024-09-16 14:51:25 +02:00
Remi
ccc0586d45 Apply suggestions from code review
Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com>
2024-09-16 14:49:19 +02:00
Remi
bab19d9b1d Merge branch 'main' into user/rcadene/2024_09_10_train_aloha 2024-09-15 17:44:52 +02:00
Remi Cadene
783b78ae9a Fix unit test test_policies, backward, Remove no_state from test 2024-09-15 17:30:48 +02:00
Remi Cadene
e47856add6 Fix unit test test_policies, backward, Remove no_state from test 2024-09-15 17:22:12 +02:00
Remi Cadene
3f993d5250 fix typo 2024-09-12 02:44:46 +02:00
Remi Cadene
cd4d2257d3 Fix unit test 2024-09-12 02:43:21 +02:00
Remi Cadene
53ebf9cf9f Mock robots (WIP segmentation fault) 2024-09-12 01:43:32 +02:00
Remi Cadene
4151630c24 Mock dynamixel_sdk 2024-09-12 01:08:44 +02:00
Remi Cadene
bc0e691280 force push aloha_real.yaml 2024-09-10 23:31:05 +02:00
Remi Cadene
e1763aa906 Clean + Add act_aloha_real.yaml + Add act_real.yaml 2024-09-10 19:45:59 +02:00
Remi Cadene
3bd5ea4d7a WIP 2024-09-10 18:30:39 +02:00
Remi Cadene
44b8394365 add dynamic import for cv2 and pyrealsense2 2024-09-09 19:32:35 +02:00
Remi Cadene
2469c99053 fix unit tests 2024-09-09 19:19:05 +02:00
Remi Cadene
96cc2433d6 Mock OpenCVCamera 2024-09-09 13:37:37 +02:00
17 changed files with 112 additions and 160 deletions

View File

@@ -195,8 +195,8 @@ available_policies = [
# lists all available robots from `lerobot/common/robot_devices/robots`
available_robots = [
"koch",
"koch_bimanual",
# "koch",
# "koch_bimanual",
"aloha",
]
@@ -216,7 +216,9 @@ available_policies_per_env = {
"aloha": ["act"],
"pusht": ["diffusion", "vqbet"],
"xarm": ["tdmpc"],
"dora_aloha_real": ["act_real"],
"koch_real": ["act_koch_real"],
"aloha_real": ["act_aloha_real"],
"dora_aloha_real": ["act_aloha_real"],
}
env_task_pairs = [(env, task) for env, tasks in available_tasks_per_env.items() for task in tasks]

10
lerobot/configs/env/aloha_real.yaml vendored Normal file
View File

@@ -0,0 +1,10 @@
# @package _global_
fps: 30
env:
name: real_world
task: null
state_dim: 14
action_dim: 14
fps: ${fps}

View File

@@ -1,16 +1,22 @@
# @package _global_
# Use `act_real.yaml` to train on real-world Aloha/Aloha2 datasets.
# Compared to `act.yaml`, it contains 4 cameras (i.e. cam_right_wrist, cam_left_wrist, images,
# cam_low) instead of 1 camera (i.e. top). Also, `training.eval_freq` is set to -1. This config is used
# to evaluate checkpoints at a certain frequency of training steps. When it is set to -1, it deactivates evaluation.
# This is because real-world evaluation is done through [dora-lerobot](https://github.com/dora-rs/dora-lerobot).
# Look at its README for more information on how to evaluate a checkpoint in the real-world.
# Use `act_aloha_real.yaml` to train on real-world datasets collected on Aloha or Aloha-2 robots.
# Compared to `act.yaml`, it contains 4 cameras (i.e. cam_right_wrist, cam_left_wrist, cam_high, cam_low) instead of 1 camera (i.e. top).
# Also, `training.eval_freq` is set to -1. This setting controls how often (in training steps) checkpoints are evaluated.
# When it is set to -1, evaluation is deactivated. This is because real-world evaluation is done through our `control_robot.py` script.
# Look at the documentation in the header of `control_robot.py` for more information on how to collect data, train and evaluate a policy.
#
# Example of usage for training:
# Example of usage for training and inference with `control_robot.py`:
# ```bash
# python lerobot/scripts/train.py \
# policy=act_real \
# policy=act_aloha_real \
# env=aloha_real
# ```
#
# Example of usage for training and inference with [Dora-rs](https://github.com/dora-rs/dora-lerobot):
# ```bash
# python lerobot/scripts/train.py \
# policy=act_aloha_real \
# env=dora_aloha_real
# ```
@@ -36,10 +42,11 @@ override_dataset_stats:
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
training:
offline_steps: 100000
offline_steps: 80000
online_steps: 0
eval_freq: -1
save_freq: 20000
save_freq: 10000
log_freq: 100
save_checkpoint: true
batch_size: 8
@@ -62,7 +69,7 @@ policy:
# Input / output structure.
n_obs_steps: 1
chunk_size: 100 # chunk_size
chunk_size: 100
n_action_steps: 100
input_shapes:
@@ -107,7 +114,7 @@ policy:
n_vae_encoder_layers: 4
# Inference.
temporal_ensemble_coeff: null
temporal_ensemble_momentum: null
# Training and loss computation.
dropout: 0.1

View File

@@ -1,110 +0,0 @@
# @package _global_
# Use `act_real_no_state.yaml` to train on real-world Aloha/Aloha2 datasets when cameras are moving (e.g. wrist cameras)
# Compared to `act_real.yaml`, it is camera only and does not use the state as input, which is a vector of robot joint positions.
# We validated experimentally that not using the state reaches a better success rate. Our hypothesis is that `act_real.yaml` might
# overfit to the state, because the images are more complex to learn from since they are moving.
#
# Example of usage for training:
# ```bash
# python lerobot/scripts/train.py \
# policy=act_real_no_state \
# env=dora_aloha_real
# ```
seed: 1000
dataset_repo_id: lerobot/aloha_static_vinh_cup
override_dataset_stats:
observation.images.cam_right_wrist:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
observation.images.cam_left_wrist:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
observation.images.cam_high:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
observation.images.cam_low:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
training:
offline_steps: 100000
online_steps: 0
eval_freq: -1
save_freq: 20000
save_checkpoint: true
batch_size: 8
lr: 1e-5
lr_backbone: 1e-5
weight_decay: 1e-4
grad_clip_norm: 10
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50
batch_size: 50
# See `configuration_act.py` for more details.
policy:
name: act
# Input / output structure.
n_obs_steps: 1
chunk_size: 100 # chunk_size
n_action_steps: 100
input_shapes:
# TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
observation.images.cam_right_wrist: [3, 480, 640]
observation.images.cam_left_wrist: [3, 480, 640]
observation.images.cam_high: [3, 480, 640]
observation.images.cam_low: [3, 480, 640]
output_shapes:
action: ["${env.action_dim}"]
# Normalization / Unnormalization
input_normalization_modes:
observation.images.cam_right_wrist: mean_std
observation.images.cam_left_wrist: mean_std
observation.images.cam_high: mean_std
observation.images.cam_low: mean_std
output_normalization_modes:
action: mean_std
# Architecture.
# Vision backbone.
vision_backbone: resnet18
pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
replace_final_stride_with_dilation: false
# Transformer layers.
pre_norm: false
dim_model: 512
n_heads: 8
dim_feedforward: 3200
feedforward_activation: relu
n_encoder_layers: 4
# Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code
# that means only the first layer is used. Here we match the original implementation by setting this to 1.
# See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521.
n_decoder_layers: 1
# VAE.
use_vae: true
latent_dim: 32
n_vae_encoder_layers: 4
# Inference.
temporal_ensemble_coeff: null
# Training and loss computation.
dropout: 0.1
kl_weight: 10.0

View File

@@ -102,6 +102,7 @@ import argparse
import concurrent.futures
import json
import logging
import multiprocessing
import os
import platform
import shutil
@@ -239,6 +240,48 @@ def is_headless():
return True
def loop_to_save_frame_in_threads(frame_queue, num_image_writers):
    """Consume frames from `frame_queue` and write them with a pool of threads.

    Each queue item is unpacked as `(frame, key, frame_index, episode_index, videos_dir)`
    and handed to `save_image` on a `ThreadPoolExecutor` with `num_image_writers` threads.
    Receiving `None` from the queue stops the loop; the function then waits for every
    submitted write to finish (showing a progress bar) before returning.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_image_writers) as pool:
        pending = []

        while True:
            # `get()` blocks until an item is available; `None` is the stop signal.
            item = frame_queue.get()
            if item is None:
                break

            frame, key, frame_index, episode_index, videos_dir = item
            write_job = pool.submit(save_image, frame, key, frame_index, episode_index, videos_dir)
            pending.append(write_job)

        # Wait for all submitted writes to complete before leaving the pool.
        completed = concurrent.futures.as_completed(pending)
        for _ in tqdm.tqdm(completed, total=len(pending), desc="Writting images"):
            pass
def start_frame_workers(frame_queue, num_image_writers, num_workers=1):
    """Launch the processes that save frames pulled from `frame_queue`.

    Creates `num_workers` `multiprocessing.Process` objects, each targeting
    `loop_to_save_frame_in_threads(frame_queue, num_image_writers)`, starts them,
    and returns them as a list (to be passed to `stop_workers` later).
    """
    processes = [
        multiprocessing.Process(
            target=loop_to_save_frame_in_threads,
            args=(frame_queue, num_image_writers),
        )
        for _ in range(num_workers)
    ]
    for process in processes:
        process.start()
    return processes
def stop_workers(workers, frame_queue):
    """Signal every worker to stop, then wait for all of them to finish.

    One `None` is put on `frame_queue` per entry in `workers`, so that each
    worker can receive its own stop signal; afterwards every worker is joined.
    """
    # First phase: enqueue one stop signal per worker.
    for _worker in workers:
        frame_queue.put(None)

    # Second phase: block until each worker has terminated.
    for worker in workers:
        worker.join()
def has_method(_object: object, method_name: str):
    """Return True when `_object` has an attribute `method_name` that is callable."""
    if not hasattr(_object, method_name):
        return False
    return callable(getattr(_object, method_name))
@@ -465,10 +508,13 @@ def record(
# Save images using threads to reach high fps (30 and more)
# Using `with` to exit smoothly if an exception is raised.
futures = []
num_image_writers = num_image_writers_per_camera * len(robot.cameras)
num_image_writers = max(num_image_writers, 1)
with concurrent.futures.ThreadPoolExecutor(max_workers=num_image_writers) as executor:
frame_queue = multiprocessing.Queue()
frame_workers = start_frame_workers(frame_queue, num_image_writers)
# Using `try` to exit smoothly if an exception is raised
try:
# Start recording all episodes
while episode_index < num_episodes:
logging.info(f"Recording episode {episode_index}")
@@ -489,11 +535,7 @@ def record(
not_image_keys = [key for key in observation if "image" not in key]
for key in image_keys:
futures += [
executor.submit(
save_image, observation[key], key, frame_index, episode_index, videos_dir
)
]
frame_queue.put((observation[key], key, frame_index, episode_index, videos_dir))
if display_cameras and not is_headless():
image_keys = [key for key in observation if "image" in key]
@@ -640,11 +682,11 @@ def record(
listener.stop()
logging.info("Waiting for threads writing the images on disk to terminate...")
for _ in tqdm.tqdm(
concurrent.futures.as_completed(futures), total=len(futures), desc="Writting images"
):
pass
break
stop_workers(frame_workers, frame_queue)
except Exception:
traceback.print_exc()
stop_workers(frame_workers, frame_queue)
robot.disconnect()
if display_cameras and not is_headless():

2
poetry.lock generated
View File

@@ -5245,7 +5245,7 @@ docs = ["sphinx", "sphinx-automodapi", "sphinx-rtd-theme"]
name = "pyserial"
version = "3.5"
description = "Python Serial Port Extension"
optional = true
optional = false
python-versions = "*"
files = [
{file = "pyserial-3.5-py2.py3-none-any.whl", hash = "sha256:c4451db6ba391ca6ca299fb3ec7bae67a5c55dde170964c7a14ceefec02f2cf0"},

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b5a9f73a2356aff9c717cdfd0d37a6da08b0cf2cc09c98edbc9492501b7f64a5
size 5104

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:28738b3cfad17af0ac5181effdd796acdf7953cd5bcca3f421a11ddfd6b0076f
size 30800

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4bb8a197a40456fdbc16029126268e6bcef3eca1837d88235165dc7e14618bea
size 68

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bea60cce42d324f539dd3bca1e66b5ba6391838fdcadb00efc25f3240edb529a
size 33600

View File

@@ -145,13 +145,28 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
replay(robot, episode=0, fps=30, root=root, repo_id=repo_id)
# TODO(rcadene, aliberts): rethink this design
if robot_type == "aloha":
env_name = "aloha_real"
policy_name = "act_aloha_real"
elif robot_type in ["koch", "koch_bimanual"]:
env_name = "koch_real"
policy_name = "act_koch_real"
else:
raise NotImplementedError(robot_type)
overrides = [
f"env={env_name}",
f"policy={policy_name}",
f"device={DEVICE}",
]
if robot_type == "koch_bimanual":
overrides += ["env.state_dim=12", "env.action_dim=12"]
cfg = init_hydra_config(
DEFAULT_CONFIG_PATH,
overrides=[
f"env={env_name}",
f"policy={policy_name}",
f"device={DEVICE}",
],
overrides=overrides,
)
policy = make_policy(hydra_cfg=cfg, dataset_stats=dataset.stats)

View File

@@ -308,12 +308,11 @@ def test_flatten_unflatten_dict():
# "lerobot/cmu_stretch",
],
)
# TODO(rcadene, aliberts): all these tests fail locally on Mac M1, but not on Linux
def test_backward_compatibility(repo_id):
"""The artifacts for this test have been generated by `tests/scripts/save_dataset_to_safetensors.py`."""
dataset = LeRobotDataset(
repo_id,
)
dataset = LeRobotDataset(repo_id)
test_dir = Path("tests/data/save_dataset_to_safetensors") / repo_id

View File

@@ -367,8 +367,7 @@ def test_normalize(insert_temporal_dim):
),
("aloha", "act", ["policy.n_action_steps=10"], ""),
("aloha", "act", ["policy.n_action_steps=1000", "policy.chunk_size=1000"], "_1000_steps"),
("dora_aloha_real", "act_real", ["policy.n_action_steps=10"], ""),
("dora_aloha_real", "act_real_no_state", ["policy.n_action_steps=10"], ""),
("dora_aloha_real", "act_aloha_real", ["policy.n_action_steps=10"], ""),
],
)
# As artifacts have been generated on an x86_64 kernel, this test won't