Compare commits: main...user/rcade (70 commits)
| SHA1 |
|---|
| 82df3feaee |
| 2a8a9dc25a |
| dc08c3bfa4 |
| 68fff561de |
| 433e950348 |
| e58e59411a |
| 3369d351a7 |
| 8b89d03d74 |
| 77ba43d25b |
| 9b76ee9eb0 |
| 48911e0cd3 |
| 5c73bec913 |
| 1de04e4756 |
| 83cfe60783 |
| 0e63f7c1b5 |
| bc479cb2d4 |
| 2c9defabdd |
| cc5c623179 |
| 88c2ed419e |
| 2e694fcf8f |
| 9dea00ee9e |
| 50a979d6de |
| 76cc47956a |
| 675d4286c8 |
| da1888a378 |
| 3f9f3dd027 |
| c704eb94c0 |
| 0352c61b00 |
| e499d60742 |
| 81f17d505e |
| bf7e906b70 |
| a7350d9b65 |
| 8da08935d4 |
| 7450adc72b |
| e66900e387 |
| 89b2b7397e |
| 48be576cc6 |
| 395720a5de |
| 8a7b5c45c7 |
| b6b7fda5f8 |
| 8b36223832 |
| a236382590 |
| 3cb85bcd4b |
| f2b1842d69 |
| 500d505bf6 |
| 2c0171632f |
| bded8cbbe9 |
| 6377d2a96c |
| 558420115e |
| bcf27b8c01 |
| f0452c222a |
| 1bf284562e |
| 886923a890 |
| adc8dc9bfb |
| 624551bea9 |
| 6636db5b51 |
| ccc0586d45 |
| bab19d9b1d |
| 783b78ae9a |
| e47856add6 |
| 3f993d5250 |
| cd4d2257d3 |
| 53ebf9cf9f |
| 4151630c24 |
| bc0e691280 |
| e1763aa906 |
| 3bd5ea4d7a |
| 44b8394365 |
| 2469c99053 |
| 96cc2433d6 |
```diff
@@ -195,8 +195,8 @@ available_policies = [
 # lists all available robots from `lerobot/common/robot_devices/robots`
 available_robots = [
-    "koch",
-    "koch_bimanual",
+    # "koch",
+    # "koch_bimanual",
     "aloha",
 ]
 
@@ -216,7 +216,9 @@ available_policies_per_env = {
     "aloha": ["act"],
     "pusht": ["diffusion", "vqbet"],
     "xarm": ["tdmpc"],
-    "dora_aloha_real": ["act_real"],
+    "koch_real": ["act_koch_real"],
+    "aloha_real": ["act_aloha_real"],
+    "dora_aloha_real": ["act_aloha_real"],
 }
 
 env_task_pairs = [(env, task) for env, tasks in available_tasks_per_env.items() for task in tasks]
```
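The second hunk registers the new real-robot environments alongside the policies that can run on them. As a sketch of how such a registry is typically consumed — `default_policy` below is a hypothetical helper for illustration, not part of this diff:

```python
# Hypothetical consumer of the registry shown in the hunk above: resolve a
# default policy name for a given environment.
available_policies_per_env = {
    "aloha": ["act"],
    "pusht": ["diffusion", "vqbet"],
    "xarm": ["tdmpc"],
    "koch_real": ["act_koch_real"],
    "aloha_real": ["act_aloha_real"],
    "dora_aloha_real": ["act_aloha_real"],
}

def default_policy(env_name: str) -> str:
    # Hypothetical helper: treat the first registered policy as the default.
    policies = available_policies_per_env.get(env_name)
    if not policies:
        raise ValueError(f"no policies registered for env {env_name!r}")
    return policies[0]

print(default_policy("aloha_real"))  # -> act_aloha_real
```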
lerobot/configs/env/aloha_real.yaml (vendored, new file, +10 lines)
```diff
@@ -0,0 +1,10 @@
+# @package _global_
+
+fps: 30
+
+env:
+  name: real_world
+  task: null
+  state_dim: 14
+  action_dim: 14
+  fps: ${fps}
```
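In the new config, `env.fps: ${fps}` is an OmegaConf/Hydra interpolation: it resolves to the top-level `fps: 30`, so the frame rate is defined exactly once. A minimal sketch of the resolution, assuming only that `omegaconf` is installed:

```python
from omegaconf import OmegaConf

# Mirror the relevant part of aloha_real.yaml: env.fps interpolates the
# top-level fps, so both values stay in sync.
cfg = OmegaConf.create(
    """
fps: 30
env:
  name: real_world
  fps: ${fps}
"""
)
print(OmegaConf.to_container(cfg, resolve=True))
# {'fps': 30, 'env': {'name': 'real_world', 'fps': 30}}
```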
```diff
@@ -1,16 +1,22 @@
 # @package _global_
 
-# Use `act_real.yaml` to train on real-world Aloha/Aloha2 datasets.
-# Compared to `act.yaml`, it contains 4 cameras (i.e. cam_right_wrist, cam_left_wrist, images,
-# cam_low) instead of 1 camera (i.e. top). Also, `training.eval_freq` is set to -1. This config is used
-# to evaluate checkpoints at a certain frequency of training steps. When it is set to -1, it deactivates evaluation.
-# This is because real-world evaluation is done through [dora-lerobot](https://github.com/dora-rs/dora-lerobot).
-# Look at its README for more information on how to evaluate a checkpoint in the real-world.
+# Use `act_aloha_real.yaml` to train on real-world datasets collected on Aloha or Aloha-2 robots.
+# Compared to `act.yaml`, it contains 4 cameras (i.e. cam_right_wrist, cam_left_wrist, cam_high, cam_low) instead of 1 camera (i.e. top).
+# Also, `training.eval_freq` is set to -1. This config is used to evaluate checkpoints at a certain frequency of training steps.
+# When it is set to -1, it deactivates evaluation. This is because real-world evaluation is done through our `control_robot.py` script.
+# Look at the documentation in header of `control_robot.py` for more information on how to collect data, train and evaluate a policy.
 #
-# Example of usage for training:
+# Example of usage for training and inference with `control_robot.py`:
 # ```bash
 # python lerobot/scripts/train.py \
-#   policy=act_real \
+#   policy=act_aloha_real \
 #   env=aloha_real
 # ```
+#
+# Example of usage for training and inference with [Dora-rs](https://github.com/dora-rs/dora-lerobot):
+# ```bash
+# python lerobot/scripts/train.py \
+#   policy=act_aloha_real \
+#   env=dora_aloha_real
+# ```
```
```diff
@@ -36,10 +42,11 @@ override_dataset_stats:
     std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
 
 training:
-  offline_steps: 100000
+  offline_steps: 80000
   online_steps: 0
   eval_freq: -1
-  save_freq: 20000
+  save_freq: 10000
+  log_freq: 100
   save_checkpoint: true
 
   batch_size: 8
```
```diff
@@ -62,7 +69,7 @@ policy:
 
   # Input / output structure.
   n_obs_steps: 1
-  chunk_size: 100 # chunk_size
+  chunk_size: 100
   n_action_steps: 100
 
   input_shapes:
```
```diff
@@ -107,7 +114,7 @@ policy:
   n_vae_encoder_layers: 4
 
   # Inference.
-  temporal_ensemble_coeff: null
+  temporal_ensemble_momentum: null
 
   # Training and loss computation.
   dropout: 0.1
```
act_real_no_state.yaml (deleted file)

```diff
@@ -1,110 +0,0 @@
-# @package _global_
-
-# Use `act_real_no_state.yaml` to train on real-world Aloha/Aloha2 datasets when cameras are moving (e.g. wrist cameras)
-# Compared to `act_real.yaml`, it is camera only and does not use the state as input which is vector of robot joint positions.
-# We validated experimentaly that not using state reaches better success rate. Our hypothesis is that `act_real.yaml` might
-# overfits to the state, because the images are more complex to learn from since they are moving.
-#
-# Example of usage for training:
-# ```bash
-# python lerobot/scripts/train.py \
-#   policy=act_real_no_state \
-#   env=dora_aloha_real
-# ```
-
-seed: 1000
-dataset_repo_id: lerobot/aloha_static_vinh_cup
-
-override_dataset_stats:
-  observation.images.cam_right_wrist:
-    # stats from imagenet, since we use a pretrained vision model
-    mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
-    std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
-  observation.images.cam_left_wrist:
-    # stats from imagenet, since we use a pretrained vision model
-    mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
-    std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
-  observation.images.cam_high:
-    # stats from imagenet, since we use a pretrained vision model
-    mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
-    std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
-  observation.images.cam_low:
-    # stats from imagenet, since we use a pretrained vision model
-    mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
-    std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
-
-training:
-  offline_steps: 100000
-  online_steps: 0
-  eval_freq: -1
-  save_freq: 20000
-  save_checkpoint: true
-
-  batch_size: 8
-  lr: 1e-5
-  lr_backbone: 1e-5
-  weight_decay: 1e-4
-  grad_clip_norm: 10
-  online_steps_between_rollouts: 1
-
-  delta_timestamps:
-    action: "[i / ${fps} for i in range(${policy.chunk_size})]"
-
-eval:
-  n_episodes: 50
-  batch_size: 50
-
-# See `configuration_act.py` for more details.
-policy:
-  name: act
-
-  # Input / output structure.
-  n_obs_steps: 1
-  chunk_size: 100 # chunk_size
-  n_action_steps: 100
-
-  input_shapes:
-    # TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
-    observation.images.cam_right_wrist: [3, 480, 640]
-    observation.images.cam_left_wrist: [3, 480, 640]
-    observation.images.cam_high: [3, 480, 640]
-    observation.images.cam_low: [3, 480, 640]
-  output_shapes:
-    action: ["${env.action_dim}"]
-
-  # Normalization / Unnormalization
-  input_normalization_modes:
-    observation.images.cam_right_wrist: mean_std
-    observation.images.cam_left_wrist: mean_std
-    observation.images.cam_high: mean_std
-    observation.images.cam_low: mean_std
-  output_normalization_modes:
-    action: mean_std
-
-  # Architecture.
-  # Vision backbone.
-  vision_backbone: resnet18
-  pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
-  replace_final_stride_with_dilation: false
-  # Transformer layers.
-  pre_norm: false
-  dim_model: 512
-  n_heads: 8
-  dim_feedforward: 3200
-  feedforward_activation: relu
-  n_encoder_layers: 4
-  # Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code
-  # that means only the first layer is used. Here we match the original implementation by setting this to 1.
-  # See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521.
-  n_decoder_layers: 1
-  # VAE.
-  use_vae: true
-  latent_dim: 32
-  n_vae_encoder_layers: 4
-
-  # Inference.
-  temporal_ensemble_coeff: null
-
-  # Training and loss computation.
-  dropout: 0.1
-  kl_weight: 10.0
```
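The `delta_timestamps` entry in the deleted config is worth unpacking: after Hydra resolves `${fps}` and `${policy.chunk_size}`, the string is evaluated as a Python list comprehension, yielding one query timestamp per action in the chunk. A worked sketch with the values from this config (fps 30, chunk_size 100):

```python
# Evaluate the expression from the config by hand: 100 action timestamps
# spaced 1/30 s apart, starting at the current frame (0.0).
fps = 30
chunk_size = 100
delta_timestamps = [i / fps for i in range(chunk_size)]

print(delta_timestamps[:3])  # [0.0, 0.0333..., 0.0666...]
print(delta_timestamps[-1])  # 3.3 -> the chunk spans about 3.3 seconds
```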
```diff
@@ -102,6 +102,7 @@ import argparse
 import concurrent.futures
 import json
 import logging
+import multiprocessing
 import os
 import platform
 import shutil
```
```diff
@@ -239,6 +240,48 @@ def is_headless():
     return True
 
 
+def loop_to_save_frame_in_threads(frame_queue, num_image_writers):
+    with concurrent.futures.ThreadPoolExecutor(max_workers=num_image_writers) as executor:
+        futures = []
+        while True:
+            # Blocks until a frame is available
+            frame_data = frame_queue.get()
+
+            # Exit if we send None to stop the worker
+            if frame_data is None:
+                # Wait for all submitted futures to complete before exiting
+                for _ in tqdm.tqdm(
+                    concurrent.futures.as_completed(futures), total=len(futures), desc="Writting images"
+                ):
+                    pass
+                break
+
+            frame, key, frame_index, episode_index, videos_dir = frame_data
+            futures.append(executor.submit(save_image, frame, key, frame_index, episode_index, videos_dir))
+
+
+def start_frame_workers(frame_queue, num_image_writers, num_workers=1):
+    workers = []
+    for _ in range(num_workers):
+        worker = multiprocessing.Process(
+            target=loop_to_save_frame_in_threads,
+            args=(frame_queue, num_image_writers),
+        )
+        worker.start()
+        workers.append(worker)
+    return workers
+
+
+def stop_workers(workers, frame_queue):
+    # Send None to each process to signal it to stop
+    for _ in workers:
+        frame_queue.put(None)
+
+    # Wait for all processes to terminate
+    for process in workers:
+        process.join()
+
+
 def has_method(_object: object, method_name: str):
     return hasattr(_object, method_name) and callable(getattr(_object, method_name))
```
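Taken together, `loop_to_save_frame_in_threads`, `start_frame_workers`, and `stop_workers` form a producer/consumer pipeline: the recording loop puts `(frame, key, frame_index, episode_index, videos_dir)` tuples on a `multiprocessing.Queue`, each worker process drains the queue into its own thread pool, and one `None` sentinel per worker triggers shutdown. A stripped-down, runnable sketch of the same pattern, with a stand-in `save_image` (the real one writes the frame to disk):

```python
import multiprocessing

def save_image(frame, key, frame_index, episode_index, videos_dir):
    # Stand-in for the real writer, which saves the frame as an image file.
    print(f"writing {key} frame {frame_index} of episode {episode_index} to {videos_dir}")

def worker_loop(frame_queue):
    while True:
        frame_data = frame_queue.get()  # blocks until an item arrives
        if frame_data is None:          # sentinel: shut this worker down
            break
        save_image(*frame_data)

if __name__ == "__main__":
    frame_queue = multiprocessing.Queue()
    workers = [multiprocessing.Process(target=worker_loop, args=(frame_queue,)) for _ in range(2)]
    for w in workers:
        w.start()
    for i in range(5):  # producer role: the record loop in control_robot.py
        frame_queue.put(("frame", "cam_high", i, 0, "videos"))
    for _ in workers:   # one sentinel per worker
        frame_queue.put(None)
    for w in workers:
        w.join()
```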
```diff
@@ -465,10 +508,13 @@ def record(
 
     # Save images using threads to reach high fps (30 and more)
-    # Using `with` to exist smoothly if an execption is raised.
-    futures = []
     num_image_writers = num_image_writers_per_camera * len(robot.cameras)
     num_image_writers = max(num_image_writers, 1)
-    with concurrent.futures.ThreadPoolExecutor(max_workers=num_image_writers) as executor:
+    frame_queue = multiprocessing.Queue()
+    frame_workers = start_frame_workers(frame_queue, num_image_writers)
+
+    # Using `try` to exist smoothly if an exception is raised
+    try:
         # Start recording all episodes
         while episode_index < num_episodes:
             logging.info(f"Recording episode {episode_index}")
```
```diff
@@ -489,11 +535,7 @@ def record(
             not_image_keys = [key for key in observation if "image" not in key]
 
             for key in image_keys:
-                futures += [
-                    executor.submit(
-                        save_image, observation[key], key, frame_index, episode_index, videos_dir
-                    )
-                ]
+                frame_queue.put((observation[key], key, frame_index, episode_index, videos_dir))
 
             if display_cameras and not is_headless():
                 image_keys = [key for key in observation if "image" in key]
```
```diff
@@ -640,11 +682,11 @@ def record(
             listener.stop()
 
-            logging.info("Waiting for threads writing the images on disk to terminate...")
-            for _ in tqdm.tqdm(
-                concurrent.futures.as_completed(futures), total=len(futures), desc="Writting images"
-            ):
-                pass
+            stop_workers(frame_workers, frame_queue)
             break
 
     except Exception:
         traceback.print_exc()
+        stop_workers(frame_workers, frame_queue)
 
     robot.disconnect()
     if display_cameras and not is_headless():
```
poetry.lock (generated, 2 lines changed)
```diff
@@ -5245,7 +5245,7 @@ docs = ["sphinx", "sphinx-automodapi", "sphinx-rtd-theme"]
 name = "pyserial"
 version = "3.5"
 description = "Python Serial Port Extension"
-optional = true
+optional = false
 python-versions = "*"
 files = [
     {file = "pyserial-3.5-py2.py3-none-any.whl", hash = "sha256:c4451db6ba391ca6ca299fb3ec7bae67a5c55dde170964c7a14ceefec02f2cf0"},
```
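With `optional` flipped to `false` in the lock entry, pyserial is installed with the default dependency set rather than behind an extra, presumably because the new robot-device code drives motor buses over serial ports. A quick sanity check after a plain install (note that pyserial's import name is `serial`):

```python
# pyserial should now be importable without enabling any extras.
import serial

print(serial.VERSION)  # 3.5, matching the lock entry above
```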
Four deleted Git LFS pointer files:

```diff
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b5a9f73a2356aff9c717cdfd0d37a6da08b0cf2cc09c98edbc9492501b7f64a5
-size 5104
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:28738b3cfad17af0ac5181effdd796acdf7953cd5bcca3f421a11ddfd6b0076f
-size 30800
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4bb8a197a40456fdbc16029126268e6bcef3eca1837d88235165dc7e14618bea
-size 68
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:bea60cce42d324f539dd3bca1e66b5ba6391838fdcadb00efc25f3240edb529a
-size 33600
```
```diff
@@ -145,13 +145,28 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
 
     replay(robot, episode=0, fps=30, root=root, repo_id=repo_id)
 
+    # TODO(rcadene, aliberts): rethink this design
+    if robot_type == "aloha":
+        env_name = "aloha_real"
+        policy_name = "act_aloha_real"
+    elif robot_type in ["koch", "koch_bimanual"]:
+        env_name = "koch_real"
+        policy_name = "act_koch_real"
+    else:
+        raise NotImplementedError(robot_type)
+
+    overrides = [
+        f"env={env_name}",
+        f"policy={policy_name}",
+        f"device={DEVICE}",
+    ]
+
+    if robot_type == "koch_bimanual":
+        overrides += ["env.state_dim=12", "env.action_dim=12"]
+
     cfg = init_hydra_config(
         DEFAULT_CONFIG_PATH,
-        overrides=[
-            f"env={env_name}",
-            f"policy={policy_name}",
-            f"device={DEVICE}",
-        ],
+        overrides=overrides,
     )
 
     policy = make_policy(hydra_cfg=cfg, dataset_stats=dataset.stats)
```
```diff
@@ -308,12 +308,11 @@ def test_flatten_unflatten_dict():
         # "lerobot/cmu_stretch",
     ],
 )
+# TODO(rcadene, aliberts): all these tests fail locally on Mac M1, but not on Linux
 def test_backward_compatibility(repo_id):
     """The artifacts for this test have been generated by `tests/scripts/save_dataset_to_safetensors.py`."""
 
-    dataset = LeRobotDataset(
-        repo_id,
-    )
+    dataset = LeRobotDataset(repo_id)
 
     test_dir = Path("tests/data/save_dataset_to_safetensors") / repo_id
 
```
```diff
@@ -367,8 +367,7 @@ def test_normalize(insert_temporal_dim):
         ),
         ("aloha", "act", ["policy.n_action_steps=10"], ""),
         ("aloha", "act", ["policy.n_action_steps=1000", "policy.chunk_size=1000"], "_1000_steps"),
-        ("dora_aloha_real", "act_real", ["policy.n_action_steps=10"], ""),
-        ("dora_aloha_real", "act_real_no_state", ["policy.n_action_steps=10"], ""),
+        ("dora_aloha_real", "act_aloha_real", ["policy.n_action_steps=10"], ""),
     ],
 )
 # As artifacts have been generated on an x86_64 kernel, this test won't
```