forked from tangger/lerobot
merge but remove refactor of save_camera_images
This commit is contained in:
@@ -28,6 +28,8 @@ Example:
|
||||
print(lerobot.available_policies)
|
||||
print(lerobot.available_policies_per_env)
|
||||
print(lerobot.available_robots)
|
||||
print(lerobot.available_cameras)
|
||||
print(lerobot.available_motors)
|
||||
```
|
||||
|
||||
When implementing a new dataset loadable with LeRobotDataset follow these steps:
|
||||
@@ -198,12 +200,25 @@ available_robots = [
|
||||
"aloha",
|
||||
]
|
||||
|
||||
# lists all available cameras from `lerobot/common/robot_devices/cameras`
|
||||
available_cameras = [
|
||||
"opencv",
|
||||
"intelrealsense",
|
||||
]
|
||||
|
||||
# lists all available motors from `lerobot/common/robot_devices/motors`
|
||||
available_motors = [
|
||||
"dynamixel",
|
||||
]
|
||||
|
||||
# keys and values refer to yaml files
|
||||
available_policies_per_env = {
|
||||
"aloha": ["act"],
|
||||
"pusht": ["diffusion", "vqbet"],
|
||||
"xarm": ["tdmpc"],
|
||||
"dora_aloha_real": ["act_real"],
|
||||
"koch_real": ["act_koch_real"],
|
||||
"aloha_real": ["act_aloha_real"],
|
||||
"dora_aloha_real": ["act_aloha_real"],
|
||||
}
|
||||
|
||||
env_task_pairs = [(env, task) for env, tasks in available_tasks_per_env.items() for task in tasks]
|
||||
|
||||
@@ -68,7 +68,7 @@ def get_stats_einops_patterns(dataset, num_workers=0):
|
||||
return stats_patterns
|
||||
|
||||
|
||||
def compute_stats(dataset, batch_size=32, num_workers=16, max_num_samples=None):
|
||||
def compute_stats(dataset, batch_size=8, num_workers=8, max_num_samples=None):
|
||||
"""Compute mean/std and min/max statistics of all data keys in a LeRobotDataset."""
|
||||
if max_num_samples is None:
|
||||
max_num_samples = len(dataset)
|
||||
|
||||
468
lerobot/common/datasets/populate_dataset.py
Normal file
468
lerobot/common/datasets/populate_dataset.py
Normal file
@@ -0,0 +1,468 @@
|
||||
"""Functions to create an empty dataset, and populate it with frames."""
|
||||
# TODO(rcadene, aliberts): to adapt as class methods of next version of LeRobotDataset
|
||||
|
||||
import concurrent
|
||||
import json
|
||||
import logging
|
||||
import multiprocessing
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import tqdm
|
||||
from PIL import Image
|
||||
|
||||
from lerobot.common.datasets.compute_stats import compute_stats
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
|
||||
from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import to_hf_dataset
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, get_default_encoding
|
||||
from lerobot.common.datasets.utils import calculate_episode_data_index, create_branch
|
||||
from lerobot.common.datasets.video_utils import encode_video_frames
|
||||
from lerobot.common.utils.utils import log_say
|
||||
from lerobot.scripts.push_dataset_to_hub import (
|
||||
push_dataset_card_to_hub,
|
||||
push_meta_data_to_hub,
|
||||
push_videos_to_hub,
|
||||
save_meta_data,
|
||||
)
|
||||
|
||||
########################################################################################
|
||||
# Asynchrounous saving of images on disk
|
||||
########################################################################################
|
||||
|
||||
|
||||
def safe_stop_image_writer(func):
|
||||
# TODO(aliberts): Allow to pass custom exceptions
|
||||
# (e.g. ThreadServiceExit, KeyboardInterrupt, SystemExit, UnpluggedError, DynamixelCommError)
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
image_writer = kwargs.get("dataset", {}).get("image_writer")
|
||||
if image_writer is not None:
|
||||
print("Waiting for image writer to terminate...")
|
||||
stop_image_writer(image_writer, timeout=20)
|
||||
raise e
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
def save_image(img_tensor, key, frame_index, episode_index, videos_dir: str):
|
||||
img = Image.fromarray(img_tensor.numpy())
|
||||
path = Path(videos_dir) / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png"
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
img.save(str(path), quality=100)
|
||||
|
||||
|
||||
def loop_to_save_images_in_threads(image_queue, num_threads):
|
||||
if num_threads < 1:
|
||||
raise NotImplementedError(f"Only `num_threads>=1` is supported for now, but {num_threads=} given.")
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
futures = []
|
||||
while True:
|
||||
# Blocks until a frame is available
|
||||
frame_data = image_queue.get()
|
||||
|
||||
# As usually done, exit loop when receiving None to stop the worker
|
||||
if frame_data is None:
|
||||
break
|
||||
|
||||
image, key, frame_index, episode_index, videos_dir = frame_data
|
||||
futures.append(executor.submit(save_image, image, key, frame_index, episode_index, videos_dir))
|
||||
|
||||
# Before exiting function, wait for all threads to complete
|
||||
with tqdm.tqdm(total=len(futures), desc="Writing images") as progress_bar:
|
||||
concurrent.futures.wait(futures)
|
||||
progress_bar.update(len(futures))
|
||||
|
||||
|
||||
def start_image_writer_processes(image_queue, num_processes, num_threads_per_process):
|
||||
if num_processes < 1:
|
||||
raise ValueError(f"Only `num_processes>=1` is supported, but {num_processes=} given.")
|
||||
|
||||
if num_threads_per_process < 1:
|
||||
raise NotImplementedError(
|
||||
"Only `num_threads_per_process>=1` is supported for now, but {num_threads_per_process=} given."
|
||||
)
|
||||
|
||||
processes = []
|
||||
for _ in range(num_processes):
|
||||
process = multiprocessing.Process(
|
||||
target=loop_to_save_images_in_threads,
|
||||
args=(image_queue, num_threads_per_process),
|
||||
)
|
||||
process.start()
|
||||
processes.append(process)
|
||||
return processes
|
||||
|
||||
|
||||
def stop_processes(processes, queue, timeout):
|
||||
# Send None to each process to signal them to stop
|
||||
for _ in processes:
|
||||
queue.put(None)
|
||||
|
||||
# Wait maximum 20 seconds for all processes to terminate
|
||||
for process in processes:
|
||||
process.join(timeout=timeout)
|
||||
|
||||
# If not terminated after 20 seconds, force termination
|
||||
if process.is_alive():
|
||||
process.terminate()
|
||||
|
||||
# Close the queue, no more items can be put in the queue
|
||||
queue.close()
|
||||
|
||||
# Ensure all background queue threads have finished
|
||||
queue.join_thread()
|
||||
|
||||
|
||||
def start_image_writer(num_processes, num_threads):
|
||||
"""This function abstract away the initialisation of processes or/and threads to
|
||||
save images on disk asynchrounously, which is critical to control a robot and record data
|
||||
at a high frame rate.
|
||||
|
||||
When `num_processes=0`, it returns a dictionary containing a threads pool of size `num_threads`.
|
||||
When `num_processes>0`, it returns a dictionary containing a processes pool of size `num_processes`,
|
||||
where each subprocess starts their own threads pool of size `num_threads`.
|
||||
|
||||
The optimal number of processes and threads depends on your computer capabilities.
|
||||
We advise to use 4 threads per camera with 0 processes. If the fps is not stable, try to increase or lower
|
||||
the number of threads. If it is still not stable, try to use 1 subprocess, or more.
|
||||
"""
|
||||
image_writer = {}
|
||||
|
||||
if num_processes == 0:
|
||||
futures = []
|
||||
threads_pool = concurrent.futures.ThreadPoolExecutor(max_workers=num_threads)
|
||||
image_writer["threads_pool"], image_writer["futures"] = threads_pool, futures
|
||||
else:
|
||||
# TODO(rcadene): When using num_processes>1, `multiprocessing.Manager().Queue()`
|
||||
# might be better than `multiprocessing.Queue()`. Source: https://www.geeksforgeeks.org/python-multiprocessing-queue-vs-multiprocessing-manager-queue
|
||||
image_queue = multiprocessing.Queue()
|
||||
processes_pool = start_image_writer_processes(
|
||||
image_queue, num_processes=num_processes, num_threads_per_process=num_threads
|
||||
)
|
||||
image_writer["processes_pool"], image_writer["image_queue"] = processes_pool, image_queue
|
||||
|
||||
return image_writer
|
||||
|
||||
|
||||
def async_save_image(image_writer, image, key, frame_index, episode_index, videos_dir):
|
||||
"""This function abstract away the saving of an image on disk asynchrounously. It uses a dictionary
|
||||
called image writer which contains either a pool of processes or a pool of threads.
|
||||
"""
|
||||
if "threads_pool" in image_writer:
|
||||
threads_pool, futures = image_writer["threads_pool"], image_writer["futures"]
|
||||
futures.append(threads_pool.submit(save_image, image, key, frame_index, episode_index, videos_dir))
|
||||
else:
|
||||
image_queue = image_writer["image_queue"]
|
||||
image_queue.put((image, key, frame_index, episode_index, videos_dir))
|
||||
|
||||
|
||||
def stop_image_writer(image_writer, timeout):
|
||||
if "threads_pool" in image_writer:
|
||||
futures = image_writer["futures"]
|
||||
# Before exiting function, wait for all threads to complete
|
||||
with tqdm.tqdm(total=len(futures), desc="Writing images") as progress_bar:
|
||||
concurrent.futures.wait(futures, timeout=timeout)
|
||||
progress_bar.update(len(futures))
|
||||
else:
|
||||
processes_pool, image_queue = image_writer["processes_pool"], image_writer["image_queue"]
|
||||
stop_processes(processes_pool, image_queue, timeout=timeout)
|
||||
|
||||
|
||||
########################################################################################
|
||||
# Functions to initialize, resume and populate a dataset
|
||||
########################################################################################
|
||||
|
||||
|
||||
def init_dataset(
|
||||
repo_id,
|
||||
root,
|
||||
force_override,
|
||||
fps,
|
||||
video,
|
||||
write_images,
|
||||
num_image_writer_processes,
|
||||
num_image_writer_threads,
|
||||
):
|
||||
local_dir = Path(root) / repo_id
|
||||
if local_dir.exists() and force_override:
|
||||
shutil.rmtree(local_dir)
|
||||
|
||||
episodes_dir = local_dir / "episodes"
|
||||
episodes_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
videos_dir = local_dir / "videos"
|
||||
videos_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Logic to resume data recording
|
||||
rec_info_path = episodes_dir / "data_recording_info.json"
|
||||
if rec_info_path.exists():
|
||||
with open(rec_info_path) as f:
|
||||
rec_info = json.load(f)
|
||||
num_episodes = rec_info["last_episode_index"] + 1
|
||||
else:
|
||||
num_episodes = 0
|
||||
|
||||
dataset = {
|
||||
"repo_id": repo_id,
|
||||
"local_dir": local_dir,
|
||||
"videos_dir": videos_dir,
|
||||
"episodes_dir": episodes_dir,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
"rec_info_path": rec_info_path,
|
||||
"num_episodes": num_episodes,
|
||||
}
|
||||
|
||||
if write_images:
|
||||
# Initialize processes or/and threads dedicated to save images on disk asynchronously,
|
||||
# which is critical to control a robot and record data at a high frame rate.
|
||||
image_writer = start_image_writer(
|
||||
num_processes=num_image_writer_processes,
|
||||
num_threads=num_image_writer_threads,
|
||||
)
|
||||
dataset["image_writer"] = image_writer
|
||||
|
||||
return dataset
|
||||
|
||||
|
||||
def add_frame(dataset, observation, action):
|
||||
if "current_episode" not in dataset:
|
||||
# initialize episode dictionary
|
||||
ep_dict = {}
|
||||
for key in observation:
|
||||
if key not in ep_dict:
|
||||
ep_dict[key] = []
|
||||
for key in action:
|
||||
if key not in ep_dict:
|
||||
ep_dict[key] = []
|
||||
|
||||
ep_dict["episode_index"] = []
|
||||
ep_dict["frame_index"] = []
|
||||
ep_dict["timestamp"] = []
|
||||
ep_dict["next.done"] = []
|
||||
|
||||
dataset["current_episode"] = ep_dict
|
||||
dataset["current_frame_index"] = 0
|
||||
|
||||
ep_dict = dataset["current_episode"]
|
||||
episode_index = dataset["num_episodes"]
|
||||
frame_index = dataset["current_frame_index"]
|
||||
videos_dir = dataset["videos_dir"]
|
||||
video = dataset["video"]
|
||||
fps = dataset["fps"]
|
||||
|
||||
ep_dict["episode_index"].append(episode_index)
|
||||
ep_dict["frame_index"].append(frame_index)
|
||||
ep_dict["timestamp"].append(frame_index / fps)
|
||||
ep_dict["next.done"].append(False)
|
||||
|
||||
img_keys = [key for key in observation if "image" in key]
|
||||
non_img_keys = [key for key in observation if "image" not in key]
|
||||
|
||||
# Save all observed modalities except images
|
||||
for key in non_img_keys:
|
||||
ep_dict[key].append(observation[key])
|
||||
|
||||
# Save actions
|
||||
for key in action:
|
||||
ep_dict[key].append(action[key])
|
||||
|
||||
if "image_writer" not in dataset:
|
||||
dataset["current_frame_index"] += 1
|
||||
return
|
||||
|
||||
# Save images
|
||||
image_writer = dataset["image_writer"]
|
||||
for key in img_keys:
|
||||
imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
|
||||
async_save_image(
|
||||
image_writer,
|
||||
image=observation[key],
|
||||
key=key,
|
||||
frame_index=frame_index,
|
||||
episode_index=episode_index,
|
||||
videos_dir=str(videos_dir),
|
||||
)
|
||||
|
||||
if video:
|
||||
fname = f"{key}_episode_{episode_index:06d}.mp4"
|
||||
frame_info = {"path": f"videos/{fname}", "timestamp": frame_index / fps}
|
||||
else:
|
||||
frame_info = str(imgs_dir / f"frame_{frame_index:06d}.png")
|
||||
|
||||
ep_dict[key].append(frame_info)
|
||||
|
||||
dataset["current_frame_index"] += 1
|
||||
|
||||
|
||||
def delete_current_episode(dataset):
|
||||
del dataset["current_episode"]
|
||||
del dataset["current_frame_index"]
|
||||
|
||||
# delete temporary images
|
||||
episode_index = dataset["num_episodes"]
|
||||
videos_dir = dataset["videos_dir"]
|
||||
for tmp_imgs_dir in videos_dir.glob(f"*_episode_{episode_index:06d}"):
|
||||
shutil.rmtree(tmp_imgs_dir)
|
||||
|
||||
|
||||
def save_current_episode(dataset):
|
||||
episode_index = dataset["num_episodes"]
|
||||
ep_dict = dataset["current_episode"]
|
||||
episodes_dir = dataset["episodes_dir"]
|
||||
rec_info_path = dataset["rec_info_path"]
|
||||
|
||||
ep_dict["next.done"][-1] = True
|
||||
|
||||
for key in ep_dict:
|
||||
if "observation" in key and "image" not in key:
|
||||
ep_dict[key] = torch.stack(ep_dict[key])
|
||||
|
||||
ep_dict["action"] = torch.stack(ep_dict["action"])
|
||||
ep_dict["episode_index"] = torch.tensor(ep_dict["episode_index"])
|
||||
ep_dict["frame_index"] = torch.tensor(ep_dict["frame_index"])
|
||||
ep_dict["timestamp"] = torch.tensor(ep_dict["timestamp"])
|
||||
ep_dict["next.done"] = torch.tensor(ep_dict["next.done"])
|
||||
|
||||
ep_path = episodes_dir / f"episode_{episode_index}.pth"
|
||||
torch.save(ep_dict, ep_path)
|
||||
|
||||
rec_info = {
|
||||
"last_episode_index": episode_index,
|
||||
}
|
||||
with open(rec_info_path, "w") as f:
|
||||
json.dump(rec_info, f)
|
||||
|
||||
# force re-initialization of episode dictionnary during add_frame
|
||||
del dataset["current_episode"]
|
||||
|
||||
dataset["num_episodes"] += 1
|
||||
|
||||
|
||||
def encode_videos(dataset, image_keys, play_sounds):
|
||||
log_say("Encoding videos", play_sounds)
|
||||
|
||||
num_episodes = dataset["num_episodes"]
|
||||
videos_dir = dataset["videos_dir"]
|
||||
local_dir = dataset["local_dir"]
|
||||
fps = dataset["fps"]
|
||||
|
||||
# Use ffmpeg to convert frames stored as png into mp4 videos
|
||||
for episode_index in tqdm.tqdm(range(num_episodes)):
|
||||
for key in image_keys:
|
||||
# key = f"observation.images.{name}"
|
||||
tmp_imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
|
||||
fname = f"{key}_episode_{episode_index:06d}.mp4"
|
||||
video_path = local_dir / "videos" / fname
|
||||
if video_path.exists():
|
||||
# Skip if video is already encoded. Could be the case when resuming data recording.
|
||||
continue
|
||||
# note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding,
|
||||
# since video encoding with ffmpeg is already using multithreading.
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps, overwrite=True)
|
||||
shutil.rmtree(tmp_imgs_dir)
|
||||
|
||||
|
||||
def from_dataset_to_lerobot_dataset(dataset, play_sounds):
|
||||
log_say("Consolidate episodes", play_sounds)
|
||||
|
||||
num_episodes = dataset["num_episodes"]
|
||||
episodes_dir = dataset["episodes_dir"]
|
||||
videos_dir = dataset["videos_dir"]
|
||||
video = dataset["video"]
|
||||
fps = dataset["fps"]
|
||||
repo_id = dataset["repo_id"]
|
||||
|
||||
ep_dicts = []
|
||||
for episode_index in tqdm.tqdm(range(num_episodes)):
|
||||
ep_path = episodes_dir / f"episode_{episode_index}.pth"
|
||||
ep_dict = torch.load(ep_path)
|
||||
ep_dicts.append(ep_dict)
|
||||
data_dict = concatenate_episodes(ep_dicts)
|
||||
|
||||
if video:
|
||||
image_keys = [key for key in data_dict if "image" in key]
|
||||
encode_videos(dataset, image_keys, play_sounds)
|
||||
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
|
||||
info = {
|
||||
"codebase_version": CODEBASE_VERSION,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
if video:
|
||||
info["encoding"] = get_default_encoding()
|
||||
|
||||
lerobot_dataset = LeRobotDataset.from_preloaded(
|
||||
repo_id=repo_id,
|
||||
hf_dataset=hf_dataset,
|
||||
episode_data_index=episode_data_index,
|
||||
info=info,
|
||||
videos_dir=videos_dir,
|
||||
)
|
||||
|
||||
return lerobot_dataset
|
||||
|
||||
|
||||
def save_lerobot_dataset_on_disk(lerobot_dataset):
|
||||
hf_dataset = lerobot_dataset.hf_dataset
|
||||
info = lerobot_dataset.info
|
||||
stats = lerobot_dataset.stats
|
||||
episode_data_index = lerobot_dataset.episode_data_index
|
||||
local_dir = lerobot_dataset.videos_dir.parent
|
||||
meta_data_dir = local_dir / "meta_data"
|
||||
|
||||
hf_dataset = hf_dataset.with_format(None) # to remove transforms that cant be saved
|
||||
hf_dataset.save_to_disk(str(local_dir / "train"))
|
||||
|
||||
save_meta_data(info, stats, episode_data_index, meta_data_dir)
|
||||
|
||||
|
||||
def push_lerobot_dataset_to_hub(lerobot_dataset, tags):
|
||||
hf_dataset = lerobot_dataset.hf_dataset
|
||||
local_dir = lerobot_dataset.videos_dir.parent
|
||||
videos_dir = lerobot_dataset.videos_dir
|
||||
repo_id = lerobot_dataset.repo_id
|
||||
video = lerobot_dataset.video
|
||||
meta_data_dir = local_dir / "meta_data"
|
||||
|
||||
if not (local_dir / "train").exists():
|
||||
raise ValueError(
|
||||
"You need to run `save_lerobot_dataset_on_disk(lerobot_dataset)` before pushing to the hub."
|
||||
)
|
||||
|
||||
hf_dataset.push_to_hub(repo_id, revision="main")
|
||||
push_meta_data_to_hub(repo_id, meta_data_dir, revision="main")
|
||||
push_dataset_card_to_hub(repo_id, revision="main", tags=tags)
|
||||
if video:
|
||||
push_videos_to_hub(repo_id, videos_dir, revision="main")
|
||||
create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)
|
||||
|
||||
|
||||
def create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_sounds):
|
||||
if "image_writer" in dataset:
|
||||
logging.info("Waiting for image writer to terminate...")
|
||||
image_writer = dataset["image_writer"]
|
||||
stop_image_writer(image_writer, timeout=20)
|
||||
|
||||
lerobot_dataset = from_dataset_to_lerobot_dataset(dataset, play_sounds)
|
||||
|
||||
if run_compute_stats:
|
||||
log_say("Computing dataset statistics", play_sounds)
|
||||
lerobot_dataset.stats = compute_stats(lerobot_dataset)
|
||||
else:
|
||||
logging.info("Skipping computation of the dataset statistics")
|
||||
lerobot_dataset.stats = {}
|
||||
|
||||
save_lerobot_dataset_on_disk(lerobot_dataset)
|
||||
|
||||
if push_to_hub:
|
||||
push_lerobot_dataset_to_hub(lerobot_dataset, tags)
|
||||
|
||||
return lerobot_dataset
|
||||
@@ -32,7 +32,7 @@ DATASET_CARD_TEMPLATE = """
|
||||
---
|
||||
# Metadata will go there
|
||||
---
|
||||
This dataset was created using [🤗 LeRobot](https://github.com/huggingface/lerobot).
|
||||
This dataset was created using [LeRobot](https://github.com/huggingface/lerobot).
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -189,7 +189,7 @@ class Logger:
|
||||
training_state["scheduler"] = scheduler.state_dict()
|
||||
torch.save(training_state, save_dir / self.training_state_file_name)
|
||||
|
||||
def save_checkpont(
|
||||
def save_checkpoint(
|
||||
self,
|
||||
train_step: int,
|
||||
policy: Policy,
|
||||
|
||||
@@ -2,41 +2,151 @@
|
||||
This file contains utilities for recording frames from Intel Realsense cameras.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import logging
|
||||
import math
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass, replace
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pyrealsense2 as rs
|
||||
from PIL import Image
|
||||
|
||||
from lerobot.common.robot_devices.utils import (
|
||||
RobotDeviceAlreadyConnectedError,
|
||||
RobotDeviceNotConnectedError,
|
||||
busy_wait,
|
||||
)
|
||||
from lerobot.common.utils.utils import capture_timestamp_utc
|
||||
|
||||
SERIAL_NUMBER_INDEX = 1
|
||||
|
||||
|
||||
def find_camera_indices(raise_when_empty=True) -> list[int]:
|
||||
def find_cameras(raise_when_empty=True, mock=False) -> list[dict]:
|
||||
"""
|
||||
Find the serial numbers of the Intel RealSense cameras
|
||||
Find the names and the serial numbers of the Intel RealSense cameras
|
||||
connected to the computer.
|
||||
"""
|
||||
camera_ids = []
|
||||
if mock:
|
||||
import tests.mock_pyrealsense2 as rs
|
||||
else:
|
||||
import pyrealsense2 as rs
|
||||
|
||||
cameras = []
|
||||
for device in rs.context().query_devices():
|
||||
serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX)))
|
||||
camera_ids.append(serial_number)
|
||||
name = device.get_info(rs.camera_info.name)
|
||||
cameras.append(
|
||||
{
|
||||
"serial_number": serial_number,
|
||||
"name": name,
|
||||
}
|
||||
)
|
||||
|
||||
if raise_when_empty and len(camera_ids) == 0:
|
||||
if raise_when_empty and len(cameras) == 0:
|
||||
raise OSError(
|
||||
"Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware."
|
||||
)
|
||||
|
||||
return camera_ids
|
||||
return cameras
|
||||
|
||||
|
||||
def save_image(img_array, serial_number, frame_index, images_dir):
|
||||
try:
|
||||
img = Image.fromarray(img_array)
|
||||
path = images_dir / f"camera_{serial_number}_frame_{frame_index:06d}.png"
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
img.save(str(path), quality=100)
|
||||
logging.info(f"Saved image: {path}")
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to save image for camera {serial_number} frame {frame_index}: {e}")
|
||||
|
||||
|
||||
def save_images_from_cameras(
|
||||
images_dir: Path,
|
||||
serial_numbers: list[int] | None = None,
|
||||
fps=None,
|
||||
width=None,
|
||||
height=None,
|
||||
record_time_s=2,
|
||||
mock=False,
|
||||
):
|
||||
"""
|
||||
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
|
||||
associated to a given serial number.
|
||||
"""
|
||||
if serial_numbers is None or len(serial_numbers) == 0:
|
||||
camera_infos = find_cameras(mock=mock)
|
||||
serial_numbers = [cam["serial_number"] for cam in camera_infos]
|
||||
|
||||
if mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
print("Connecting cameras")
|
||||
cameras = []
|
||||
for cam_sn in serial_numbers:
|
||||
print(f"{cam_sn=}")
|
||||
camera = IntelRealSenseCamera(cam_sn, fps=fps, width=width, height=height, mock=mock)
|
||||
camera.connect()
|
||||
print(
|
||||
f"IntelRealSenseCamera({camera.serial_number}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
|
||||
)
|
||||
cameras.append(camera)
|
||||
|
||||
images_dir = Path(images_dir)
|
||||
if images_dir.exists():
|
||||
shutil.rmtree(
|
||||
images_dir,
|
||||
)
|
||||
images_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print(f"Saving images to {images_dir}")
|
||||
frame_index = 0
|
||||
start_time = time.perf_counter()
|
||||
try:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
||||
while True:
|
||||
now = time.perf_counter()
|
||||
|
||||
for camera in cameras:
|
||||
# If we use async_read when fps is None, the loop will go full speed, and we will end up
|
||||
# saving the same images from the cameras multiple times until the RAM/disk is full.
|
||||
image = camera.read() if fps is None else camera.async_read()
|
||||
if image is None:
|
||||
print("No Frame")
|
||||
|
||||
bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
||||
|
||||
executor.submit(
|
||||
save_image,
|
||||
bgr_converted_image,
|
||||
camera.serial_number,
|
||||
frame_index,
|
||||
images_dir,
|
||||
)
|
||||
|
||||
if fps is not None:
|
||||
dt_s = time.perf_counter() - now
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
if time.perf_counter() - start_time > record_time_s:
|
||||
break
|
||||
|
||||
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
|
||||
|
||||
frame_index += 1
|
||||
finally:
|
||||
print(f"Images have been saved to {images_dir}")
|
||||
for camera in cameras:
|
||||
camera.disconnect()
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -50,6 +160,7 @@ class IntelRealSenseCameraConfig:
|
||||
IntelRealSenseCameraConfig(90, 640, 480)
|
||||
IntelRealSenseCameraConfig(30, 1280, 720)
|
||||
IntelRealSenseCameraConfig(30, 640, 480, use_depth=True)
|
||||
IntelRealSenseCameraConfig(30, 640, 480, rotation=90)
|
||||
```
|
||||
"""
|
||||
|
||||
@@ -59,6 +170,8 @@ class IntelRealSenseCameraConfig:
|
||||
color_mode: str = "rgb"
|
||||
use_depth: bool = False
|
||||
force_hardware_reset: bool = True
|
||||
rotation: int | None = None
|
||||
mock: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
if self.color_mode not in ["rgb", "bgr"]:
|
||||
@@ -66,19 +179,23 @@ class IntelRealSenseCameraConfig:
|
||||
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
|
||||
)
|
||||
|
||||
if (self.fps or self.width or self.height) and not (self.fps and self.width and self.height):
|
||||
at_least_one_is_not_none = self.fps is not None or self.width is not None or self.height is not None
|
||||
at_least_one_is_none = self.fps is None or self.width is None or self.height is None
|
||||
if at_least_one_is_not_none and at_least_one_is_none:
|
||||
raise ValueError(
|
||||
"For `fps`, `width` and `height`, either all of them need to be set, or none of them, "
|
||||
f"but {self.fps=}, {self.width=}, {self.height=} were provided."
|
||||
)
|
||||
|
||||
if self.rotation not in [-90, None, 90, 180]:
|
||||
raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
|
||||
|
||||
|
||||
class IntelRealSenseCamera:
|
||||
"""
|
||||
The IntelRealSenseCamera class is similar to OpenCVCamera class but adds additional features for Intel Real Sense cameras:
|
||||
- camera_index corresponds to the serial number of the camera,
|
||||
- camera_index won't randomly change as it can be the case of OpenCVCamera for Linux,
|
||||
- read is more reliable than OpenCVCamera,
|
||||
- is instantiated with the serial number of the camera - won't randomly change as it can be the case of OpenCVCamera for Linux,
|
||||
- can also be instantiated with the camera's name — if it's unique — using IntelRealSenseCamera.init_from_name(),
|
||||
- depth map can be returned.
|
||||
|
||||
To find the camera indices of your cameras, you can run our utility script that will save a few frames for each camera:
|
||||
@@ -93,8 +210,10 @@ class IntelRealSenseCamera:
|
||||
|
||||
Example of usage:
|
||||
```python
|
||||
camera_index = 128422271347
|
||||
camera = IntelRealSenseCamera(camera_index)
|
||||
# Instantiate with its serial number
|
||||
camera = IntelRealSenseCamera(128422271347)
|
||||
# Or by its name if it's unique
|
||||
camera = IntelRealSenseCamera.init_from_name("Intel RealSense D405")
|
||||
camera.connect()
|
||||
color_image = camera.read()
|
||||
# when done using the camera, consider disconnecting
|
||||
@@ -103,19 +222,19 @@ class IntelRealSenseCamera:
|
||||
|
||||
Example of changing default fps, width, height and color_mode:
|
||||
```python
|
||||
camera = IntelRealSenseCamera(camera_index, fps=30, width=1280, height=720)
|
||||
camera = IntelRealSenseCamera(serial_number, fps=30, width=1280, height=720)
|
||||
camera = connect() # applies the settings, might error out if these settings are not compatible with the camera
|
||||
|
||||
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480)
|
||||
camera = IntelRealSenseCamera(serial_number, fps=90, width=640, height=480)
|
||||
camera = connect()
|
||||
|
||||
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480, color_mode="bgr")
|
||||
camera = IntelRealSenseCamera(serial_number, fps=90, width=640, height=480, color_mode="bgr")
|
||||
camera = connect()
|
||||
```
|
||||
|
||||
Example of returning depth:
|
||||
```python
|
||||
camera = IntelRealSenseCamera(camera_index, use_depth=True)
|
||||
camera = IntelRealSenseCamera(serial_number, use_depth=True)
|
||||
camera.connect()
|
||||
color_image, depth_map = camera.read()
|
||||
```
|
||||
@@ -123,7 +242,7 @@ class IntelRealSenseCamera:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
camera_index: int,
|
||||
serial_number: int,
|
||||
config: IntelRealSenseCameraConfig | None = None,
|
||||
**kwargs,
|
||||
):
|
||||
@@ -133,13 +252,14 @@ class IntelRealSenseCamera:
|
||||
# Overwrite the config arguments using kwargs
|
||||
config = replace(config, **kwargs)
|
||||
|
||||
self.camera_index = camera_index
|
||||
self.serial_number = serial_number
|
||||
self.fps = config.fps
|
||||
self.width = config.width
|
||||
self.height = config.height
|
||||
self.color_mode = config.color_mode
|
||||
self.use_depth = config.use_depth
|
||||
self.force_hardware_reset = config.force_hardware_reset
|
||||
self.mock = config.mock
|
||||
|
||||
self.camera = None
|
||||
self.is_connected = False
|
||||
@@ -149,14 +269,55 @@ class IntelRealSenseCamera:
|
||||
self.depth_map = None
|
||||
self.logs = {}
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
# TODO(alibets): Do we keep original width/height or do we define them after rotation?
|
||||
self.rotation = None
|
||||
if config.rotation == -90:
|
||||
self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
|
||||
elif config.rotation == 90:
|
||||
self.rotation = cv2.ROTATE_90_CLOCKWISE
|
||||
elif config.rotation == 180:
|
||||
self.rotation = cv2.ROTATE_180
|
||||
|
||||
@classmethod
|
||||
def init_from_name(cls, name: str, config: IntelRealSenseCameraConfig | None = None, **kwargs):
|
||||
camera_infos = find_cameras()
|
||||
camera_names = [cam["name"] for cam in camera_infos]
|
||||
this_name_count = Counter(camera_names)[name]
|
||||
if this_name_count > 1:
|
||||
# TODO(aliberts): Test this with multiple identical cameras (Aloha)
|
||||
raise ValueError(
|
||||
f"Multiple {name} cameras have been detected. Please use their serial number to instantiate them."
|
||||
)
|
||||
|
||||
name_to_serial_dict = {cam["name"]: cam["serial_number"] for cam in camera_infos}
|
||||
cam_sn = name_to_serial_dict[name]
|
||||
|
||||
if config is None:
|
||||
config = IntelRealSenseCameraConfig()
|
||||
|
||||
# Overwrite the config arguments using kwargs
|
||||
config = replace(config, **kwargs)
|
||||
|
||||
return cls(serial_number=cam_sn, config=config, **kwargs)
|
||||
|
||||
def connect(self):
|
||||
if self.is_connected:
|
||||
raise RobotDeviceAlreadyConnectedError(
|
||||
f"IntelRealSenseCamera({self.camera_index}) is already connected."
|
||||
f"IntelRealSenseCamera({self.serial_number}) is already connected."
|
||||
)
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_pyrealsense2 as rs
|
||||
else:
|
||||
import pyrealsense2 as rs
|
||||
|
||||
config = rs.config()
|
||||
config.enable_device(str(self.camera_index))
|
||||
config.enable_device(str(self.serial_number))
|
||||
|
||||
if self.fps and self.width and self.height:
|
||||
# TODO(rcadene): can we set rgb8 directly?
|
||||
@@ -172,7 +333,7 @@ class IntelRealSenseCamera:
|
||||
|
||||
self.camera = rs.pipeline()
|
||||
try:
|
||||
self.camera.start(config)
|
||||
profile = self.camera.start(config)
|
||||
is_camera_open = True
|
||||
except RuntimeError:
|
||||
is_camera_open = False
|
||||
@@ -181,15 +342,41 @@ class IntelRealSenseCamera:
|
||||
# If the camera doesn't work, display the camera indices corresponding to
|
||||
# valid cameras.
|
||||
if not is_camera_open:
|
||||
# Verify that the provided `camera_index` is valid before printing the traceback
|
||||
available_cam_ids = find_camera_indices()
|
||||
if self.camera_index not in available_cam_ids:
|
||||
# Verify that the provided `serial_number` is valid before printing the traceback
|
||||
camera_infos = find_cameras()
|
||||
serial_numbers = [cam["serial_number"] for cam in camera_infos]
|
||||
if self.serial_number not in serial_numbers:
|
||||
raise ValueError(
|
||||
f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
|
||||
"To find the camera index you should use, run `python lerobot/scripts/save_images_from_cameras.py --driver intelrealsense`."
|
||||
f"`serial_number` is expected to be one of these available cameras {serial_numbers}, but {self.serial_number} is provided instead. "
|
||||
"To find the serial number you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`."
|
||||
)
|
||||
|
||||
raise OSError(f"Can't access IntelRealSenseCamera({self.camera_index}).")
|
||||
raise OSError(f"Can't access IntelRealSenseCamera({self.serial_number}).")
|
||||
|
||||
color_stream = profile.get_stream(rs.stream.color)
|
||||
color_profile = color_stream.as_video_stream_profile()
|
||||
actual_fps = color_profile.fps()
|
||||
actual_width = color_profile.width()
|
||||
actual_height = color_profile.height()
|
||||
|
||||
# Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
|
||||
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
|
||||
# Using `OSError` since it's a broad that encompasses issues related to device communication
|
||||
raise OSError(
|
||||
f"Can't set {self.fps=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_fps}."
|
||||
)
|
||||
if self.width is not None and self.width != actual_width:
|
||||
raise OSError(
|
||||
f"Can't set {self.width=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_width}."
|
||||
)
|
||||
if self.height is not None and self.height != actual_height:
|
||||
raise OSError(
|
||||
f"Can't set {self.height=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_height}."
|
||||
)
|
||||
|
||||
self.fps = round(actual_fps)
|
||||
self.width = round(actual_width)
|
||||
self.height = round(actual_height)
|
||||
|
||||
self.is_connected = True
|
||||
|
||||
@@ -205,9 +392,14 @@ class IntelRealSenseCamera:
|
||||
"""
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
||||
f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
start_time = time.perf_counter()
|
||||
|
||||
frame = self.camera.wait_for_frames(timeout_ms=5000)
|
||||
@@ -215,7 +407,7 @@ class IntelRealSenseCamera:
|
||||
color_frame = frame.get_color_frame()
|
||||
|
||||
if not color_frame:
|
||||
raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.camera_index}).")
|
||||
raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.serial_number}).")
|
||||
|
||||
color_image = np.asanyarray(color_frame.get_data())
|
||||
|
||||
@@ -235,6 +427,9 @@ class IntelRealSenseCamera:
|
||||
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
|
||||
)
|
||||
|
||||
if self.rotation is not None:
|
||||
color_image = cv2.rotate(color_image, self.rotation)
|
||||
|
||||
# log the number of seconds it took to read the image
|
||||
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
|
||||
|
||||
@@ -244,7 +439,7 @@ class IntelRealSenseCamera:
|
||||
if self.use_depth:
|
||||
depth_frame = frame.get_depth_frame()
|
||||
if not depth_frame:
|
||||
raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.camera_index}).")
|
||||
raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.serial_number}).")
|
||||
|
||||
depth_map = np.asanyarray(depth_frame.get_data())
|
||||
|
||||
@@ -254,12 +449,15 @@ class IntelRealSenseCamera:
|
||||
f"Can't capture depth map with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
|
||||
)
|
||||
|
||||
if self.rotation is not None:
|
||||
depth_map = cv2.rotate(depth_map, self.rotation)
|
||||
|
||||
return color_image, depth_map
|
||||
else:
|
||||
return color_image
|
||||
|
||||
def read_loop(self):
|
||||
while self.stop_event is None or not self.stop_event.is_set():
|
||||
while not self.stop_event.is_set():
|
||||
if self.use_depth:
|
||||
self.color_image, self.depth_map = self.read()
|
||||
else:
|
||||
@@ -269,7 +467,7 @@ class IntelRealSenseCamera:
|
||||
"""Access the latest color image"""
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
||||
f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
if self.thread is None:
|
||||
@@ -280,6 +478,7 @@ class IntelRealSenseCamera:
|
||||
|
||||
num_tries = 0
|
||||
while self.color_image is None:
|
||||
# TODO(rcadene, aliberts): intelrealsense has diverged compared to opencv over here
|
||||
num_tries += 1
|
||||
time.sleep(1 / self.fps)
|
||||
if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()):
|
||||
@@ -295,7 +494,7 @@ class IntelRealSenseCamera:
|
||||
def disconnect(self):
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
||||
f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
if self.thread is not None and self.thread.is_alive():
|
||||
@@ -313,3 +512,48 @@ class IntelRealSenseCamera:
|
||||
def __del__(self):
|
||||
if getattr(self, "is_connected", False):
|
||||
self.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Save a few frames using `IntelRealSenseCamera` for all cameras connected to the computer, or a selected subset."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--serial-numbers",
|
||||
type=int,
|
||||
nargs="*",
|
||||
default=None,
|
||||
help="List of serial numbers used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fps",
|
||||
type=int,
|
||||
default=30,
|
||||
help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--width",
|
||||
type=str,
|
||||
default=640,
|
||||
help="Set the width for all cameras. If not provided, use the default width of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--height",
|
||||
type=str,
|
||||
default=480,
|
||||
help="Set the height for all cameras. If not provided, use the default height of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--images-dir",
|
||||
type=Path,
|
||||
default="outputs/images_from_intelrealsense_cameras",
|
||||
help="Set directory to save a few frames for each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--record-time-s",
|
||||
type=float,
|
||||
default=2.0,
|
||||
help="Set the number of seconds used to record the frames. By default, 2 seconds.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
save_images_from_cameras(**vars(args))
|
||||
|
||||
@@ -2,27 +2,27 @@
|
||||
This file contains utilities for recording frames from cameras. For more info look at `OpenCVCamera` docstring.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import math
|
||||
import platform
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, replace
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from lerobot.common.robot_devices.utils import (
|
||||
RobotDeviceAlreadyConnectedError,
|
||||
RobotDeviceNotConnectedError,
|
||||
busy_wait,
|
||||
)
|
||||
from lerobot.common.utils.utils import capture_timestamp_utc
|
||||
|
||||
# Use 1 thread to avoid blocking the main thread. Especially useful during data collection
|
||||
# when other threads are used to save the images.
|
||||
cv2.setNumThreads(1)
|
||||
|
||||
# The maximum opencv device index depends on your operating system. For instance,
|
||||
# if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
|
||||
# on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23.
|
||||
@@ -31,20 +31,44 @@ cv2.setNumThreads(1)
|
||||
MAX_OPENCV_INDEX = 60
|
||||
|
||||
|
||||
def find_camera_indices(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX):
|
||||
def find_cameras(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False) -> list[dict]:
|
||||
cameras = []
|
||||
if platform.system() == "Linux":
|
||||
# Linux uses camera ports
|
||||
print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
|
||||
possible_camera_ids = []
|
||||
for port in Path("/dev").glob("video*"):
|
||||
camera_idx = int(str(port).replace("/dev/video", ""))
|
||||
possible_camera_ids.append(camera_idx)
|
||||
possible_ports = [str(port) for port in Path("/dev").glob("video*")]
|
||||
ports = _find_cameras(possible_ports, mock=mock)
|
||||
for port in ports:
|
||||
cameras.append(
|
||||
{
|
||||
"port": port,
|
||||
"index": int(port.removeprefix("/dev/video")),
|
||||
}
|
||||
)
|
||||
else:
|
||||
print(
|
||||
"Mac or Windows detected. Finding available camera indices through "
|
||||
f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
|
||||
)
|
||||
possible_camera_ids = range(max_index_search_range)
|
||||
possible_indices = range(max_index_search_range)
|
||||
indices = _find_cameras(possible_indices, mock=mock)
|
||||
for index in indices:
|
||||
cameras.append(
|
||||
{
|
||||
"port": None,
|
||||
"index": index,
|
||||
}
|
||||
)
|
||||
|
||||
return cameras
|
||||
|
||||
|
||||
def _find_cameras(
|
||||
possible_camera_ids: list[int | str], raise_when_empty=False, mock=False
|
||||
) -> list[int | str]:
|
||||
if mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
camera_ids = []
|
||||
for camera_idx in possible_camera_ids:
|
||||
@@ -65,6 +89,92 @@ def find_camera_indices(raise_when_empty=False, max_index_search_range=MAX_OPENC
|
||||
return camera_ids
|
||||
|
||||
|
||||
def is_valid_unix_path(path: str) -> bool:
|
||||
"""Note: if 'path' points to a symlink, this will return True only if the target exists"""
|
||||
p = Path(path)
|
||||
return p.is_absolute() and p.exists()
|
||||
|
||||
|
||||
def get_camera_index_from_unix_port(port: Path) -> int:
|
||||
return int(str(port.resolve()).removeprefix("/dev/video"))
|
||||
|
||||
|
||||
def save_image(img_array, camera_index, frame_index, images_dir):
|
||||
img = Image.fromarray(img_array)
|
||||
path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
img.save(str(path), quality=100)
|
||||
|
||||
|
||||
def save_images_from_cameras(
|
||||
images_dir: Path,
|
||||
camera_ids: list | None = None,
|
||||
fps=None,
|
||||
width=None,
|
||||
height=None,
|
||||
record_time_s=2,
|
||||
mock=False,
|
||||
):
|
||||
"""
|
||||
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
|
||||
associated to a given camera index.
|
||||
"""
|
||||
if camera_ids is None or len(camera_ids) == 0:
|
||||
camera_infos = find_cameras(mock=mock)
|
||||
camera_ids = [cam["index"] for cam in camera_infos]
|
||||
|
||||
print("Connecting cameras")
|
||||
cameras = []
|
||||
for cam_idx in camera_ids:
|
||||
camera = OpenCVCamera(cam_idx, fps=fps, width=width, height=height, mock=mock)
|
||||
camera.connect()
|
||||
print(
|
||||
f"OpenCVCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, "
|
||||
f"height={camera.height}, color_mode={camera.color_mode})"
|
||||
)
|
||||
cameras.append(camera)
|
||||
|
||||
images_dir = Path(images_dir)
|
||||
if images_dir.exists():
|
||||
shutil.rmtree(
|
||||
images_dir,
|
||||
)
|
||||
images_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print(f"Saving images to {images_dir}")
|
||||
frame_index = 0
|
||||
start_time = time.perf_counter()
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
||||
while True:
|
||||
now = time.perf_counter()
|
||||
|
||||
for camera in cameras:
|
||||
# If we use async_read when fps is None, the loop will go full speed, and we will endup
|
||||
# saving the same images from the cameras multiple times until the RAM/disk is full.
|
||||
image = camera.read() if fps is None else camera.async_read()
|
||||
|
||||
executor.submit(
|
||||
save_image,
|
||||
image,
|
||||
camera.camera_index,
|
||||
frame_index,
|
||||
images_dir,
|
||||
)
|
||||
|
||||
if fps is not None:
|
||||
dt_s = time.perf_counter() - now
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
|
||||
|
||||
if time.perf_counter() - start_time > record_time_s:
|
||||
break
|
||||
|
||||
frame_index += 1
|
||||
|
||||
print(f"Images have been saved to {images_dir}")
|
||||
|
||||
|
||||
@dataclass
|
||||
class OpenCVCameraConfig:
|
||||
"""
|
||||
@@ -82,6 +192,8 @@ class OpenCVCameraConfig:
|
||||
width: int | None = None
|
||||
height: int | None = None
|
||||
color_mode: str = "rgb"
|
||||
rotation: int | None = None
|
||||
mock: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
if self.color_mode not in ["rgb", "bgr"]:
|
||||
@@ -89,6 +201,9 @@ class OpenCVCameraConfig:
|
||||
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
|
||||
)
|
||||
|
||||
if self.rotation not in [-90, None, 90, 180]:
|
||||
raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
|
||||
|
||||
|
||||
class OpenCVCamera:
|
||||
"""
|
||||
@@ -131,7 +246,7 @@ class OpenCVCamera:
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, camera_index: int, config: OpenCVCameraConfig | None = None, **kwargs):
|
||||
def __init__(self, camera_index: int | str, config: OpenCVCameraConfig | None = None, **kwargs):
|
||||
if config is None:
|
||||
config = OpenCVCameraConfig()
|
||||
|
||||
@@ -139,10 +254,24 @@ class OpenCVCamera:
|
||||
config = replace(config, **kwargs)
|
||||
|
||||
self.camera_index = camera_index
|
||||
self.port = None
|
||||
|
||||
# Linux uses ports for connecting to cameras
|
||||
if platform.system() == "Linux":
|
||||
if isinstance(self.camera_index, int):
|
||||
self.port = Path(f"/dev/video{self.camera_index}")
|
||||
elif isinstance(self.camera_index, str) and is_valid_unix_path(self.camera_index):
|
||||
self.port = Path(self.camera_index)
|
||||
# Retrieve the camera index from a potentially symlinked path
|
||||
self.camera_index = get_camera_index_from_unix_port(self.port)
|
||||
else:
|
||||
raise ValueError(f"Please check the provided camera_index: {camera_index}")
|
||||
|
||||
self.fps = config.fps
|
||||
self.width = config.width
|
||||
self.height = config.height
|
||||
self.color_mode = config.color_mode
|
||||
self.mock = config.mock
|
||||
|
||||
self.camera = None
|
||||
self.is_connected = False
|
||||
@@ -151,43 +280,60 @@ class OpenCVCamera:
|
||||
self.color_image = None
|
||||
self.logs = {}
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
# TODO(aliberts): Do we keep original width/height or do we define them after rotation?
|
||||
self.rotation = None
|
||||
if config.rotation == -90:
|
||||
self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
|
||||
elif config.rotation == 90:
|
||||
self.rotation = cv2.ROTATE_90_CLOCKWISE
|
||||
elif config.rotation == 180:
|
||||
self.rotation = cv2.ROTATE_180
|
||||
|
||||
def connect(self):
|
||||
if self.is_connected:
|
||||
raise RobotDeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.")
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
# Use 1 thread to avoid blocking the main thread. Especially useful during data collection
|
||||
# when other threads are used to save the images.
|
||||
cv2.setNumThreads(1)
|
||||
|
||||
camera_idx = f"/dev/video{self.camera_index}" if platform.system() == "Linux" else self.camera_index
|
||||
# First create a temporary camera trying to access `camera_index`,
|
||||
# and verify it is a valid camera by calling `isOpened`.
|
||||
|
||||
if platform.system() == "Linux":
|
||||
# Linux uses ports for connecting to cameras
|
||||
tmp_camera = cv2.VideoCapture(f"/dev/video{self.camera_index}")
|
||||
else:
|
||||
tmp_camera = cv2.VideoCapture(self.camera_index)
|
||||
|
||||
tmp_camera = cv2.VideoCapture(camera_idx)
|
||||
is_camera_open = tmp_camera.isOpened()
|
||||
# Release camera to make it accessible for `find_camera_indices`
|
||||
tmp_camera.release()
|
||||
del tmp_camera
|
||||
|
||||
# If the camera doesn't work, display the camera indices corresponding to
|
||||
# valid cameras.
|
||||
if not is_camera_open:
|
||||
# Verify that the provided `camera_index` is valid before printing the traceback
|
||||
available_cam_ids = find_camera_indices()
|
||||
cameras_info = find_cameras()
|
||||
available_cam_ids = [cam["index"] for cam in cameras_info]
|
||||
if self.camera_index not in available_cam_ids:
|
||||
raise ValueError(
|
||||
f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
|
||||
"To find the camera index you should use, run `python lerobot/scripts/save_images_from_cameras.py --driver opencv`."
|
||||
)
|
||||
|
||||
raise OSError(f"Can't access OpenCVCamera({self.camera_index}).")
|
||||
raise OSError(f"Can't access OpenCVCamera({camera_idx}).")
|
||||
|
||||
# Secondly, create the camera that will be used downstream.
|
||||
# Note: For some unknown reason, calling `isOpened` blocks the camera which then
|
||||
# needs to be re-created.
|
||||
if platform.system() == "Linux":
|
||||
self.camera = cv2.VideoCapture(f"/dev/video{self.camera_index}")
|
||||
else:
|
||||
self.camera = cv2.VideoCapture(self.camera_index)
|
||||
self.camera = cv2.VideoCapture(camera_idx)
|
||||
|
||||
if self.fps is not None:
|
||||
self.camera.set(cv2.CAP_PROP_FPS, self.fps)
|
||||
@@ -200,22 +346,24 @@ class OpenCVCamera:
|
||||
actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
|
||||
actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
|
||||
|
||||
# Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
|
||||
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
|
||||
# Using `OSError` since it's a broad that encompasses issues related to device communication
|
||||
raise OSError(
|
||||
f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}."
|
||||
)
|
||||
if self.width is not None and self.width != actual_width:
|
||||
if self.width is not None and not math.isclose(self.width, actual_width, rel_tol=1e-3):
|
||||
raise OSError(
|
||||
f"Can't set {self.width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}."
|
||||
)
|
||||
if self.height is not None and self.height != actual_height:
|
||||
if self.height is not None and not math.isclose(self.height, actual_height, rel_tol=1e-3):
|
||||
raise OSError(
|
||||
f"Can't set {self.height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}."
|
||||
)
|
||||
|
||||
self.fps = actual_fps
|
||||
self.width = actual_width
|
||||
self.height = actual_height
|
||||
self.fps = round(actual_fps)
|
||||
self.width = round(actual_width)
|
||||
self.height = round(actual_height)
|
||||
|
||||
self.is_connected = True
|
||||
|
||||
@@ -234,6 +382,7 @@ class OpenCVCamera:
|
||||
start_time = time.perf_counter()
|
||||
|
||||
ret, color_image = self.camera.read()
|
||||
|
||||
if not ret:
|
||||
raise OSError(f"Can't capture color image from camera {self.camera_index}.")
|
||||
|
||||
@@ -248,6 +397,11 @@ class OpenCVCamera:
|
||||
# However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks,
|
||||
# so we convert the image color from BGR to RGB.
|
||||
if requested_color_mode == "rgb":
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
h, w, _ = color_image.shape
|
||||
@@ -256,17 +410,25 @@ class OpenCVCamera:
|
||||
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
|
||||
)
|
||||
|
||||
if self.rotation is not None:
|
||||
color_image = cv2.rotate(color_image, self.rotation)
|
||||
|
||||
# log the number of seconds it took to read the image
|
||||
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
|
||||
|
||||
# log the utc time at which the image was received
|
||||
self.logs["timestamp_utc"] = capture_timestamp_utc()
|
||||
|
||||
self.color_image = color_image
|
||||
|
||||
return color_image
|
||||
|
||||
def read_loop(self):
|
||||
while self.stop_event is None or not self.stop_event.is_set():
|
||||
self.color_image = self.read()
|
||||
while not self.stop_event.is_set():
|
||||
try:
|
||||
self.color_image = self.read()
|
||||
except Exception as e:
|
||||
print(f"Error reading in thread: {e}")
|
||||
|
||||
def async_read(self):
|
||||
if not self.is_connected:
|
||||
@@ -281,15 +443,14 @@ class OpenCVCamera:
|
||||
self.thread.start()
|
||||
|
||||
num_tries = 0
|
||||
while self.color_image is None:
|
||||
num_tries += 1
|
||||
time.sleep(1 / self.fps)
|
||||
if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()):
|
||||
raise Exception(
|
||||
"The thread responsible for `self.async_read()` took too much time to start. There might be an issue. Verify that `self.thread.start()` has been called."
|
||||
)
|
||||
while True:
|
||||
if self.color_image is not None:
|
||||
return self.color_image
|
||||
|
||||
return self.color_image
|
||||
time.sleep(1 / self.fps)
|
||||
num_tries += 1
|
||||
if num_tries > self.fps * 2:
|
||||
raise TimeoutError("Timed out waiting for async_read() to start.")
|
||||
|
||||
def disconnect(self):
|
||||
if not self.is_connected:
|
||||
@@ -297,18 +458,61 @@ class OpenCVCamera:
|
||||
f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
if self.thread is not None and self.thread.is_alive():
|
||||
# wait for the thread to finish
|
||||
if self.thread is not None:
|
||||
self.stop_event.set()
|
||||
self.thread.join()
|
||||
self.thread.join() # wait for the thread to finish
|
||||
self.thread = None
|
||||
self.stop_event = None
|
||||
|
||||
self.camera.release()
|
||||
self.camera = None
|
||||
|
||||
self.is_connected = False
|
||||
|
||||
def __del__(self):
|
||||
if getattr(self, "is_connected", False):
|
||||
self.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Save a few frames using `OpenCVCamera` for all cameras connected to the computer, or a selected subset."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--camera-ids",
|
||||
type=int,
|
||||
nargs="*",
|
||||
default=None,
|
||||
help="List of camera indices used to instantiate the `OpenCVCamera`. If not provided, find and use all available camera indices.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fps",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--width",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Set the width for all cameras. If not provided, use the default width of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--height",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Set the height for all cameras. If not provided, use the default height of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--images-dir",
|
||||
type=Path,
|
||||
default="outputs/images_from_opencv_cameras",
|
||||
help="Set directory to save a few frames for each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--record-time-s",
|
||||
type=float,
|
||||
default=4.0,
|
||||
help="Set the number of seconds used to record the frames. By default, 2 seconds.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
save_images_from_cameras(**vars(args))
|
||||
|
||||
@@ -1,55 +1,8 @@
|
||||
from pathlib import Path
|
||||
from typing import Protocol
|
||||
|
||||
import cv2
|
||||
import einops
|
||||
import numpy as np
|
||||
|
||||
|
||||
def write_shape_on_image_inplace(image):
|
||||
height, width = image.shape[:2]
|
||||
text = f"Width: {width} Height: {height}"
|
||||
|
||||
# Define the font, scale, color, and thickness
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 1
|
||||
color = (255, 0, 0) # Blue in BGR
|
||||
thickness = 2
|
||||
|
||||
position = (10, height - 10) # 10 pixels from the bottom-left corner
|
||||
cv2.putText(image, text, position, font, font_scale, color, thickness)
|
||||
|
||||
|
||||
def save_color_image(image, path, write_shape=False):
|
||||
path = Path(path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
if write_shape:
|
||||
write_shape_on_image_inplace(image)
|
||||
cv2.imwrite(str(path), image)
|
||||
|
||||
|
||||
def save_depth_image(depth, path, write_shape=False):
|
||||
path = Path(path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Apply colormap on depth image (image must be converted to 8-bit per pixel first)
|
||||
depth_image = cv2.applyColorMap(cv2.convertScaleAbs(depth, alpha=0.03), cv2.COLORMAP_JET)
|
||||
|
||||
if write_shape:
|
||||
write_shape_on_image_inplace(depth_image)
|
||||
cv2.imwrite(str(path), depth_image)
|
||||
|
||||
|
||||
def convert_torch_image_to_cv2(tensor, rgb_to_bgr=True):
|
||||
assert tensor.ndim == 3
|
||||
c, h, w = tensor.shape
|
||||
assert c < h and c < w
|
||||
color_image = einops.rearrange(tensor, "c h w -> h w c").numpy()
|
||||
if rgb_to_bgr:
|
||||
color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)
|
||||
return color_image
|
||||
|
||||
|
||||
# Defines a camera type
|
||||
class Camera(Protocol):
|
||||
def connect(self): ...
|
||||
|
||||
327
lerobot/common/robot_devices/control_utils.py
Normal file
327
lerobot/common/robot_devices/control_utils.py
Normal file
@@ -0,0 +1,327 @@
|
||||
########################################################################################
|
||||
# Utilities
|
||||
########################################################################################
|
||||
|
||||
|
||||
import logging
|
||||
import time
|
||||
import traceback
|
||||
from contextlib import nullcontext
|
||||
from copy import copy
|
||||
from functools import cache
|
||||
|
||||
import cv2
|
||||
import torch
|
||||
import tqdm
|
||||
from termcolor import colored
|
||||
|
||||
from lerobot.common.datasets.populate_dataset import add_frame, safe_stop_image_writer
|
||||
from lerobot.common.policies.factory import make_policy
|
||||
from lerobot.common.robot_devices.robots.utils import Robot
|
||||
from lerobot.common.robot_devices.utils import busy_wait
|
||||
from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, set_global_seed
|
||||
from lerobot.scripts.eval import get_pretrained_policy_path
|
||||
|
||||
|
||||
def log_control_info(robot: Robot, dt_s, episode_index=None, frame_index=None, fps=None):
|
||||
log_items = []
|
||||
if episode_index is not None:
|
||||
log_items.append(f"ep:{episode_index}")
|
||||
if frame_index is not None:
|
||||
log_items.append(f"frame:{frame_index}")
|
||||
|
||||
def log_dt(shortname, dt_val_s):
|
||||
nonlocal log_items, fps
|
||||
info_str = f"{shortname}:{dt_val_s * 1000:5.2f} ({1/ dt_val_s:3.1f}hz)"
|
||||
if fps is not None:
|
||||
actual_fps = 1 / dt_val_s
|
||||
if actual_fps < fps - 1:
|
||||
info_str = colored(info_str, "yellow")
|
||||
log_items.append(info_str)
|
||||
|
||||
# total step time displayed in milliseconds and its frequency
|
||||
log_dt("dt", dt_s)
|
||||
|
||||
# TODO(aliberts): move robot-specific logs logic in robot.print_logs()
|
||||
if not robot.robot_type.startswith("stretch"):
|
||||
for name in robot.leader_arms:
|
||||
key = f"read_leader_{name}_pos_dt_s"
|
||||
if key in robot.logs:
|
||||
log_dt("dtRlead", robot.logs[key])
|
||||
|
||||
for name in robot.follower_arms:
|
||||
key = f"write_follower_{name}_goal_pos_dt_s"
|
||||
if key in robot.logs:
|
||||
log_dt("dtWfoll", robot.logs[key])
|
||||
|
||||
key = f"read_follower_{name}_pos_dt_s"
|
||||
if key in robot.logs:
|
||||
log_dt("dtRfoll", robot.logs[key])
|
||||
|
||||
for name in robot.cameras:
|
||||
key = f"read_camera_{name}_dt_s"
|
||||
if key in robot.logs:
|
||||
log_dt(f"dtR{name}", robot.logs[key])
|
||||
|
||||
info_str = " ".join(log_items)
|
||||
logging.info(info_str)
|
||||
|
||||
|
||||
@cache
|
||||
def is_headless():
|
||||
"""Detects if python is running without a monitor."""
|
||||
try:
|
||||
import pynput # noqa
|
||||
|
||||
return False
|
||||
except Exception:
|
||||
print(
|
||||
"Error trying to import pynput. Switching to headless mode. "
|
||||
"As a result, the video stream from the cameras won't be shown, "
|
||||
"and you won't be able to change the control flow with keyboards. "
|
||||
"For more info, see traceback below.\n"
|
||||
)
|
||||
traceback.print_exc()
|
||||
print()
|
||||
return True
|
||||
|
||||
|
||||
def has_method(_object: object, method_name: str):
|
||||
return hasattr(_object, method_name) and callable(getattr(_object, method_name))
|
||||
|
||||
|
||||
def predict_action(observation, policy, device, use_amp):
|
||||
observation = copy(observation)
|
||||
with (
|
||||
torch.inference_mode(),
|
||||
torch.autocast(device_type=device.type) if device.type == "cuda" and use_amp else nullcontext(),
|
||||
):
|
||||
# Convert to pytorch format: channel first and float32 in [0,1] with batch dimension
|
||||
for name in observation:
|
||||
if "image" in name:
|
||||
observation[name] = observation[name].type(torch.float32) / 255
|
||||
observation[name] = observation[name].permute(2, 0, 1).contiguous()
|
||||
observation[name] = observation[name].unsqueeze(0)
|
||||
observation[name] = observation[name].to(device)
|
||||
|
||||
# Compute the next action with the policy
|
||||
# based on the current observation
|
||||
action = policy.select_action(observation)
|
||||
|
||||
# Remove batch dimension
|
||||
action = action.squeeze(0)
|
||||
|
||||
# Move to cpu, if not already the case
|
||||
action = action.to("cpu")
|
||||
|
||||
return action
|
||||
|
||||
|
||||
def init_keyboard_listener():
|
||||
# Allow to exit early while recording an episode or resetting the environment,
|
||||
# by tapping the right arrow key '->'. This might require a sudo permission
|
||||
# to allow your terminal to monitor keyboard events.
|
||||
events = {}
|
||||
events["exit_early"] = False
|
||||
events["rerecord_episode"] = False
|
||||
events["stop_recording"] = False
|
||||
|
||||
if is_headless():
|
||||
logging.warning(
|
||||
"Headless environment detected. On-screen cameras display and keyboard inputs will not be available."
|
||||
)
|
||||
listener = None
|
||||
return listener, events
|
||||
|
||||
# Only import pynput if not in a headless environment
|
||||
from pynput import keyboard
|
||||
|
||||
def on_press(key):
|
||||
try:
|
||||
if key == keyboard.Key.right:
|
||||
print("Right arrow key pressed. Exiting loop...")
|
||||
events["exit_early"] = True
|
||||
elif key == keyboard.Key.left:
|
||||
print("Left arrow key pressed. Exiting loop and rerecord the last episode...")
|
||||
events["rerecord_episode"] = True
|
||||
events["exit_early"] = True
|
||||
elif key == keyboard.Key.esc:
|
||||
print("Escape key pressed. Stopping data recording...")
|
||||
events["stop_recording"] = True
|
||||
events["exit_early"] = True
|
||||
except Exception as e:
|
||||
print(f"Error handling key press: {e}")
|
||||
|
||||
listener = keyboard.Listener(on_press=on_press)
|
||||
listener.start()
|
||||
|
||||
return listener, events
|
||||
|
||||
|
||||
def init_policy(pretrained_policy_name_or_path, policy_overrides):
|
||||
"""Instantiate the policy and load fps, device and use_amp from config yaml"""
|
||||
pretrained_policy_path = get_pretrained_policy_path(pretrained_policy_name_or_path)
|
||||
hydra_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", policy_overrides)
|
||||
policy = make_policy(hydra_cfg=hydra_cfg, pretrained_policy_name_or_path=pretrained_policy_path)
|
||||
|
||||
# Check device is available
|
||||
device = get_safe_torch_device(hydra_cfg.device, log=True)
|
||||
use_amp = hydra_cfg.use_amp
|
||||
policy_fps = hydra_cfg.env.fps
|
||||
|
||||
policy.eval()
|
||||
policy.to(device)
|
||||
|
||||
torch.backends.cudnn.benchmark = True
|
||||
torch.backends.cuda.matmul.allow_tf32 = True
|
||||
set_global_seed(hydra_cfg.seed)
|
||||
return policy, policy_fps, device, use_amp
|
||||
|
||||
|
||||
def warmup_record(
|
||||
robot,
|
||||
events,
|
||||
enable_teloperation,
|
||||
warmup_time_s,
|
||||
display_cameras,
|
||||
fps,
|
||||
):
|
||||
control_loop(
|
||||
robot=robot,
|
||||
control_time_s=warmup_time_s,
|
||||
display_cameras=display_cameras,
|
||||
events=events,
|
||||
fps=fps,
|
||||
teleoperate=enable_teloperation,
|
||||
)
|
||||
|
||||
|
||||
def record_episode(
|
||||
robot,
|
||||
dataset,
|
||||
events,
|
||||
episode_time_s,
|
||||
display_cameras,
|
||||
policy,
|
||||
device,
|
||||
use_amp,
|
||||
fps,
|
||||
):
|
||||
control_loop(
|
||||
robot=robot,
|
||||
control_time_s=episode_time_s,
|
||||
display_cameras=display_cameras,
|
||||
dataset=dataset,
|
||||
events=events,
|
||||
policy=policy,
|
||||
device=device,
|
||||
use_amp=use_amp,
|
||||
fps=fps,
|
||||
teleoperate=policy is None,
|
||||
)
|
||||
|
||||
|
||||
@safe_stop_image_writer
|
||||
def control_loop(
|
||||
robot,
|
||||
control_time_s,
|
||||
teleoperate=False,
|
||||
display_cameras=False,
|
||||
dataset=None,
|
||||
events=None,
|
||||
policy=None,
|
||||
device=None,
|
||||
use_amp=None,
|
||||
fps=None,
|
||||
):
|
||||
# TODO(rcadene): Add option to record logs
|
||||
if not robot.is_connected:
|
||||
robot.connect()
|
||||
|
||||
if events is None:
|
||||
events = {"exit_early": False}
|
||||
|
||||
if teleoperate and policy is not None:
|
||||
raise ValueError("When `teleoperate` is True, `policy` should be None.")
|
||||
|
||||
if dataset is not None and fps is not None and dataset["fps"] != fps:
|
||||
raise ValueError(f"The dataset fps should be equal to requested fps ({dataset['fps']} != {fps}).")
|
||||
|
||||
timestamp = 0
|
||||
start_episode_t = time.perf_counter()
|
||||
while timestamp < control_time_s:
|
||||
start_loop_t = time.perf_counter()
|
||||
|
||||
if teleoperate:
|
||||
observation, action = robot.teleop_step(record_data=True)
|
||||
else:
|
||||
observation = robot.capture_observation()
|
||||
|
||||
if policy is not None:
|
||||
pred_action = predict_action(observation, policy, device, use_amp)
|
||||
# Action can eventually be clipped using `max_relative_target`,
|
||||
# so action actually sent is saved in the dataset.
|
||||
action = robot.send_action(pred_action)
|
||||
action = {"action": action}
|
||||
|
||||
if dataset is not None:
|
||||
add_frame(dataset, observation, action)
|
||||
|
||||
if display_cameras and not is_headless():
|
||||
image_keys = [key for key in observation if "image" in key]
|
||||
for key in image_keys:
|
||||
cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR))
|
||||
cv2.waitKey(1)
|
||||
|
||||
if fps is not None:
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
log_control_info(robot, dt_s, fps=fps)
|
||||
|
||||
timestamp = time.perf_counter() - start_episode_t
|
||||
if events["exit_early"]:
|
||||
events["exit_early"] = False
|
||||
break
|
||||
|
||||
|
||||
def reset_environment(robot, events, reset_time_s):
|
||||
# TODO(rcadene): refactor warmup_record and reset_environment
|
||||
# TODO(alibets): allow for teleop during reset
|
||||
if has_method(robot, "teleop_safety_stop"):
|
||||
robot.teleop_safety_stop()
|
||||
|
||||
timestamp = 0
|
||||
start_vencod_t = time.perf_counter()
|
||||
|
||||
# Wait if necessary
|
||||
with tqdm.tqdm(total=reset_time_s, desc="Waiting") as pbar:
|
||||
while timestamp < reset_time_s:
|
||||
time.sleep(1)
|
||||
timestamp = time.perf_counter() - start_vencod_t
|
||||
pbar.update(1)
|
||||
if events["exit_early"]:
|
||||
events["exit_early"] = False
|
||||
break
|
||||
|
||||
|
||||
def stop_recording(robot, listener, display_cameras):
|
||||
robot.disconnect()
|
||||
|
||||
if not is_headless():
|
||||
if listener is not None:
|
||||
listener.stop()
|
||||
|
||||
if display_cameras:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def sanity_check_dataset_name(repo_id, policy):
|
||||
_, dataset_name = repo_id.split("/")
|
||||
# either repo_id doesnt start with "eval_" and there is no policy
|
||||
# or repo_id starts with "eval_" and there is a policy
|
||||
if dataset_name.startswith("eval_") == (policy is None):
|
||||
raise ValueError(
|
||||
f"Your dataset name begins by 'eval_' ({dataset_name}) but no policy is provided ({policy})."
|
||||
)
|
||||
@@ -7,17 +7,6 @@ from copy import deepcopy
|
||||
|
||||
import numpy as np
|
||||
import tqdm
|
||||
from dynamixel_sdk import (
|
||||
COMM_SUCCESS,
|
||||
DXL_HIBYTE,
|
||||
DXL_HIWORD,
|
||||
DXL_LOBYTE,
|
||||
DXL_LOWORD,
|
||||
GroupSyncRead,
|
||||
GroupSyncWrite,
|
||||
PacketHandler,
|
||||
PortHandler,
|
||||
)
|
||||
|
||||
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
|
||||
from lerobot.common.utils.utils import capture_timestamp_utc
|
||||
@@ -165,24 +154,29 @@ def convert_degrees_to_steps(degrees: float | np.ndarray, models: str | list[str
|
||||
return steps
|
||||
|
||||
|
||||
def convert_to_bytes(value, bytes):
|
||||
def convert_to_bytes(value, bytes, mock=False):
|
||||
if mock:
|
||||
return value
|
||||
|
||||
import dynamixel_sdk as dxl
|
||||
|
||||
# Note: No need to convert back into unsigned int, since this byte preprocessing
|
||||
# already handles it for us.
|
||||
if bytes == 1:
|
||||
data = [
|
||||
DXL_LOBYTE(DXL_LOWORD(value)),
|
||||
dxl.DXL_LOBYTE(dxl.DXL_LOWORD(value)),
|
||||
]
|
||||
elif bytes == 2:
|
||||
data = [
|
||||
DXL_LOBYTE(DXL_LOWORD(value)),
|
||||
DXL_HIBYTE(DXL_LOWORD(value)),
|
||||
dxl.DXL_LOBYTE(dxl.DXL_LOWORD(value)),
|
||||
dxl.DXL_HIBYTE(dxl.DXL_LOWORD(value)),
|
||||
]
|
||||
elif bytes == 4:
|
||||
data = [
|
||||
DXL_LOBYTE(DXL_LOWORD(value)),
|
||||
DXL_HIBYTE(DXL_LOWORD(value)),
|
||||
DXL_LOBYTE(DXL_HIWORD(value)),
|
||||
DXL_HIBYTE(DXL_HIWORD(value)),
|
||||
dxl.DXL_LOBYTE(dxl.DXL_LOWORD(value)),
|
||||
dxl.DXL_HIBYTE(dxl.DXL_LOWORD(value)),
|
||||
dxl.DXL_LOBYTE(dxl.DXL_HIWORD(value)),
|
||||
dxl.DXL_HIBYTE(dxl.DXL_HIWORD(value)),
|
||||
]
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
@@ -303,9 +297,11 @@ class DynamixelMotorsBus:
|
||||
motors: dict[str, tuple[int, str]],
|
||||
extra_model_control_table: dict[str, list[tuple]] | None = None,
|
||||
extra_model_resolution: dict[str, int] | None = None,
|
||||
mock=False,
|
||||
):
|
||||
self.port = port
|
||||
self.motors = motors
|
||||
self.mock = mock
|
||||
|
||||
self.model_ctrl_table = deepcopy(MODEL_CONTROL_TABLE)
|
||||
if extra_model_control_table:
|
||||
@@ -329,8 +325,13 @@ class DynamixelMotorsBus:
|
||||
f"DynamixelMotorsBus({self.port}) is already connected. Do not call `motors_bus.connect()` twice."
|
||||
)
|
||||
|
||||
self.port_handler = PortHandler(self.port)
|
||||
self.packet_handler = PacketHandler(PROTOCOL_VERSION)
|
||||
if self.mock:
|
||||
import tests.mock_dynamixel_sdk as dxl
|
||||
else:
|
||||
import dynamixel_sdk as dxl
|
||||
|
||||
self.port_handler = dxl.PortHandler(self.port)
|
||||
self.packet_handler = dxl.PacketHandler(PROTOCOL_VERSION)
|
||||
|
||||
try:
|
||||
if not self.port_handler.openPort():
|
||||
@@ -348,10 +349,17 @@ class DynamixelMotorsBus:
|
||||
self.port_handler.setPacketTimeoutMillis(TIMEOUT_MS)
|
||||
|
||||
def reconnect(self):
|
||||
self.port_handler = PortHandler(self.port)
|
||||
self.packet_handler = PacketHandler(PROTOCOL_VERSION)
|
||||
if self.mock:
|
||||
import tests.mock_dynamixel_sdk as dxl
|
||||
else:
|
||||
import dynamixel_sdk as dxl
|
||||
|
||||
self.port_handler = dxl.PortHandler(self.port)
|
||||
self.packet_handler = dxl.PacketHandler(PROTOCOL_VERSION)
|
||||
|
||||
if not self.port_handler.openPort():
|
||||
raise OSError(f"Failed to open port '{self.port}'.")
|
||||
|
||||
self.is_connected = True
|
||||
|
||||
def are_motors_configured(self):
|
||||
@@ -631,6 +639,11 @@ class DynamixelMotorsBus:
|
||||
return values
|
||||
|
||||
def read_with_motor_ids(self, motor_models, motor_ids, data_name):
|
||||
if self.mock:
|
||||
import tests.mock_dynamixel_sdk as dxl
|
||||
else:
|
||||
import dynamixel_sdk as dxl
|
||||
|
||||
return_list = True
|
||||
if not isinstance(motor_ids, list):
|
||||
return_list = False
|
||||
@@ -638,12 +651,12 @@ class DynamixelMotorsBus:
|
||||
|
||||
assert_same_address(self.model_ctrl_table, self.motor_models, data_name)
|
||||
addr, bytes = self.model_ctrl_table[motor_models[0]][data_name]
|
||||
group = GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes)
|
||||
group = dxl.GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes)
|
||||
for idx in motor_ids:
|
||||
group.addParam(idx)
|
||||
|
||||
comm = group.txRxPacket()
|
||||
if comm != COMM_SUCCESS:
|
||||
if comm != dxl.COMM_SUCCESS:
|
||||
raise ConnectionError(
|
||||
f"Read failed due to communication error on port {self.port_handler.port_name} for indices {motor_ids}: "
|
||||
f"{self.packet_handler.getTxRxResult(comm)}"
|
||||
@@ -667,6 +680,11 @@ class DynamixelMotorsBus:
|
||||
|
||||
start_time = time.perf_counter()
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_dynamixel_sdk as dxl
|
||||
else:
|
||||
import dynamixel_sdk as dxl
|
||||
|
||||
if motor_names is None:
|
||||
motor_names = self.motor_names
|
||||
|
||||
@@ -686,16 +704,18 @@ class DynamixelMotorsBus:
|
||||
|
||||
if data_name not in self.group_readers:
|
||||
# create new group reader
|
||||
self.group_readers[group_key] = GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes)
|
||||
self.group_readers[group_key] = dxl.GroupSyncRead(
|
||||
self.port_handler, self.packet_handler, addr, bytes
|
||||
)
|
||||
for idx in motor_ids:
|
||||
self.group_readers[group_key].addParam(idx)
|
||||
|
||||
for _ in range(NUM_READ_RETRY):
|
||||
comm = self.group_readers[group_key].txRxPacket()
|
||||
if comm == COMM_SUCCESS:
|
||||
if comm == dxl.COMM_SUCCESS:
|
||||
break
|
||||
|
||||
if comm != COMM_SUCCESS:
|
||||
if comm != dxl.COMM_SUCCESS:
|
||||
raise ConnectionError(
|
||||
f"Read failed due to communication error on port {self.port} for group_key {group_key}: "
|
||||
f"{self.packet_handler.getTxRxResult(comm)}"
|
||||
@@ -726,6 +746,11 @@ class DynamixelMotorsBus:
|
||||
return values
|
||||
|
||||
def write_with_motor_ids(self, motor_models, motor_ids, data_name, values):
|
||||
if self.mock:
|
||||
import tests.mock_dynamixel_sdk as dxl
|
||||
else:
|
||||
import dynamixel_sdk as dxl
|
||||
|
||||
if not isinstance(motor_ids, list):
|
||||
motor_ids = [motor_ids]
|
||||
if not isinstance(values, list):
|
||||
@@ -733,13 +758,13 @@ class DynamixelMotorsBus:
|
||||
|
||||
assert_same_address(self.model_ctrl_table, motor_models, data_name)
|
||||
addr, bytes = self.model_ctrl_table[motor_models[0]][data_name]
|
||||
group = GroupSyncWrite(self.port_handler, self.packet_handler, addr, bytes)
|
||||
group = dxl.GroupSyncWrite(self.port_handler, self.packet_handler, addr, bytes)
|
||||
for idx, value in zip(motor_ids, values, strict=True):
|
||||
data = convert_to_bytes(value, bytes)
|
||||
data = convert_to_bytes(value, bytes, self.mock)
|
||||
group.addParam(idx, data)
|
||||
|
||||
comm = group.txPacket()
|
||||
if comm != COMM_SUCCESS:
|
||||
if comm != dxl.COMM_SUCCESS:
|
||||
raise ConnectionError(
|
||||
f"Write failed due to communication error on port {self.port_handler.port_name} for indices {motor_ids}: "
|
||||
f"{self.packet_handler.getTxRxResult(comm)}"
|
||||
@@ -753,6 +778,11 @@ class DynamixelMotorsBus:
|
||||
|
||||
start_time = time.perf_counter()
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_dynamixel_sdk as dxl
|
||||
else:
|
||||
import dynamixel_sdk as dxl
|
||||
|
||||
if motor_names is None:
|
||||
motor_names = self.motor_names
|
||||
|
||||
@@ -782,19 +812,19 @@ class DynamixelMotorsBus:
|
||||
|
||||
init_group = data_name not in self.group_readers
|
||||
if init_group:
|
||||
self.group_writers[group_key] = GroupSyncWrite(
|
||||
self.group_writers[group_key] = dxl.GroupSyncWrite(
|
||||
self.port_handler, self.packet_handler, addr, bytes
|
||||
)
|
||||
|
||||
for idx, value in zip(motor_ids, values, strict=True):
|
||||
data = convert_to_bytes(value, bytes)
|
||||
data = convert_to_bytes(value, bytes, self.mock)
|
||||
if init_group:
|
||||
self.group_writers[group_key].addParam(idx, data)
|
||||
else:
|
||||
self.group_writers[group_key].changeParam(idx, data)
|
||||
|
||||
comm = self.group_writers[group_key].txPacket()
|
||||
if comm != COMM_SUCCESS:
|
||||
if comm != dxl.COMM_SUCCESS:
|
||||
raise ConnectionError(
|
||||
f"Write failed due to communication error on port {self.port} for group_key {group_key}: "
|
||||
f"{self.packet_handler.getTxRxResult(comm)}"
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import hydra
|
||||
from omegaconf import DictConfig
|
||||
|
||||
from lerobot.common.robot_devices.robots.utils import Robot
|
||||
|
||||
def make_robot(cfg: DictConfig):
|
||||
|
||||
def make_robot(cfg: DictConfig) -> Robot:
|
||||
robot = hydra.utils.instantiate(cfg)
|
||||
return robot
|
||||
|
||||
@@ -350,6 +350,25 @@ class ManipulatorRobot:
|
||||
self.is_connected = False
|
||||
self.logs = {}
|
||||
|
||||
@property
|
||||
def has_camera(self):
|
||||
return len(self.cameras) > 0
|
||||
|
||||
@property
|
||||
def num_cameras(self):
|
||||
return len(self.cameras)
|
||||
|
||||
@property
|
||||
def available_arms(self):
|
||||
available_arms = []
|
||||
for name in self.follower_arms:
|
||||
arm_id = get_arm_id(name, "follower")
|
||||
available_arms.append(arm_id)
|
||||
for name in self.leader_arms:
|
||||
arm_id = get_arm_id(name, "leader")
|
||||
available_arms.append(arm_id)
|
||||
return available_arms
|
||||
|
||||
def connect(self):
|
||||
if self.is_connected:
|
||||
raise RobotDeviceAlreadyConnectedError(
|
||||
@@ -539,8 +558,8 @@ class ManipulatorRobot:
|
||||
self.follower_arms[name].write("P_Coefficient", 32, "shoulder_pan")
|
||||
# self.follower_arms[name].write("D_Coefficient", 230, "shoulder_pan")
|
||||
self.follower_arms[name].write("D_Coefficient", 32, "shoulder_pan")
|
||||
#self.follower_arms[name].write("Acceleration", 254)
|
||||
#self.follower_arms[name].write("Minimum_Startup_Force", 16)
|
||||
# self.follower_arms[name].write("Acceleration", 254)
|
||||
# self.follower_arms[name].write("Minimum_Startup_Force", 16)
|
||||
self.follower_arms[name].write("Lock", 0)
|
||||
self.follower_arms[name].write("Maximum_Acceleration", 250)
|
||||
|
||||
@@ -709,6 +728,10 @@ class ManipulatorRobot:
|
||||
|
||||
return torch.cat(action_sent)
|
||||
|
||||
def print_logs(self):
|
||||
pass
|
||||
# TODO(aliberts): move robot-specific logs logic here
|
||||
|
||||
def disconnect(self):
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
|
||||
216
lerobot/common/robot_devices/robots/stretch.py
Normal file
216
lerobot/common/robot_devices/robots/stretch.py
Normal file
@@ -0,0 +1,216 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field, replace
|
||||
|
||||
import torch
|
||||
from stretch_body.gamepad_teleop import GamePadTeleop
|
||||
from stretch_body.robot import Robot as StretchAPI
|
||||
from stretch_body.robot_params import RobotParams
|
||||
|
||||
from lerobot.common.robot_devices.cameras.utils import Camera
|
||||
|
||||
|
||||
@dataclass
|
||||
class StretchRobotConfig:
|
||||
robot_type: str | None = "stretch"
|
||||
cameras: dict[str, Camera] = field(default_factory=lambda: {})
|
||||
# TODO(aliberts): add feature with max_relative target
|
||||
# TODO(aliberts): add comment on max_relative target
|
||||
max_relative_target: list[float] | float | None = None
|
||||
|
||||
|
||||
class StretchRobot(StretchAPI):
|
||||
"""Wrapper of stretch_body.robot.Robot"""
|
||||
|
||||
def __init__(self, config: StretchRobotConfig | None = None, **kwargs):
|
||||
super().__init__()
|
||||
if config is None:
|
||||
config = StretchRobotConfig()
|
||||
# Overwrite config arguments using kwargs
|
||||
self.config = replace(config, **kwargs)
|
||||
|
||||
self.robot_type = self.config.robot_type
|
||||
self.cameras = self.config.cameras
|
||||
self.is_connected = False
|
||||
self.teleop = None
|
||||
self.logs = {}
|
||||
|
||||
# TODO(aliberts): test this
|
||||
RobotParams.set_logging_level("WARNING")
|
||||
RobotParams.set_logging_formatter("brief_console_formatter")
|
||||
|
||||
self.state_keys = None
|
||||
self.action_keys = None
|
||||
|
||||
def connect(self) -> None:
|
||||
self.is_connected = self.startup()
|
||||
if not self.is_connected:
|
||||
print("Another process is already using Stretch. Try running 'stretch_free_robot_process.py'")
|
||||
raise ConnectionError()
|
||||
|
||||
for name in self.cameras:
|
||||
self.cameras[name].connect()
|
||||
self.is_connected = self.is_connected and self.cameras[name].is_connected
|
||||
|
||||
if not self.is_connected:
|
||||
print("Could not connect to the cameras, check that all cameras are plugged-in.")
|
||||
raise ConnectionError()
|
||||
|
||||
self.run_calibration()
|
||||
|
||||
def run_calibration(self) -> None:
|
||||
if not self.is_homed():
|
||||
self.home()
|
||||
|
||||
def teleop_step(
|
||||
self, record_data=False
|
||||
) -> None | tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
|
||||
# TODO(aliberts): return ndarrays instead of torch.Tensors
|
||||
if not self.is_connected:
|
||||
raise ConnectionError()
|
||||
|
||||
if self.teleop is None:
|
||||
self.teleop = GamePadTeleop(robot_instance=False)
|
||||
self.teleop.startup(robot=self)
|
||||
|
||||
before_read_t = time.perf_counter()
|
||||
state = self.get_state()
|
||||
action = self.teleop.gamepad_controller.get_state()
|
||||
self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t
|
||||
|
||||
before_write_t = time.perf_counter()
|
||||
self.teleop.do_motion(robot=self)
|
||||
self.push_command()
|
||||
self.logs["write_pos_dt_s"] = time.perf_counter() - before_write_t
|
||||
|
||||
if self.state_keys is None:
|
||||
self.state_keys = list(state)
|
||||
|
||||
if not record_data:
|
||||
return
|
||||
|
||||
state = torch.as_tensor(list(state.values()))
|
||||
action = torch.as_tensor(list(action.values()))
|
||||
|
||||
# Capture images from cameras
|
||||
images = {}
|
||||
for name in self.cameras:
|
||||
before_camread_t = time.perf_counter()
|
||||
images[name] = self.cameras[name].async_read()
|
||||
images[name] = torch.from_numpy(images[name])
|
||||
self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
|
||||
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
|
||||
|
||||
# Populate output dictionnaries
|
||||
obs_dict, action_dict = {}, {}
|
||||
obs_dict["observation.state"] = state
|
||||
action_dict["action"] = action
|
||||
for name in self.cameras:
|
||||
obs_dict[f"observation.images.{name}"] = images[name]
|
||||
|
||||
return obs_dict, action_dict
|
||||
|
||||
def get_state(self) -> dict:
|
||||
status = self.get_status()
|
||||
return {
|
||||
"head_pan.pos": status["head"]["head_pan"]["pos"],
|
||||
"head_tilt.pos": status["head"]["head_tilt"]["pos"],
|
||||
"lift.pos": status["lift"]["pos"],
|
||||
"arm.pos": status["arm"]["pos"],
|
||||
"wrist_pitch.pos": status["end_of_arm"]["wrist_pitch"]["pos"],
|
||||
"wrist_roll.pos": status["end_of_arm"]["wrist_roll"]["pos"],
|
||||
"wrist_yaw.pos": status["end_of_arm"]["wrist_yaw"]["pos"],
|
||||
"gripper.pos": status["end_of_arm"]["stretch_gripper"]["pos"],
|
||||
"base_x.vel": status["base"]["x_vel"],
|
||||
"base_y.vel": status["base"]["y_vel"],
|
||||
"base_theta.vel": status["base"]["theta_vel"],
|
||||
}
|
||||
|
||||
def capture_observation(self) -> dict:
|
||||
# TODO(aliberts): return ndarrays instead of torch.Tensors
|
||||
before_read_t = time.perf_counter()
|
||||
state = self.get_state()
|
||||
self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t
|
||||
|
||||
if self.state_keys is None:
|
||||
self.state_keys = list(state)
|
||||
|
||||
state = torch.as_tensor(list(state.values()))
|
||||
|
||||
# Capture images from cameras
|
||||
images = {}
|
||||
for name in self.cameras:
|
||||
before_camread_t = time.perf_counter()
|
||||
images[name] = self.cameras[name].async_read()
|
||||
images[name] = torch.from_numpy(images[name])
|
||||
self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
|
||||
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
|
||||
|
||||
# Populate output dictionnaries
|
||||
obs_dict = {}
|
||||
obs_dict["observation.state"] = state
|
||||
for name in self.cameras:
|
||||
obs_dict[f"observation.images.{name}"] = images[name]
|
||||
|
||||
return obs_dict
|
||||
|
||||
def send_action(self, action: torch.Tensor) -> torch.Tensor:
|
||||
# TODO(aliberts): return ndarrays instead of torch.Tensors
|
||||
if not self.is_connected:
|
||||
raise ConnectionError()
|
||||
|
||||
if self.teleop is None:
|
||||
self.teleop = GamePadTeleop(robot_instance=False)
|
||||
self.teleop.startup(robot=self)
|
||||
|
||||
if self.action_keys is None:
|
||||
dummy_action = self.teleop.gamepad_controller.get_state()
|
||||
self.action_keys = list(dummy_action.keys())
|
||||
|
||||
action_dict = dict(zip(self.action_keys, action.tolist(), strict=True))
|
||||
|
||||
before_write_t = time.perf_counter()
|
||||
self.teleop.do_motion(state=action_dict, robot=self)
|
||||
self.push_command()
|
||||
self.logs["write_pos_dt_s"] = time.perf_counter() - before_write_t
|
||||
|
||||
# TODO(aliberts): return action_sent when motion is limited
|
||||
return action
|
||||
|
||||
def print_logs(self) -> None:
|
||||
pass
|
||||
# TODO(aliberts): move robot-specific logs logic here
|
||||
|
||||
def teleop_safety_stop(self) -> None:
|
||||
if self.teleop is not None:
|
||||
self.teleop._safety_stop(robot=self)
|
||||
|
||||
def disconnect(self) -> None:
|
||||
self.stop()
|
||||
if self.teleop is not None:
|
||||
self.teleop.gamepad_controller.stop()
|
||||
self.teleop.stop()
|
||||
|
||||
if len(self.cameras) > 0:
|
||||
for cam in self.cameras.values():
|
||||
cam.disconnect()
|
||||
|
||||
self.is_connected = False
|
||||
|
||||
def __del__(self):
|
||||
self.disconnect()
|
||||
@@ -9,8 +9,12 @@ def get_arm_id(name, arm_type):
|
||||
|
||||
|
||||
class Robot(Protocol):
|
||||
def init_teleop(self): ...
|
||||
# TODO(rcadene, aliberts): Add unit test checking the protocol is implemented in the corresponding classes
|
||||
robot_type: str
|
||||
|
||||
def connect(self): ...
|
||||
def run_calibration(self): ...
|
||||
def teleop_step(self, record_data=False): ...
|
||||
def capture_observation(self): ...
|
||||
def send_action(self, action): ...
|
||||
def disconnect(self): ...
|
||||
|
||||
@@ -16,6 +16,20 @@ def busy_wait(seconds):
|
||||
time.sleep(seconds)
|
||||
|
||||
|
||||
def safe_disconnect(func):
|
||||
# TODO(aliberts): Allow to pass custom exceptions
|
||||
# (e.g. ThreadServiceExit, KeyboardInterrupt, SystemExit, UnpluggedError, DynamixelCommError)
|
||||
def wrapper(robot, *args, **kwargs):
|
||||
try:
|
||||
return func(robot, *args, **kwargs)
|
||||
except Exception as e:
|
||||
if robot.is_connected:
|
||||
robot.disconnect()
|
||||
raise e
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
class RobotDeviceNotConnectedError(Exception):
|
||||
"""Exception raised when the robot device is not connected."""
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
import logging
|
||||
import os
|
||||
import os.path as osp
|
||||
import platform
|
||||
import random
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime, timezone
|
||||
@@ -28,6 +29,12 @@ import torch
|
||||
from omegaconf import DictConfig
|
||||
|
||||
|
||||
def none_or_int(value):
|
||||
if value == "None":
|
||||
return None
|
||||
return int(value)
|
||||
|
||||
|
||||
def inside_slurm():
|
||||
"""Check whether the python process was launched through slurm"""
|
||||
# TODO(rcadene): return False for interactive mode `--pty bash`
|
||||
@@ -183,3 +190,30 @@ def print_cuda_memory_usage():
|
||||
|
||||
def capture_timestamp_utc():
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
def say(text, blocking=False):
|
||||
# Check if mac, linux, or windows.
|
||||
if platform.system() == "Darwin":
|
||||
cmd = f'say "{text}"'
|
||||
elif platform.system() == "Linux":
|
||||
cmd = f'spd-say "{text}"'
|
||||
elif platform.system() == "Windows":
|
||||
cmd = (
|
||||
'PowerShell -Command "Add-Type -AssemblyName System.Speech; '
|
||||
f"(New-Object System.Speech.Synthesis.SpeechSynthesizer).Speak('{text}')\""
|
||||
)
|
||||
|
||||
if not blocking and platform.system() in ["Darwin", "Linux"]:
|
||||
# TODO(rcadene): Make it work for Windows
|
||||
# Use the ampersand to run command in the background
|
||||
cmd += " &"
|
||||
|
||||
os.system(cmd)
|
||||
|
||||
|
||||
def log_say(text, play_sounds, blocking=False):
|
||||
logging.info(text)
|
||||
|
||||
if play_sounds:
|
||||
say(text, blocking)
|
||||
|
||||
@@ -120,7 +120,7 @@ eval:
|
||||
# `batch_size` specifies the number of environments to use in a gym.vector.VectorEnv.
|
||||
batch_size: 1
|
||||
# `use_async_envs` specifies whether to use asynchronous environments (multiprocessing).
|
||||
use_async_envs: true
|
||||
use_async_envs: false
|
||||
|
||||
wandb:
|
||||
enable: false
|
||||
|
||||
5
lerobot/configs/env/aloha.yaml
vendored
5
lerobot/configs/env/aloha.yaml
vendored
@@ -2,11 +2,6 @@
|
||||
|
||||
fps: 50
|
||||
|
||||
eval:
|
||||
# `use_async_envs` specifies whether to use asynchronous environments (multiprocessing).
|
||||
# set it to false to avoid some problems of the aloha env
|
||||
use_async_envs: false
|
||||
|
||||
env:
|
||||
name: aloha
|
||||
task: AlohaInsertion-v0
|
||||
|
||||
10
lerobot/configs/env/aloha_real.yaml
vendored
Normal file
10
lerobot/configs/env/aloha_real.yaml
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# @package _global_
|
||||
|
||||
fps: 30
|
||||
|
||||
env:
|
||||
name: real_world
|
||||
task: null
|
||||
state_dim: 18
|
||||
action_dim: 18
|
||||
fps: ${fps}
|
||||
5
lerobot/configs/env/xarm.yaml
vendored
5
lerobot/configs/env/xarm.yaml
vendored
@@ -2,11 +2,6 @@
|
||||
|
||||
fps: 15
|
||||
|
||||
eval:
|
||||
# `use_async_envs` specifies whether to use asynchronous environments (multiprocessing).
|
||||
# set it to false to avoid some problems of the aloha env
|
||||
use_async_envs: false
|
||||
|
||||
env:
|
||||
name: xarm
|
||||
task: XarmLift-v0
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
# @package _global_
|
||||
|
||||
# Use `act_real.yaml` to train on real-world Aloha/Aloha2 datasets.
|
||||
# Compared to `act.yaml`, it contains 4 cameras (i.e. cam_right_wrist, cam_left_wrist, images,
|
||||
# cam_low) instead of 1 camera (i.e. top). Also, `training.eval_freq` is set to -1. This config is used
|
||||
# to evaluate checkpoints at a certain frequency of training steps. When it is set to -1, it deactivates evaluation.
|
||||
# This is because real-world evaluation is done through [dora-lerobot](https://github.com/dora-rs/dora-lerobot).
|
||||
# Look at its README for more information on how to evaluate a checkpoint in the real-world.
|
||||
# Use `act_aloha_real.yaml` to train on real-world datasets collected on Aloha or Aloha-2 robots.
|
||||
# Compared to `act.yaml`, it contains 4 cameras (i.e. cam_right_wrist, cam_left_wrist, cam_high, cam_low) instead of 1 camera (i.e. top).
|
||||
# Also, `training.eval_freq` is set to -1. This config is used to evaluate checkpoints at a certain frequency of training steps.
|
||||
# When it is set to -1, it deactivates evaluation. This is because real-world evaluation is done through our `control_robot.py` script.
|
||||
# Look at the documentation in header of `control_robot.py` for more information on how to collect data , train and evaluate a policy.
|
||||
#
|
||||
# Example of usage for training:
|
||||
# Example of usage for training and inference with `control_robot.py`:
|
||||
# ```bash
|
||||
# python lerobot/scripts/train.py \
|
||||
# policy=act_real \
|
||||
# policy=act_aloha_real \
|
||||
# env=aloha_real
|
||||
# ```
|
||||
#
|
||||
# Example of usage for training and inference with [Dora-rs](https://github.com/dora-rs/dora-lerobot):
|
||||
# ```bash
|
||||
# python lerobot/scripts/train.py \
|
||||
# policy=act_aloha_real \
|
||||
# env=dora_aloha_real
|
||||
# ```
|
||||
|
||||
@@ -36,10 +42,11 @@ override_dataset_stats:
|
||||
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
|
||||
|
||||
training:
|
||||
offline_steps: 100000
|
||||
offline_steps: 80000
|
||||
online_steps: 0
|
||||
eval_freq: -1
|
||||
save_freq: 20000
|
||||
save_freq: 10000
|
||||
log_freq: 100
|
||||
save_checkpoint: true
|
||||
|
||||
batch_size: 8
|
||||
@@ -62,7 +69,7 @@ policy:
|
||||
|
||||
# Input / output structure.
|
||||
n_obs_steps: 1
|
||||
chunk_size: 100 # chunk_size
|
||||
chunk_size: 100
|
||||
n_action_steps: 100
|
||||
|
||||
input_shapes:
|
||||
@@ -107,7 +114,7 @@ policy:
|
||||
n_vae_encoder_layers: 4
|
||||
|
||||
# Inference.
|
||||
temporal_ensemble_coeff: null
|
||||
temporal_ensemble_momentum: null
|
||||
|
||||
# Training and loss computation.
|
||||
dropout: 0.1
|
||||
@@ -1,110 +0,0 @@
|
||||
# @package _global_
|
||||
|
||||
# Use `act_real_no_state.yaml` to train on real-world Aloha/Aloha2 datasets when cameras are moving (e.g. wrist cameras)
|
||||
# Compared to `act_real.yaml`, it is camera only and does not use the state as input which is vector of robot joint positions.
|
||||
# We validated experimentaly that not using state reaches better success rate. Our hypothesis is that `act_real.yaml` might
|
||||
# overfits to the state, because the images are more complex to learn from since they are moving.
|
||||
#
|
||||
# Example of usage for training:
|
||||
# ```bash
|
||||
# python lerobot/scripts/train.py \
|
||||
# policy=act_real_no_state \
|
||||
# env=dora_aloha_real
|
||||
# ```
|
||||
|
||||
seed: 1000
|
||||
dataset_repo_id: lerobot/aloha_static_vinh_cup
|
||||
|
||||
override_dataset_stats:
|
||||
observation.images.cam_right_wrist:
|
||||
# stats from imagenet, since we use a pretrained vision model
|
||||
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
|
||||
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
|
||||
observation.images.cam_left_wrist:
|
||||
# stats from imagenet, since we use a pretrained vision model
|
||||
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
|
||||
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
|
||||
observation.images.cam_high:
|
||||
# stats from imagenet, since we use a pretrained vision model
|
||||
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
|
||||
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
|
||||
observation.images.cam_low:
|
||||
# stats from imagenet, since we use a pretrained vision model
|
||||
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
|
||||
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
|
||||
|
||||
training:
|
||||
offline_steps: 100000
|
||||
online_steps: 0
|
||||
eval_freq: -1
|
||||
save_freq: 20000
|
||||
save_checkpoint: true
|
||||
|
||||
batch_size: 8
|
||||
lr: 1e-5
|
||||
lr_backbone: 1e-5
|
||||
weight_decay: 1e-4
|
||||
grad_clip_norm: 10
|
||||
online_steps_between_rollouts: 1
|
||||
|
||||
delta_timestamps:
|
||||
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
|
||||
|
||||
eval:
|
||||
n_episodes: 50
|
||||
batch_size: 50
|
||||
|
||||
# See `configuration_act.py` for more details.
|
||||
policy:
|
||||
name: act
|
||||
|
||||
# Input / output structure.
|
||||
n_obs_steps: 1
|
||||
chunk_size: 100 # chunk_size
|
||||
n_action_steps: 100
|
||||
|
||||
input_shapes:
|
||||
# TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
|
||||
observation.images.cam_right_wrist: [3, 480, 640]
|
||||
observation.images.cam_left_wrist: [3, 480, 640]
|
||||
observation.images.cam_high: [3, 480, 640]
|
||||
observation.images.cam_low: [3, 480, 640]
|
||||
output_shapes:
|
||||
action: ["${env.action_dim}"]
|
||||
|
||||
# Normalization / Unnormalization
|
||||
input_normalization_modes:
|
||||
observation.images.cam_right_wrist: mean_std
|
||||
observation.images.cam_left_wrist: mean_std
|
||||
observation.images.cam_high: mean_std
|
||||
observation.images.cam_low: mean_std
|
||||
output_normalization_modes:
|
||||
action: mean_std
|
||||
|
||||
# Architecture.
|
||||
# Vision backbone.
|
||||
vision_backbone: resnet18
|
||||
pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
|
||||
replace_final_stride_with_dilation: false
|
||||
# Transformer layers.
|
||||
pre_norm: false
|
||||
dim_model: 512
|
||||
n_heads: 8
|
||||
dim_feedforward: 3200
|
||||
feedforward_activation: relu
|
||||
n_encoder_layers: 4
|
||||
# Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code
|
||||
# that means only the first layer is used. Here we match the original implementation by setting this to 1.
|
||||
# See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521.
|
||||
n_decoder_layers: 1
|
||||
# VAE.
|
||||
use_vae: true
|
||||
latent_dim: 32
|
||||
n_vae_encoder_layers: 4
|
||||
|
||||
# Inference.
|
||||
temporal_ensemble_coeff: null
|
||||
|
||||
# Training and loss computation.
|
||||
dropout: 0.1
|
||||
kl_weight: 10.0
|
||||
@@ -91,25 +91,25 @@ follower_arms:
|
||||
cameras:
|
||||
cam_high:
|
||||
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
|
||||
camera_index: 128422271347
|
||||
serial_number: 128422271347
|
||||
fps: 30
|
||||
width: 640
|
||||
height: 480
|
||||
cam_low:
|
||||
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
|
||||
camera_index: 130322270656
|
||||
serial_number: 130322270656
|
||||
fps: 30
|
||||
width: 640
|
||||
height: 480
|
||||
cam_left_wrist:
|
||||
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
|
||||
camera_index: 218622272670
|
||||
serial_number: 218622272670
|
||||
fps: 30
|
||||
width: 640
|
||||
height: 480
|
||||
cam_right_wrist:
|
||||
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
|
||||
camera_index: 130322272300
|
||||
serial_number: 130322272300
|
||||
fps: 30
|
||||
width: 640
|
||||
height: 480
|
||||
|
||||
24
lerobot/configs/robot/stretch.yaml
Normal file
24
lerobot/configs/robot/stretch.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
_target_: lerobot.common.robot_devices.robots.stretch.StretchRobot
|
||||
robot_type: stretch3
|
||||
|
||||
cameras:
|
||||
navigation:
|
||||
_target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
|
||||
camera_index: /dev/hello-nav-head-camera
|
||||
fps: 10
|
||||
width: 1280
|
||||
height: 720
|
||||
rotation: -90
|
||||
head:
|
||||
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera.init_from_name
|
||||
name: Intel RealSense D435I
|
||||
fps: 30
|
||||
width: 640
|
||||
height: 480
|
||||
rotation: 90
|
||||
wrist:
|
||||
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera.init_from_name
|
||||
name: Intel RealSense D405
|
||||
fps: 30
|
||||
width: 640
|
||||
height: 480
|
||||
@@ -99,161 +99,53 @@ python lerobot/scripts/control_robot.py record \
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import time
|
||||
import traceback
|
||||
from contextlib import nullcontext
|
||||
from functools import cache
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import torch
|
||||
import tqdm
|
||||
from omegaconf import DictConfig
|
||||
from PIL import Image
|
||||
from termcolor import colored
|
||||
from typing import List
|
||||
|
||||
# from safetensors.torch import load_file, save_file
|
||||
from lerobot.common.datasets.compute_stats import compute_stats
|
||||
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
|
||||
from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import to_hf_dataset
|
||||
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, get_default_encoding
|
||||
from lerobot.common.datasets.utils import calculate_episode_data_index, create_branch
|
||||
from lerobot.common.datasets.video_utils import encode_video_frames
|
||||
from lerobot.common.policies.factory import make_policy
|
||||
from lerobot.common.robot_devices.robots.factory import make_robot
|
||||
from lerobot.common.robot_devices.robots.utils import Robot, get_arm_id
|
||||
from lerobot.common.robot_devices.utils import busy_wait
|
||||
from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed
|
||||
from lerobot.scripts.eval import get_pretrained_policy_path
|
||||
from lerobot.scripts.push_dataset_to_hub import (
|
||||
push_dataset_card_to_hub,
|
||||
push_meta_data_to_hub,
|
||||
push_videos_to_hub,
|
||||
save_meta_data,
|
||||
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.common.datasets.populate_dataset import (
|
||||
create_lerobot_dataset,
|
||||
delete_current_episode,
|
||||
init_dataset,
|
||||
save_current_episode,
|
||||
)
|
||||
|
||||
########################################################################################
|
||||
# Utilities
|
||||
########################################################################################
|
||||
|
||||
|
||||
def say(text, blocking=False):
|
||||
# Check if mac, linux, or windows.
|
||||
if platform.system() == "Darwin":
|
||||
cmd = f'say "{text}"'
|
||||
elif platform.system() == "Linux":
|
||||
cmd = f'spd-say "{text}"'
|
||||
elif platform.system() == "Windows":
|
||||
cmd = (
|
||||
'PowerShell -Command "Add-Type -AssemblyName System.Speech; '
|
||||
f"(New-Object System.Speech.Synthesis.SpeechSynthesizer).Speak('{text}')\""
|
||||
)
|
||||
|
||||
if not blocking and platform.system() in ["Darwin", "Linux"]:
|
||||
# TODO(rcadene): Make it work for Windows
|
||||
# Use the ampersand to run command in the background
|
||||
cmd += " &"
|
||||
|
||||
os.system(cmd)
|
||||
|
||||
|
||||
def save_image(img_tensor, key, frame_index, episode_index, videos_dir):
|
||||
img = Image.fromarray(img_tensor.numpy())
|
||||
path = videos_dir / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png"
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
img.save(str(path), quality=100)
|
||||
|
||||
|
||||
def none_or_int(value):
|
||||
if value == "None":
|
||||
return None
|
||||
return int(value)
|
||||
|
||||
|
||||
def log_control_info(robot, dt_s, episode_index=None, frame_index=None, fps=None):
|
||||
log_items = []
|
||||
if episode_index is not None:
|
||||
log_items.append(f"ep:{episode_index}")
|
||||
if frame_index is not None:
|
||||
log_items.append(f"frame:{frame_index}")
|
||||
|
||||
def log_dt(shortname, dt_val_s):
|
||||
nonlocal log_items, fps
|
||||
info_str = f"{shortname}:{dt_val_s * 1000:5.2f} ({1/ dt_val_s:3.1f}hz)"
|
||||
if fps is not None:
|
||||
actual_fps = 1 / dt_val_s
|
||||
if actual_fps < fps - 1:
|
||||
info_str = colored(info_str, "yellow")
|
||||
log_items.append(info_str)
|
||||
|
||||
# total step time displayed in milliseconds and its frequency
|
||||
log_dt("dt", dt_s)
|
||||
|
||||
for name in robot.leader_arms:
|
||||
key = f"read_leader_{name}_pos_dt_s"
|
||||
if key in robot.logs:
|
||||
log_dt("dtRlead", robot.logs[key])
|
||||
|
||||
for name in robot.follower_arms:
|
||||
key = f"write_follower_{name}_goal_pos_dt_s"
|
||||
if key in robot.logs:
|
||||
log_dt("dtWfoll", robot.logs[key])
|
||||
|
||||
key = f"read_follower_{name}_pos_dt_s"
|
||||
if key in robot.logs:
|
||||
log_dt("dtRfoll", robot.logs[key])
|
||||
|
||||
for name in robot.cameras:
|
||||
key = f"read_camera_{name}_dt_s"
|
||||
if key in robot.logs:
|
||||
log_dt(f"dtR{name}", robot.logs[key])
|
||||
|
||||
info_str = " ".join(log_items)
|
||||
logging.info(info_str)
|
||||
|
||||
|
||||
@cache
|
||||
def is_headless():
|
||||
"""Detects if python is running without a monitor."""
|
||||
try:
|
||||
import pynput # noqa
|
||||
|
||||
return False
|
||||
except Exception:
|
||||
print(
|
||||
"Error trying to import pynput. Switching to headless mode. "
|
||||
"As a result, the video stream from the cameras won't be shown, "
|
||||
"and you won't be able to change the control flow with keyboards. "
|
||||
"For more info, see traceback below.\n"
|
||||
)
|
||||
traceback.print_exc()
|
||||
print()
|
||||
return True
|
||||
|
||||
from lerobot.common.robot_devices.control_utils import (
|
||||
control_loop,
|
||||
has_method,
|
||||
init_keyboard_listener,
|
||||
init_policy,
|
||||
log_control_info,
|
||||
record_episode,
|
||||
reset_environment,
|
||||
sanity_check_dataset_name,
|
||||
stop_recording,
|
||||
warmup_record,
|
||||
)
|
||||
from lerobot.common.robot_devices.robots.factory import make_robot
|
||||
from lerobot.common.robot_devices.robots.utils import Robot
|
||||
from lerobot.common.robot_devices.utils import busy_wait, safe_disconnect
|
||||
from lerobot.common.utils.utils import init_hydra_config, init_logging, log_say, none_or_int
|
||||
|
||||
########################################################################################
|
||||
# Control modes
|
||||
########################################################################################
|
||||
|
||||
|
||||
@safe_disconnect
|
||||
def calibrate(robot: Robot, arms: list[str] | None):
|
||||
available_arms = []
|
||||
for name in robot.follower_arms:
|
||||
arm_id = get_arm_id(name, "follower")
|
||||
available_arms.append(arm_id)
|
||||
for name in robot.leader_arms:
|
||||
arm_id = get_arm_id(name, "leader")
|
||||
available_arms.append(arm_id)
|
||||
# TODO(aliberts): move this code in robots' classes
|
||||
if robot.robot_type.startswith("stretch"):
|
||||
if not robot.is_connected:
|
||||
robot.connect()
|
||||
if not robot.is_homed():
|
||||
robot.home()
|
||||
return
|
||||
|
||||
unknown_arms = [arm_id for arm_id in arms if arm_id not in available_arms]
|
||||
|
||||
available_arms_str = " ".join(available_arms)
|
||||
unknown_arms = [arm_id for arm_id in arms if arm_id not in robot.available_arms]
|
||||
available_arms_str = " ".join(robot.available_arms)
|
||||
unknown_arms_str = " ".join(unknown_arms)
|
||||
|
||||
if arms is None or len(arms) == 0:
|
||||
@@ -285,34 +177,27 @@ def calibrate(robot: Robot, arms: list[str] | None):
|
||||
print("Calibration is done! You can now teleoperate and record datasets!")
|
||||
|
||||
|
||||
def teleoperate(robot: Robot, fps: int | None = None, teleop_time_s: float | None = None):
|
||||
# TODO(rcadene): Add option to record logs
|
||||
if not robot.is_connected:
|
||||
robot.connect()
|
||||
|
||||
start_teleop_t = time.perf_counter()
|
||||
while True:
|
||||
start_loop_t = time.perf_counter()
|
||||
robot.teleop_step()
|
||||
|
||||
if fps is not None:
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
log_control_info(robot, dt_s, fps=fps)
|
||||
|
||||
if teleop_time_s is not None and time.perf_counter() - start_teleop_t > teleop_time_s:
|
||||
break
|
||||
@safe_disconnect
|
||||
def teleoperate(
|
||||
robot: Robot, fps: int | None = None, teleop_time_s: float | None = None, display_cameras: bool = False
|
||||
):
|
||||
control_loop(
|
||||
robot,
|
||||
control_time_s=teleop_time_s,
|
||||
fps=fps,
|
||||
teleoperate=True,
|
||||
display_cameras=display_cameras,
|
||||
)
|
||||
|
||||
|
||||
@safe_disconnect
|
||||
def record(
|
||||
robot: Robot,
|
||||
policy: torch.nn.Module | None = None,
|
||||
hydra_cfg: DictConfig | None = None,
|
||||
root: str,
|
||||
repo_id: str,
|
||||
pretrained_policy_name_or_path: str | None = None,
|
||||
policy_overrides: List[str] | None = None,
|
||||
fps: int | None = None,
|
||||
root="data",
|
||||
repo_id="lerobot/debug",
|
||||
warmup_time_s=2,
|
||||
episode_time_s=10,
|
||||
reset_time_s=5,
|
||||
@@ -321,377 +206,115 @@ def record(
|
||||
run_compute_stats=True,
|
||||
push_to_hub=True,
|
||||
tags=None,
|
||||
num_image_writers_per_camera=4,
|
||||
num_image_writer_processes=0,
|
||||
num_image_writer_threads_per_camera=4,
|
||||
force_override=False,
|
||||
display_cameras=True,
|
||||
play_sounds=True,
|
||||
):
|
||||
# TODO(rcadene): Add option to record logs
|
||||
# TODO(rcadene): Clean this function via decomposition in higher level functions
|
||||
listener = None
|
||||
events = None
|
||||
policy = None
|
||||
device = None
|
||||
use_amp = None
|
||||
|
||||
_, dataset_name = repo_id.split("/")
|
||||
if dataset_name.startswith("eval_") and policy is None:
|
||||
raise ValueError(
|
||||
f"Your dataset name begins by 'eval_' ({dataset_name}) but no policy is provided ({policy})."
|
||||
)
|
||||
# Load pretrained policy
|
||||
if pretrained_policy_name_or_path is not None:
|
||||
policy, policy_fps, device, use_amp = init_policy(pretrained_policy_name_or_path, policy_overrides)
|
||||
|
||||
if not video:
|
||||
raise NotImplementedError()
|
||||
if fps is None:
|
||||
fps = policy_fps
|
||||
logging.warning(f"No fps provided, so using the fps from policy config ({policy_fps}).")
|
||||
elif fps != policy_fps:
|
||||
logging.warning(
|
||||
f"There is a mismatch between the provided fps ({fps}) and the one from policy config ({policy_fps})."
|
||||
)
|
||||
|
||||
# Create empty dataset or load existing saved episodes
|
||||
sanity_check_dataset_name(repo_id, policy)
|
||||
dataset = init_dataset(
|
||||
repo_id,
|
||||
root,
|
||||
force_override,
|
||||
fps,
|
||||
video,
|
||||
write_images=robot.has_camera,
|
||||
num_image_writer_processes=num_image_writer_processes,
|
||||
num_image_writer_threads=num_image_writer_threads_per_camera * robot.num_cameras,
|
||||
)
|
||||
|
||||
if not robot.is_connected:
|
||||
robot.connect()
|
||||
|
||||
local_dir = Path(root) / repo_id
|
||||
if local_dir.exists() and force_override:
|
||||
shutil.rmtree(local_dir)
|
||||
listener, events = init_keyboard_listener()
|
||||
|
||||
episodes_dir = local_dir / "episodes"
|
||||
episodes_dir.mkdir(parents=True, exist_ok=True)
|
||||
# Execute a few seconds without recording to:
|
||||
# 1. teleoperate the robot to move it in starting position if no policy provided,
|
||||
# 2. give times to the robot devices to connect and start synchronizing,
|
||||
# 3. place the cameras windows on screen
|
||||
enable_teleoperation = policy is None
|
||||
log_say("Warmup record", play_sounds)
|
||||
warmup_record(robot, events, enable_teleoperation, warmup_time_s, display_cameras, fps)
|
||||
|
||||
videos_dir = local_dir / "videos"
|
||||
videos_dir.mkdir(parents=True, exist_ok=True)
|
||||
if has_method(robot, "teleop_safety_stop"):
|
||||
robot.teleop_safety_stop()
|
||||
|
||||
# Logic to resume data recording
|
||||
rec_info_path = episodes_dir / "data_recording_info.json"
|
||||
if rec_info_path.exists():
|
||||
with open(rec_info_path) as f:
|
||||
rec_info = json.load(f)
|
||||
episode_index = rec_info["last_episode_index"] + 1
|
||||
else:
|
||||
episode_index = 0
|
||||
while True:
|
||||
if dataset["num_episodes"] >= num_episodes:
|
||||
break
|
||||
|
||||
if is_headless():
|
||||
logging.info(
|
||||
"Headless environment detected. On-screen cameras display and keyboard inputs will not be available."
|
||||
episode_index = dataset["num_episodes"]
|
||||
log_say(f"Recording episode {episode_index}", play_sounds)
|
||||
record_episode(
|
||||
dataset=dataset,
|
||||
robot=robot,
|
||||
events=events,
|
||||
episode_time_s=episode_time_s,
|
||||
display_cameras=display_cameras,
|
||||
policy=policy,
|
||||
device=device,
|
||||
use_amp=use_amp,
|
||||
fps=fps,
|
||||
)
|
||||
|
||||
# Allow to exit early while recording an episode or resetting the environment,
|
||||
# by tapping the right arrow key '->'. This might require a sudo permission
|
||||
# to allow your terminal to monitor keyboard events.
|
||||
exit_early = False
|
||||
rerecord_episode = False
|
||||
stop_recording = False
|
||||
# Execute a few seconds without recording to give time to manually reset the environment
|
||||
# Current code logic doesn't allow to teleoperate during this time.
|
||||
# TODO(rcadene): add an option to enable teleoperation during reset
|
||||
# Skip reset for the last episode to be recorded
|
||||
if not events["stop_recording"] and (
|
||||
(episode_index < num_episodes - 1) or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment", play_sounds)
|
||||
reset_environment(robot, events, reset_time_s)
|
||||
|
||||
# Only import pynput if not in a headless environment
|
||||
if not is_headless():
|
||||
from pynput import keyboard
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode", play_sounds)
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
delete_current_episode(dataset)
|
||||
continue
|
||||
|
||||
def on_press(key):
|
||||
nonlocal exit_early, rerecord_episode, stop_recording
|
||||
try:
|
||||
if key == keyboard.Key.right:
|
||||
print("Right arrow key pressed. Exiting loop...")
|
||||
exit_early = True
|
||||
elif key == keyboard.Key.left:
|
||||
print("Left arrow key pressed. Exiting loop and rerecord the last episode...")
|
||||
rerecord_episode = True
|
||||
exit_early = True
|
||||
elif key == keyboard.Key.esc:
|
||||
print("Escape key pressed. Stopping data recording...")
|
||||
stop_recording = True
|
||||
exit_early = True
|
||||
except Exception as e:
|
||||
print(f"Error handling key press: {e}")
|
||||
# Increment by one dataset["current_episode_index"]
|
||||
save_current_episode(dataset)
|
||||
|
||||
listener = keyboard.Listener(on_press=on_press)
|
||||
listener.start()
|
||||
if events["stop_recording"]:
|
||||
break
|
||||
|
||||
# Load policy if any
|
||||
if policy is not None:
|
||||
# Check device is available
|
||||
device = get_safe_torch_device(hydra_cfg.device, log=True)
|
||||
log_say("Stop recording", play_sounds, blocking=True)
|
||||
stop_recording(robot, listener, display_cameras)
|
||||
|
||||
policy.eval()
|
||||
policy.to(device)
|
||||
lerobot_dataset = create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_sounds)
|
||||
|
||||
torch.backends.cudnn.benchmark = True
|
||||
torch.backends.cuda.matmul.allow_tf32 = True
|
||||
set_global_seed(hydra_cfg.seed)
|
||||
|
||||
# override fps using policy fps
|
||||
fps = hydra_cfg.env.fps
|
||||
|
||||
# Execute a few seconds without recording data, to give times
|
||||
# to the robot devices to connect and start synchronizing.
|
||||
timestamp = 0
|
||||
start_warmup_t = time.perf_counter()
|
||||
is_warmup_print = False
|
||||
while timestamp < warmup_time_s:
|
||||
if not is_warmup_print:
|
||||
logging.info("Warming up (no data recording)")
|
||||
say("Warming up")
|
||||
is_warmup_print = True
|
||||
|
||||
start_loop_t = time.perf_counter()
|
||||
|
||||
if policy is None:
|
||||
observation, action = robot.teleop_step(record_data=True)
|
||||
else:
|
||||
observation = robot.capture_observation()
|
||||
|
||||
if not is_headless():
|
||||
image_keys = [key for key in observation if "image" in key]
|
||||
for key in image_keys:
|
||||
cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR))
|
||||
cv2.waitKey(1)
|
||||
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
log_control_info(robot, dt_s, fps=fps)
|
||||
|
||||
timestamp = time.perf_counter() - start_warmup_t
|
||||
|
||||
# Save images using threads to reach high fps (30 and more)
|
||||
# Using `with` to exist smoothly if an execption is raised.
|
||||
futures = []
|
||||
num_image_writers = num_image_writers_per_camera * len(robot.cameras)
|
||||
if num_image_writers == 0:
|
||||
num_image_writers = 1
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_image_writers) as executor:
|
||||
# Start recording all episodes
|
||||
while episode_index < num_episodes:
|
||||
logging.info(f"Recording episode {episode_index}")
|
||||
say(f"Recording episode {episode_index}")
|
||||
ep_dict = {}
|
||||
frame_index = 0
|
||||
timestamp = 0
|
||||
start_episode_t = time.perf_counter()
|
||||
while timestamp < episode_time_s:
|
||||
start_loop_t = time.perf_counter()
|
||||
|
||||
if policy is None:
|
||||
observation, action = robot.teleop_step(record_data=True)
|
||||
else:
|
||||
observation = robot.capture_observation()
|
||||
|
||||
image_keys = [key for key in observation if "image" in key]
|
||||
not_image_keys = [key for key in observation if "image" not in key]
|
||||
|
||||
for key in image_keys:
|
||||
futures += [
|
||||
executor.submit(
|
||||
save_image, observation[key], key, frame_index, episode_index, videos_dir
|
||||
)
|
||||
]
|
||||
|
||||
if not is_headless():
|
||||
image_keys = [key for key in observation if "image" in key]
|
||||
for key in image_keys:
|
||||
cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR))
|
||||
cv2.waitKey(1)
|
||||
|
||||
for key in not_image_keys:
|
||||
if key not in ep_dict:
|
||||
ep_dict[key] = []
|
||||
ep_dict[key].append(observation[key])
|
||||
|
||||
if policy is not None:
|
||||
with (
|
||||
torch.inference_mode(),
|
||||
torch.autocast(device_type=device.type)
|
||||
if device.type == "cuda" and hydra_cfg.use_amp
|
||||
else nullcontext(),
|
||||
):
|
||||
# Convert to pytorch format: channel first and float32 in [0,1] with batch dimension
|
||||
for name in observation:
|
||||
if "image" in name:
|
||||
observation[name] = observation[name].type(torch.float32) / 255
|
||||
observation[name] = observation[name].permute(2, 0, 1).contiguous()
|
||||
observation[name] = observation[name].unsqueeze(0)
|
||||
observation[name] = observation[name].to(device)
|
||||
|
||||
# Compute the next action with the policy
|
||||
# based on the current observation
|
||||
action = policy.select_action(observation)
|
||||
|
||||
# Remove batch dimension
|
||||
action = action.squeeze(0)
|
||||
|
||||
# Move to cpu, if not already the case
|
||||
action = action.to("cpu")
|
||||
|
||||
# Order the robot to move
|
||||
action_sent = robot.send_action(action)
|
||||
|
||||
# Action can eventually be clipped using `max_relative_target`,
|
||||
# so action actually sent is saved in the dataset.
|
||||
action = {"action": action_sent}
|
||||
|
||||
for key in action:
|
||||
if key not in ep_dict:
|
||||
ep_dict[key] = []
|
||||
ep_dict[key].append(action[key])
|
||||
|
||||
frame_index += 1
|
||||
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
log_control_info(robot, dt_s, fps=fps)
|
||||
|
||||
timestamp = time.perf_counter() - start_episode_t
|
||||
if exit_early:
|
||||
exit_early = False
|
||||
break
|
||||
|
||||
if not stop_recording:
|
||||
# Start resetting env while the executor are finishing
|
||||
logging.info("Reset the environment")
|
||||
say("Reset the environment")
|
||||
|
||||
timestamp = 0
|
||||
start_vencod_t = time.perf_counter()
|
||||
|
||||
# During env reset we save the data and encode the videos
|
||||
num_frames = frame_index
|
||||
|
||||
for key in image_keys:
|
||||
tmp_imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
|
||||
fname = f"{key}_episode_{episode_index:06d}.mp4"
|
||||
video_path = local_dir / "videos" / fname
|
||||
if video_path.exists():
|
||||
video_path.unlink()
|
||||
# Store the reference to the video frame, even tho the videos are not yet encoded
|
||||
ep_dict[key] = []
|
||||
for i in range(num_frames):
|
||||
ep_dict[key].append({"path": f"videos/{fname}", "timestamp": i / fps})
|
||||
|
||||
for key in not_image_keys:
|
||||
ep_dict[key] = torch.stack(ep_dict[key])
|
||||
|
||||
for key in action:
|
||||
ep_dict[key] = torch.stack(ep_dict[key])
|
||||
|
||||
ep_dict["episode_index"] = torch.tensor([episode_index] * num_frames)
|
||||
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
|
||||
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
|
||||
|
||||
done = torch.zeros(num_frames, dtype=torch.bool)
|
||||
done[-1] = True
|
||||
ep_dict["next.done"] = done
|
||||
|
||||
ep_path = episodes_dir / f"episode_{episode_index}.pth"
|
||||
print("Saving episode dictionary...")
|
||||
torch.save(ep_dict, ep_path)
|
||||
|
||||
rec_info = {
|
||||
"last_episode_index": episode_index,
|
||||
}
|
||||
with open(rec_info_path, "w") as f:
|
||||
json.dump(rec_info, f)
|
||||
|
||||
is_last_episode = stop_recording or (episode_index == (num_episodes - 1))
|
||||
|
||||
# Wait if necessary
|
||||
with tqdm.tqdm(total=reset_time_s, desc="Waiting") as pbar:
|
||||
while timestamp < reset_time_s and not is_last_episode:
|
||||
time.sleep(1)
|
||||
timestamp = time.perf_counter() - start_vencod_t
|
||||
pbar.update(1)
|
||||
if exit_early:
|
||||
exit_early = False
|
||||
break
|
||||
|
||||
# Skip updating episode index which forces re-recording episode
|
||||
if rerecord_episode:
|
||||
rerecord_episode = False
|
||||
continue
|
||||
|
||||
episode_index += 1
|
||||
|
||||
if is_last_episode:
|
||||
logging.info("Done recording")
|
||||
say("Done recording", blocking=True)
|
||||
if not is_headless():
|
||||
listener.stop()
|
||||
|
||||
logging.info("Waiting for threads writing the images on disk to terminate...")
|
||||
for _ in tqdm.tqdm(
|
||||
concurrent.futures.as_completed(futures), total=len(futures), desc="Writting images"
|
||||
):
|
||||
pass
|
||||
break
|
||||
|
||||
robot.disconnect()
|
||||
if not is_headless():
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
num_episodes = episode_index
|
||||
|
||||
logging.info("Encoding videos")
|
||||
say("Encoding videos")
|
||||
# Use ffmpeg to convert frames stored as png into mp4 videos
|
||||
for episode_index in tqdm.tqdm(range(num_episodes)):
|
||||
for key in image_keys:
|
||||
tmp_imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
|
||||
fname = f"{key}_episode_{episode_index:06d}.mp4"
|
||||
video_path = local_dir / "videos" / fname
|
||||
if video_path.exists():
|
||||
# Skip if video is already encoded. Could be the case when resuming data recording.
|
||||
continue
|
||||
# note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding,
|
||||
# since video encoding with ffmpeg is already using multithreading.
|
||||
encode_video_frames(tmp_imgs_dir, video_path, fps, overwrite=True)
|
||||
shutil.rmtree(tmp_imgs_dir)
|
||||
|
||||
logging.info("Concatenating episodes")
|
||||
ep_dicts = []
|
||||
for episode_index in tqdm.tqdm(range(num_episodes)):
|
||||
ep_path = episodes_dir / f"episode_{episode_index}.pth"
|
||||
ep_dict = torch.load(ep_path)
|
||||
ep_dicts.append(ep_dict)
|
||||
data_dict = concatenate_episodes(ep_dicts)
|
||||
|
||||
total_frames = data_dict["frame_index"].shape[0]
|
||||
data_dict["index"] = torch.arange(0, total_frames, 1)
|
||||
|
||||
hf_dataset = to_hf_dataset(data_dict, video)
|
||||
episode_data_index = calculate_episode_data_index(hf_dataset)
|
||||
info = {
|
||||
"codebase_version": CODEBASE_VERSION,
|
||||
"fps": fps,
|
||||
"video": video,
|
||||
}
|
||||
if video:
|
||||
info["encoding"] = get_default_encoding()
|
||||
|
||||
lerobot_dataset = LeRobotDataset.from_preloaded(
|
||||
repo_id=repo_id,
|
||||
hf_dataset=hf_dataset,
|
||||
episode_data_index=episode_data_index,
|
||||
info=info,
|
||||
videos_dir=videos_dir,
|
||||
)
|
||||
if run_compute_stats:
|
||||
logging.info("Computing dataset statistics")
|
||||
say("Computing dataset statistics")
|
||||
stats = compute_stats(lerobot_dataset)
|
||||
lerobot_dataset.stats = stats
|
||||
else:
|
||||
stats = {}
|
||||
logging.info("Skipping computation of the dataset statistics")
|
||||
|
||||
hf_dataset = hf_dataset.with_format(None) # to remove transforms that cant be saved
|
||||
hf_dataset.save_to_disk(str(local_dir / "train"))
|
||||
|
||||
meta_data_dir = local_dir / "meta_data"
|
||||
save_meta_data(info, stats, episode_data_index, meta_data_dir)
|
||||
|
||||
if push_to_hub:
|
||||
hf_dataset.push_to_hub(repo_id, revision="main")
|
||||
push_meta_data_to_hub(repo_id, meta_data_dir, revision="main")
|
||||
push_dataset_card_to_hub(repo_id, revision="main", tags=tags)
|
||||
if video:
|
||||
push_videos_to_hub(repo_id, videos_dir, revision="main")
|
||||
create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)
|
||||
|
||||
logging.info("Exiting")
|
||||
say("Exiting")
|
||||
log_say("Exiting", play_sounds)
|
||||
return lerobot_dataset
|
||||
|
||||
|
||||
def replay(robot: Robot, episode: int, fps: int | None = None, root="data", repo_id="lerobot/debug"):
|
||||
@safe_disconnect
|
||||
def replay(
|
||||
robot: Robot, episode: int, fps: int | None = None, root="data", repo_id="lerobot/debug", play_sounds=True
|
||||
):
|
||||
# TODO(rcadene, aliberts): refactor with control_loop, once `dataset` is an instance of LeRobotDataset
|
||||
# TODO(rcadene): Add option to record logs
|
||||
local_dir = Path(root) / repo_id
|
||||
if not local_dir.exists():
|
||||
@@ -705,8 +328,7 @@ def replay(robot: Robot, episode: int, fps: int | None = None, root="data", repo
|
||||
if not robot.is_connected:
|
||||
robot.connect()
|
||||
|
||||
logging.info("Replaying episode")
|
||||
say("Replaying episode", blocking=True)
|
||||
log_say("Replaying episode", play_sounds, blocking=True)
|
||||
for idx in range(from_idx, to_idx):
|
||||
start_episode_t = time.perf_counter()
|
||||
|
||||
@@ -751,6 +373,12 @@ if __name__ == "__main__":
|
||||
parser_teleop.add_argument(
|
||||
"--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
|
||||
)
|
||||
parser_teleop.add_argument(
|
||||
"--display-cameras",
|
||||
type=int,
|
||||
default=1,
|
||||
help="Display all cameras on screen (set to 1 to display or 0).",
|
||||
)
|
||||
|
||||
parser_record = subparsers.add_parser("record", parents=[base_parser])
|
||||
parser_record.add_argument(
|
||||
@@ -806,12 +434,23 @@ if __name__ == "__main__":
|
||||
help="Add tags to your dataset on the hub.",
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--num-image-writers-per-camera",
|
||||
"--num-image-writer-processes",
|
||||
type=int,
|
||||
default=0,
|
||||
help=(
|
||||
"Number of subprocesses handling the saving of frames as PNGs. Set to 0 to use threads only; "
|
||||
"set to ≥1 to use subprocesses, each using threads to write images. The best number of processes "
|
||||
"and threads depends on your system. We recommend 4 threads per camera with 0 processes. "
|
||||
"If fps is unstable, adjust the thread count. If still unstable, try using 1 or more subprocesses."
|
||||
),
|
||||
)
|
||||
parser_record.add_argument(
|
||||
"--num-image-writer-threads-per-camera",
|
||||
type=int,
|
||||
default=4,
|
||||
help=(
|
||||
"Number of threads writing the frames as png images on disk, per camera. "
|
||||
"Too much threads might cause unstable teleoperation fps due to main thread being blocked. "
|
||||
"Too many threads might cause unstable teleoperation fps due to main thread being blocked. "
|
||||
"Not enough threads might cause low camera fps."
|
||||
),
|
||||
)
|
||||
@@ -877,19 +516,7 @@ if __name__ == "__main__":
|
||||
teleoperate(robot, **kwargs)
|
||||
|
||||
elif control_mode == "record":
|
||||
pretrained_policy_name_or_path = args.pretrained_policy_name_or_path
|
||||
policy_overrides = args.policy_overrides
|
||||
del kwargs["pretrained_policy_name_or_path"]
|
||||
del kwargs["policy_overrides"]
|
||||
|
||||
policy_cfg = None
|
||||
if pretrained_policy_name_or_path is not None:
|
||||
pretrained_policy_path = get_pretrained_policy_path(pretrained_policy_name_or_path)
|
||||
policy_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", policy_overrides)
|
||||
policy = make_policy(hydra_cfg=policy_cfg, pretrained_policy_name_or_path=pretrained_policy_path)
|
||||
record(robot, policy, policy_cfg, **kwargs)
|
||||
else:
|
||||
record(robot, **kwargs)
|
||||
record(robot, **kwargs)
|
||||
|
||||
elif control_mode == "replay":
|
||||
replay(robot, **kwargs)
|
||||
|
||||
@@ -57,7 +57,7 @@ import gymnasium as gym
|
||||
import numpy as np
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
from huggingface_hub.utils._errors import RepositoryNotFoundError
|
||||
from huggingface_hub.errors import RepositoryNotFoundError
|
||||
from huggingface_hub.utils._validators import HFValidationError
|
||||
from torch import Tensor, nn
|
||||
from tqdm import trange
|
||||
|
||||
@@ -1,158 +0,0 @@
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import importlib
|
||||
import shutil
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from PIL import Image
|
||||
|
||||
from lerobot.scripts.control_robot import busy_wait
|
||||
|
||||
|
||||
def save_image(img_array, camera_index, frame_index, images_dir):
|
||||
try:
|
||||
img = Image.fromarray(img_array)
|
||||
path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
img.save(str(path), quality=100)
|
||||
print(f"Image saved to: {path}")
|
||||
except Exception as e:
|
||||
print(f"Failed to save image: {e}")
|
||||
|
||||
|
||||
def save_images_from_cameras(
|
||||
driver: str,
|
||||
images_dir: Path,
|
||||
camera_ids: list[int] | None = None,
|
||||
fps=None,
|
||||
width=None,
|
||||
height=None,
|
||||
record_time_s=2,
|
||||
):
|
||||
"""
|
||||
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
|
||||
associated to a given camera index.
|
||||
"""
|
||||
# Dynamically import the appropriate camera class based on the brand
|
||||
if driver == "intelrealsense":
|
||||
camera_module = importlib.import_module("lerobot.common.robot_devices.cameras.intelrealsense")
|
||||
camera_class = camera_module.IntelRealSenseCamera
|
||||
find_camera_indices = camera_module.find_camera_indices
|
||||
elif driver == "opencv":
|
||||
camera_module = importlib.import_module("lerobot.common.robot_devices.cameras.opencv")
|
||||
camera_class = camera_module.OpenCVCamera
|
||||
find_camera_indices = camera_module.find_camera_indices
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported camera driver: {driver}. Note: the drivers we currently support are opencv and intelrealsense."
|
||||
)
|
||||
|
||||
if camera_ids is None:
|
||||
camera_ids = find_camera_indices()
|
||||
|
||||
print("Connecting cameras")
|
||||
cameras = []
|
||||
for cam_idx in camera_ids:
|
||||
camera = camera_class(cam_idx, fps=fps, width=width, height=height)
|
||||
camera.connect()
|
||||
print(
|
||||
f"{camera.__class__.__name__}({camera.camera_index}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
|
||||
)
|
||||
cameras.append(camera)
|
||||
|
||||
images_dir = Path(images_dir)
|
||||
if images_dir.exists():
|
||||
shutil.rmtree(images_dir)
|
||||
images_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print(f"Saving images to {images_dir}")
|
||||
frame_index = 0
|
||||
start_time = time.perf_counter()
|
||||
|
||||
# Use ThreadPoolExecutor for saving images asynchronously
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
|
||||
try:
|
||||
while True:
|
||||
now = time.perf_counter()
|
||||
|
||||
for camera in cameras:
|
||||
# Capture image
|
||||
image = camera.read() if fps is None else camera.async_read()
|
||||
if image is None:
|
||||
print("No Frame")
|
||||
else:
|
||||
bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
||||
|
||||
# Submit the save_image function to be executed in the background
|
||||
executor.submit(
|
||||
save_image,
|
||||
bgr_converted_image,
|
||||
camera.camera_index,
|
||||
frame_index,
|
||||
images_dir,
|
||||
)
|
||||
|
||||
if fps is not None:
|
||||
dt_s = time.perf_counter() - now
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
if time.perf_counter() - start_time > record_time_s:
|
||||
break
|
||||
|
||||
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
|
||||
|
||||
frame_index += 1
|
||||
finally:
|
||||
print(f"Images have been saved to {images_dir}")
|
||||
for camera in cameras:
|
||||
camera.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Save a few frames for all cameras connected to the computer, or a selected subset."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--driver", type=str, required=True, help="Camera driver (e.g., intelrealsense, opencv)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--camera-ids",
|
||||
type=int,
|
||||
nargs="*",
|
||||
default=None,
|
||||
help="List of camera indices used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fps",
|
||||
type=int,
|
||||
default=30,
|
||||
help="Set the number of frames recorded per second for all cameras. If not provided, use the default fps of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--width",
|
||||
type=str,
|
||||
default=640,
|
||||
help="Set the width for all cameras. If not provided, use the default width of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--height",
|
||||
type=str,
|
||||
default=480,
|
||||
help="Set the height for all cameras. If not provided, use the default height of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--images-dir",
|
||||
type=Path,
|
||||
default="outputs/images_from_cameras",
|
||||
help="Set directory to save a few frames for each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--record-time-s",
|
||||
type=float,
|
||||
default=2.0,
|
||||
help="Set the number of seconds used to record the frames. By default, 2 seconds.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
save_images_from_cameras(**vars(args))
|
||||
@@ -383,7 +383,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
|
||||
logging.info(f"Checkpoint policy after step {step}")
|
||||
# Note: Save with step as the identifier, and format it to have at least 6 digits but more if
|
||||
# needed (choose 6 as a minimum for consistency without being overkill).
|
||||
logger.save_checkpont(
|
||||
logger.save_checkpoint(
|
||||
step,
|
||||
policy,
|
||||
optimizer,
|
||||
|
||||
@@ -250,7 +250,7 @@
|
||||
if(!canPlayVideos){
|
||||
this.videoCodecError = true;
|
||||
}
|
||||
|
||||
|
||||
// process CSV data
|
||||
this.videos = document.querySelectorAll('video');
|
||||
this.video = this.videos[0];
|
||||
|
||||
Reference in New Issue
Block a user