From f4c11593d4ea79070b28a77731e726167f156280 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 24 May 2025 10:48:06 +0200 Subject: [PATCH 1/4] Fix predict_action from record --- lerobot/common/utils/control_utils.py | 5 ++++- lerobot/record.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lerobot/common/utils/control_utils.py b/lerobot/common/utils/control_utils.py index d4790a57..012f9f27 100644 --- a/lerobot/common/utils/control_utils.py +++ b/lerobot/common/utils/control_utils.py @@ -24,6 +24,7 @@ from contextlib import nullcontext from copy import copy from functools import cache +import numpy as np import rerun as rr import torch from deepdiff import DeepDiff @@ -101,7 +102,9 @@ def is_headless(): return True -def predict_action(observation, policy, device, use_amp): +def predict_action( + observation: dict[str, np.ndarray], policy: PreTrainedPolicy, device: torch.device, use_amp: bool +): observation = copy(observation) with ( torch.inference_mode(), diff --git a/lerobot/record.py b/lerobot/record.py index 9d07f88b..6ddeb23b 100644 --- a/lerobot/record.py +++ b/lerobot/record.py @@ -178,9 +178,12 @@ def record_loop( observation = robot.get_observation() + if policy is not None or dataset is not None: + observation_frame = build_dataset_frame(dataset.features, observation, prefix="observation") + if policy is not None: action = predict_action( - observation, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp + observation_frame, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp ) else: action = teleop.get_action() @@ -190,7 +193,6 @@ def record_loop( sent_action = robot.send_action(action) if dataset is not None: - observation_frame = build_dataset_frame(dataset.features, observation, prefix="observation") action_frame = build_dataset_frame(dataset.features, sent_action, prefix="action") frame = {**observation_frame, **action_frame} dataset.add_frame(frame, task=single_task) From bed90e3a41c43758c619dba66158ddd5798d361a Mon Sep 17 00:00:00 2001 From: Ragnar Date: Sun, 25 May 2025 17:20:45 +0200 Subject: [PATCH 2/4] fix: typos and grammar (#1148) --- examples/11_use_moss.md | 2 +- lerobot/common/datasets/image_writer.py | 2 +- lerobot/common/datasets/lerobot_dataset.py | 2 +- lerobot/common/datasets/video_utils.py | 2 +- lerobot/common/policies/pi0/modeling_pi0.py | 2 +- lerobot/common/policies/pi0fast/modeling_pi0fast.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/11_use_moss.md b/examples/11_use_moss.md index b2e18573..7c9297ac 100644 --- a/examples/11_use_moss.md +++ b/examples/11_use_moss.md @@ -141,7 +141,7 @@ python lerobot/scripts/configure_motor.py \ --ID 1 ``` -Note: These motors are currently limitated. They can take values between 0 and 4096 only, which corresponds to a full turn. They can't turn more than that. 2048 is at the middle of this range, so we can take -2048 steps (180 degrees anticlockwise) and reach the maximum range, or take +2048 steps (180 degrees clockwise) and reach the maximum range. The configuration step also sets the homing offset to 0, so that if you misassembled the arm, you can always update the homing offset to account for a shift up to ± 2048 steps (± 180 degrees). +Note: These motors are currently limited. They can take values between 0 and 4096 only, which corresponds to a full turn. They can't turn more than that. 2048 is at the middle of this range, so we can take -2048 steps (180 degrees anticlockwise) and reach the maximum range, or take +2048 steps (180 degrees clockwise) and reach the maximum range. The configuration step also sets the homing offset to 0, so that if you misassembled the arm, you can always update the homing offset to account for a shift up to ± 2048 steps (± 180 degrees). Then unplug your motor and plug the second motor and set its ID to 2. ```bash diff --git a/lerobot/common/datasets/image_writer.py b/lerobot/common/datasets/image_writer.py index 6fc0ee2f..4a4e1ab0 100644 --- a/lerobot/common/datasets/image_writer.py +++ b/lerobot/common/datasets/image_writer.py @@ -106,7 +106,7 @@ def worker_process(queue: queue.Queue, num_threads: int): class AsyncImageWriter: """ This class abstract away the initialisation of processes or/and threads to - save images on disk asynchrounously, which is critical to control a robot and record data + save images on disk asynchronously, which is critical to control a robot and record data at a high frame rate. When `num_processes=0`, it creates a threads pool of size `num_threads`. diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index d8da85d6..df365a3c 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -944,7 +944,7 @@ class LeRobotDataset(torch.utils.data.Dataset): def stop_image_writer(self) -> None: """ Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first to - remove the image_writer in order for the LeRobotDataset object to be pickleable and parallelized. + remove the image_writer in order for the LeRobotDataset object to be picklable and parallelized. """ if self.image_writer is not None: self.image_writer.stop() diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 375314e9..3a77f36e 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -101,7 +101,7 @@ def decode_video_frames_torchvision( keyframes_only = False torchvision.set_video_backend(backend) if backend == "pyav": - keyframes_only = True # pyav doesnt support accuracte seek + keyframes_only = True # pyav doesn't support accurate seek # set a video stream reader # TODO(rcadene): also load audio stream at the same time diff --git a/lerobot/common/policies/pi0/modeling_pi0.py b/lerobot/common/policies/pi0/modeling_pi0.py index 7599fa63..8ec29d32 100644 --- a/lerobot/common/policies/pi0/modeling_pi0.py +++ b/lerobot/common/policies/pi0/modeling_pi0.py @@ -357,7 +357,7 @@ class PI0Policy(PreTrainedPolicy): if self.config.resize_imgs_with_padding is not None: img = resize_with_pad(img, *self.config.resize_imgs_with_padding, pad_value=0) - # Normalize from range [0,1] to [-1,1] as expacted by siglip + # Normalize from range [0,1] to [-1,1] as expected by siglip img = img * 2.0 - 1.0 bsize = img.shape[0] diff --git a/lerobot/common/policies/pi0fast/modeling_pi0fast.py b/lerobot/common/policies/pi0fast/modeling_pi0fast.py index 36aafce9..f19e8c83 100644 --- a/lerobot/common/policies/pi0fast/modeling_pi0fast.py +++ b/lerobot/common/policies/pi0fast/modeling_pi0fast.py @@ -516,7 +516,7 @@ class PI0FAST(nn.Module): interpolate_like_pi=self.config.interpolate_like_pi, ) - # Normalize from range [0,1] to [-1,1] as expacted by siglip + # Normalize from range [0,1] to [-1,1] as expected by siglip img = img * 2.0 - 1.0 bsize = img.shape[0] From 809a9c6de0d424cfd9d80741c800f3e7d0ef563c Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Mon, 26 May 2025 10:48:42 +0200 Subject: [PATCH 3/4] fix(cameras): update docstring + handle sn when starts with 0 + update timeouts to more reasonable value (#1154) --- .../common/cameras/opencv/camera_opencv.py | 20 +++++----- .../cameras/realsense/camera_realsense.py | 40 ++++++++++--------- .../realsense/configuration_realsense.py | 10 ++--- tests/cameras/test_realsense.py | 28 ++++++------- 4 files changed, 51 insertions(+), 47 deletions(-) diff --git a/lerobot/common/cameras/opencv/camera_opencv.py b/lerobot/common/cameras/opencv/camera_opencv.py index 48001f01..d1a83f79 100644 --- a/lerobot/common/cameras/opencv/camera_opencv.py +++ b/lerobot/common/cameras/opencv/camera_opencv.py @@ -370,11 +370,14 @@ class OpenCVCamera(Camera): def _read_loop(self): """ - Internal loop run by the background thread for asynchronous reading. + Internal loop for background thread for asynchronous reading. - Continuously reads frames from the camera using the synchronous `read()` - method and places the latest frame into the `frame_queue`. It overwrites - any previous frame in the queue. + On each iteration: + 1. Reads a color frame + 2. Stores result in latest_frame (thread-safe) + 3. Sets new_frame_event to notify listeners + + Stops on DeviceNotConnectedError, logs other errors and continues. """ while not self.stop_event.is_set(): try: @@ -412,18 +415,17 @@ class OpenCVCamera(Camera): self.thread = None self.stop_event = None - def async_read(self, timeout_ms: float = 2000) -> np.ndarray: + def async_read(self, timeout_ms: float = 200) -> np.ndarray: """ Reads the latest available frame asynchronously. This method retrieves the most recent frame captured by the background read thread. It does not block waiting for the camera hardware directly, - only waits for a frame to appear in the internal queue up to the specified - timeout. + but may wait up to timeout_ms for the background thread to provide a frame. Args: timeout_ms (float): Maximum time in milliseconds to wait for a frame - to become available in the queue. Defaults to 2000ms (2 seconds). + to become available. Defaults to 200ms (0.2 seconds). Returns: np.ndarray: The latest captured frame as a NumPy array in the format @@ -432,7 +434,7 @@ class OpenCVCamera(Camera): Raises: DeviceNotConnectedError: If the camera is not connected. TimeoutError: If no frame becomes available within the specified timeout. - RuntimeError: If an unexpected error occurs while retrieving from the queue. + RuntimeError: If an unexpected error occurs. """ if not self.is_connected: raise DeviceNotConnectedError(f"{self} is not connected.") diff --git a/lerobot/common/cameras/realsense/camera_realsense.py b/lerobot/common/cameras/realsense/camera_realsense.py index 77ec60f5..71238f4e 100644 --- a/lerobot/common/cameras/realsense/camera_realsense.py +++ b/lerobot/common/cameras/realsense/camera_realsense.py @@ -67,7 +67,7 @@ class RealSenseCamera(Camera): from lerobot.common.cameras import ColorMode, Cv2Rotation # Basic usage with serial number - config = RealSenseCameraConfig(serial_number_or_name=1234567890) # Replace with actual SN + config = RealSenseCameraConfig(serial_number_or_name="0123456789") # Replace with actual SN camera = RealSenseCamera(config) camera.connect() @@ -83,7 +83,7 @@ class RealSenseCamera(Camera): # Example with depth capture and custom settings custom_config = RealSenseCameraConfig( - serial_number_or_name=1234567890, # Replace with actual SN + serial_number_or_name="0123456789", # Replace with actual SN fps=30, width=1280, height=720, @@ -116,8 +116,8 @@ class RealSenseCamera(Camera): self.config = config - if isinstance(config.serial_number_or_name, int): - self.serial_number = str(config.serial_number_or_name) + if config.serial_number_or_name.isdigit(): + self.serial_number = config.serial_number_or_name else: self.serial_number = self._find_serial_number_from_name(config.serial_number_or_name) @@ -310,7 +310,7 @@ class RealSenseCamera(Camera): self.width, self.height = actual_width, actual_height self.capture_width, self.capture_height = actual_width, actual_height - def read_depth(self, timeout_ms: int = 100) -> np.ndarray: + def read_depth(self, timeout_ms: int = 200) -> np.ndarray: """ Reads a single frame (depth) synchronously from the camera. @@ -318,7 +318,7 @@ class RealSenseCamera(Camera): from the camera hardware via the RealSense pipeline. Args: - timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms. + timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms. Returns: np.ndarray: The depth map as a NumPy array (height, width) @@ -353,7 +353,7 @@ class RealSenseCamera(Camera): return depth_map_processed - def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 100) -> np.ndarray: + def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 200) -> np.ndarray: """ Reads a single frame (color) synchronously from the camera. @@ -361,7 +361,7 @@ class RealSenseCamera(Camera): from the camera hardware via the RealSense pipeline. Args: - timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms. + timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms. Returns: np.ndarray: The captured color frame as a NumPy array @@ -442,11 +442,14 @@ class RealSenseCamera(Camera): def _read_loop(self): """ - Internal loop run by the background thread for asynchronous reading. + Internal loop for background thread for asynchronous reading. - Continuously reads frames (color and optional depth) using `read()` - and places the latest result (single image or tuple) into the `frame_queue`. - It overwrites any previous frame in the queue. + On each iteration: + 1. Reads a color frame with 500ms timeout + 2. Stores result in latest_frame (thread-safe) + 3. Sets new_frame_event to notify listeners + + Stops on DeviceNotConnectedError, logs other errors and continues. """ while not self.stop_event.is_set(): try: @@ -485,18 +488,17 @@ class RealSenseCamera(Camera): self.stop_event = None # NOTE(Steven): Missing implementation for depth for now - def async_read(self, timeout_ms: float = 100) -> np.ndarray: + def async_read(self, timeout_ms: float = 200) -> np.ndarray: """ - Reads the latest available frame data (color or color+depth) asynchronously. + Reads the latest available frame data (color) asynchronously. - This method retrieves the most recent frame captured by the background + This method retrieves the most recent color frame captured by the background read thread. It does not block waiting for the camera hardware directly, - only waits for a frame to appear in the internal queue up to the specified - timeout. + but may wait up to timeout_ms for the background thread to provide a frame. Args: timeout_ms (float): Maximum time in milliseconds to wait for a frame - to become available in the queue. Defaults to 100ms (0.1 seconds). + to become available. Defaults to 200ms (0.2 seconds). Returns: np.ndarray: @@ -505,7 +507,7 @@ class RealSenseCamera(Camera): Raises: DeviceNotConnectedError: If the camera is not connected. TimeoutError: If no frame data becomes available within the specified timeout. - RuntimeError: If the background thread died unexpectedly or another queue error occurs. + RuntimeError: If the background thread died unexpectedly or another error occurs. """ if not self.is_connected: raise DeviceNotConnectedError(f"{self} is not connected.") diff --git a/lerobot/common/cameras/realsense/configuration_realsense.py b/lerobot/common/cameras/realsense/configuration_realsense.py index 0183965a..82e7c0d3 100644 --- a/lerobot/common/cameras/realsense/configuration_realsense.py +++ b/lerobot/common/cameras/realsense/configuration_realsense.py @@ -28,12 +28,12 @@ class RealSenseCameraConfig(CameraConfig): Example configurations for Intel RealSense D405: ```python # Basic configurations - RealSenseCameraConfig(128422271347, 30, 1280, 720) # 1280x720 @ 30FPS - RealSenseCameraConfig(128422271347, 60, 640, 480) # 640x480 @ 60FPS + RealSenseCameraConfig("0123456789", 30, 1280, 720) # 1280x720 @ 30FPS + RealSenseCameraConfig("0123456789", 60, 640, 480) # 640x480 @ 60FPS # Advanced configurations - RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True) # With depth sensing - RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation + RealSenseCameraConfig("0123456789", 30, 640, 480, use_depth=True) # With depth sensing + RealSenseCameraConfig("0123456789", 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation ``` Attributes: @@ -53,7 +53,7 @@ class RealSenseCameraConfig(CameraConfig): - For `fps`, `width` and `height`, either all of them need to be set, or none of them. """ - serial_number_or_name: int | str + serial_number_or_name: str color_mode: ColorMode = ColorMode.RGB use_depth: bool = False rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION diff --git a/tests/cameras/test_realsense.py b/tests/cameras/test_realsense.py index 4b552c1e..6e04b6c5 100644 --- a/tests/cameras/test_realsense.py +++ b/tests/cameras/test_realsense.py @@ -57,12 +57,12 @@ def fixture_patch_realsense(): def test_abc_implementation(): """Instantiation should raise an error if the class doesn't implement abstract methods/properties.""" - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") _ = RealSenseCamera(config) def test_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -70,7 +70,7 @@ def test_connect(): def test_connect_already_connected(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -80,7 +80,7 @@ def test_connect_already_connected(): def test_connect_invalid_camera_path(patch_realsense): patch_realsense.side_effect = mock_rs_config_enable_device_bad_file - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) with pytest.raises(ConnectionError): @@ -88,7 +88,7 @@ def test_connect_invalid_camera_path(patch_realsense): def test_invalid_width_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=99999, height=480, fps=30) + config = RealSenseCameraConfig(serial_number_or_name="042", width=99999, height=480, fps=30) camera = RealSenseCamera(config) with pytest.raises(ConnectionError): @@ -96,7 +96,7 @@ def test_invalid_width_connect(): def test_read(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) + config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30) camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -105,7 +105,7 @@ def test_read(): def test_read_depth(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30, use_depth=True) + config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30, use_depth=True) camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -114,7 +114,7 @@ def test_read_depth(): def test_read_before_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) with pytest.raises(DeviceNotConnectedError): @@ -122,7 +122,7 @@ def test_read_before_connect(): def test_disconnect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -132,7 +132,7 @@ def test_disconnect(): def test_disconnect_before_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) with pytest.raises(DeviceNotConnectedError): @@ -140,7 +140,7 @@ def test_disconnect_before_connect(): def test_async_read(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) + config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30) camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -156,7 +156,7 @@ def test_async_read(): def test_async_read_timeout(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) + config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30) camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -171,7 +171,7 @@ def test_async_read_timeout(): def test_async_read_before_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) with pytest.raises(DeviceNotConnectedError): @@ -189,7 +189,7 @@ def test_async_read_before_connect(): ids=["no_rot", "rot90", "rot180", "rot270"], ) def test_rotation(rotation): - config = RealSenseCameraConfig(serial_number_or_name=42, rotation=rotation) + config = RealSenseCameraConfig(serial_number_or_name="042", rotation=rotation) camera = RealSenseCamera(config) camera.connect(warmup=False) From fb4bfaf0293fdaa7f60496d221038090fe601921 Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Mon, 26 May 2025 10:51:05 +0200 Subject: [PATCH 4/4] fix(scripts): parser instead of draccus in record + add __get_path_fields__() to RecordConfig (#1155) --- lerobot/record.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lerobot/record.py b/lerobot/record.py index 6ddeb23b..733955b1 100644 --- a/lerobot/record.py +++ b/lerobot/record.py @@ -38,7 +38,6 @@ from dataclasses import asdict, dataclass from pathlib import Path from pprint import pformat -import draccus import numpy as np import rerun as rr @@ -151,6 +150,11 @@ class RecordConfig: self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides) self.policy.pretrained_path = policy_path + @classmethod + def __get_path_fields__(cls) -> list[str]: + """This enables the parser to load config from the policy using `--policy.path=local/dir`""" + return ["policy"] + @safe_stop_image_writer def record_loop( @@ -220,7 +224,7 @@ def record_loop( break -@draccus.wrap() +@parser.wrap() def record(cfg: RecordConfig) -> LeRobotDataset: init_logging() logging.info(pformat(asdict(cfg)))