diff --git a/lerobot/common/cameras/opencv/camera_opencv.py b/lerobot/common/cameras/opencv/camera_opencv.py index 48001f01..d1a83f79 100644 --- a/lerobot/common/cameras/opencv/camera_opencv.py +++ b/lerobot/common/cameras/opencv/camera_opencv.py @@ -370,11 +370,14 @@ class OpenCVCamera(Camera): def _read_loop(self): """ - Internal loop run by the background thread for asynchronous reading. + Internal loop for background thread for asynchronous reading. - Continuously reads frames from the camera using the synchronous `read()` - method and places the latest frame into the `frame_queue`. It overwrites - any previous frame in the queue. + On each iteration: + 1. Reads a color frame + 2. Stores result in latest_frame (thread-safe) + 3. Sets new_frame_event to notify listeners + + Stops on DeviceNotConnectedError, logs other errors and continues. """ while not self.stop_event.is_set(): try: @@ -412,18 +415,17 @@ class OpenCVCamera(Camera): self.thread = None self.stop_event = None - def async_read(self, timeout_ms: float = 2000) -> np.ndarray: + def async_read(self, timeout_ms: float = 200) -> np.ndarray: """ Reads the latest available frame asynchronously. This method retrieves the most recent frame captured by the background read thread. It does not block waiting for the camera hardware directly, - only waits for a frame to appear in the internal queue up to the specified - timeout. + but may wait up to timeout_ms for the background thread to provide a frame. Args: timeout_ms (float): Maximum time in milliseconds to wait for a frame - to become available in the queue. Defaults to 2000ms (2 seconds). + to become available. Defaults to 200ms (0.2 seconds). Returns: np.ndarray: The latest captured frame as a NumPy array in the format @@ -432,7 +434,7 @@ class OpenCVCamera(Camera): Raises: DeviceNotConnectedError: If the camera is not connected. TimeoutError: If no frame becomes available within the specified timeout. - RuntimeError: If an unexpected error occurs while retrieving from the queue. + RuntimeError: If an unexpected error occurs. """ if not self.is_connected: raise DeviceNotConnectedError(f"{self} is not connected.") diff --git a/lerobot/common/cameras/realsense/camera_realsense.py b/lerobot/common/cameras/realsense/camera_realsense.py index 77ec60f5..71238f4e 100644 --- a/lerobot/common/cameras/realsense/camera_realsense.py +++ b/lerobot/common/cameras/realsense/camera_realsense.py @@ -67,7 +67,7 @@ class RealSenseCamera(Camera): from lerobot.common.cameras import ColorMode, Cv2Rotation # Basic usage with serial number - config = RealSenseCameraConfig(serial_number_or_name=1234567890) # Replace with actual SN + config = RealSenseCameraConfig(serial_number_or_name="0123456789") # Replace with actual SN camera = RealSenseCamera(config) camera.connect() @@ -83,7 +83,7 @@ class RealSenseCamera(Camera): # Example with depth capture and custom settings custom_config = RealSenseCameraConfig( - serial_number_or_name=1234567890, # Replace with actual SN + serial_number_or_name="0123456789", # Replace with actual SN fps=30, width=1280, height=720, @@ -116,8 +116,8 @@ class RealSenseCamera(Camera): self.config = config - if isinstance(config.serial_number_or_name, int): - self.serial_number = str(config.serial_number_or_name) + if config.serial_number_or_name.isdigit(): + self.serial_number = config.serial_number_or_name else: self.serial_number = self._find_serial_number_from_name(config.serial_number_or_name) @@ -310,7 +310,7 @@ class RealSenseCamera(Camera): self.width, self.height = actual_width, actual_height self.capture_width, self.capture_height = actual_width, actual_height - def read_depth(self, timeout_ms: int = 100) -> np.ndarray: + def read_depth(self, timeout_ms: int = 200) -> np.ndarray: """ Reads a single frame (depth) synchronously from the camera. @@ -318,7 +318,7 @@ class RealSenseCamera(Camera): from the camera hardware via the RealSense pipeline. Args: - timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms. + timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms. Returns: np.ndarray: The depth map as a NumPy array (height, width) @@ -353,7 +353,7 @@ class RealSenseCamera(Camera): return depth_map_processed - def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 100) -> np.ndarray: + def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 200) -> np.ndarray: """ Reads a single frame (color) synchronously from the camera. @@ -361,7 +361,7 @@ class RealSenseCamera(Camera): from the camera hardware via the RealSense pipeline. Args: - timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms. + timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms. Returns: np.ndarray: The captured color frame as a NumPy array @@ -442,11 +442,14 @@ class RealSenseCamera(Camera): def _read_loop(self): """ - Internal loop run by the background thread for asynchronous reading. + Internal loop for background thread for asynchronous reading. - Continuously reads frames (color and optional depth) using `read()` - and places the latest result (single image or tuple) into the `frame_queue`. - It overwrites any previous frame in the queue. + On each iteration: + 1. Reads a color frame with 500ms timeout + 2. Stores result in latest_frame (thread-safe) + 3. Sets new_frame_event to notify listeners + + Stops on DeviceNotConnectedError, logs other errors and continues. """ while not self.stop_event.is_set(): try: @@ -485,18 +488,17 @@ class RealSenseCamera(Camera): self.stop_event = None # NOTE(Steven): Missing implementation for depth for now - def async_read(self, timeout_ms: float = 100) -> np.ndarray: + def async_read(self, timeout_ms: float = 200) -> np.ndarray: """ - Reads the latest available frame data (color or color+depth) asynchronously. + Reads the latest available frame data (color) asynchronously. - This method retrieves the most recent frame captured by the background + This method retrieves the most recent color frame captured by the background read thread. It does not block waiting for the camera hardware directly, - only waits for a frame to appear in the internal queue up to the specified - timeout. + but may wait up to timeout_ms for the background thread to provide a frame. Args: timeout_ms (float): Maximum time in milliseconds to wait for a frame - to become available in the queue. Defaults to 100ms (0.1 seconds). + to become available. Defaults to 200ms (0.2 seconds). Returns: np.ndarray: @@ -505,7 +507,7 @@ class RealSenseCamera(Camera): Raises: DeviceNotConnectedError: If the camera is not connected. TimeoutError: If no frame data becomes available within the specified timeout. - RuntimeError: If the background thread died unexpectedly or another queue error occurs. + RuntimeError: If the background thread died unexpectedly or another error occurs. """ if not self.is_connected: raise DeviceNotConnectedError(f"{self} is not connected.") diff --git a/lerobot/common/cameras/realsense/configuration_realsense.py b/lerobot/common/cameras/realsense/configuration_realsense.py index 0183965a..82e7c0d3 100644 --- a/lerobot/common/cameras/realsense/configuration_realsense.py +++ b/lerobot/common/cameras/realsense/configuration_realsense.py @@ -28,12 +28,12 @@ class RealSenseCameraConfig(CameraConfig): Example configurations for Intel RealSense D405: ```python # Basic configurations - RealSenseCameraConfig(128422271347, 30, 1280, 720) # 1280x720 @ 30FPS - RealSenseCameraConfig(128422271347, 60, 640, 480) # 640x480 @ 60FPS + RealSenseCameraConfig("0123456789", 30, 1280, 720) # 1280x720 @ 30FPS + RealSenseCameraConfig("0123456789", 60, 640, 480) # 640x480 @ 60FPS # Advanced configurations - RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True) # With depth sensing - RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation + RealSenseCameraConfig("0123456789", 30, 640, 480, use_depth=True) # With depth sensing + RealSenseCameraConfig("0123456789", 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation ``` Attributes: @@ -53,7 +53,7 @@ class RealSenseCameraConfig(CameraConfig): - For `fps`, `width` and `height`, either all of them need to be set, or none of them. """ - serial_number_or_name: int | str + serial_number_or_name: str color_mode: ColorMode = ColorMode.RGB use_depth: bool = False rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION diff --git a/lerobot/common/datasets/image_writer.py b/lerobot/common/datasets/image_writer.py index 6fc0ee2f..4a4e1ab0 100644 --- a/lerobot/common/datasets/image_writer.py +++ b/lerobot/common/datasets/image_writer.py @@ -106,7 +106,7 @@ def worker_process(queue: queue.Queue, num_threads: int): class AsyncImageWriter: """ This class abstract away the initialisation of processes or/and threads to - save images on disk asynchrounously, which is critical to control a robot and record data + save images on disk asynchronously, which is critical to control a robot and record data at a high frame rate. When `num_processes=0`, it creates a threads pool of size `num_threads`. diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 9d922c8a..817ab39f 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -932,7 +932,7 @@ class LeRobotDataset(torch.utils.data.Dataset): def stop_image_writer(self) -> None: """ Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first to - remove the image_writer in order for the LeRobotDataset object to be pickleable and parallelized. + remove the image_writer in order for the LeRobotDataset object to be picklable and parallelized. """ if self.image_writer is not None: self.image_writer.stop() diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 375314e9..3a77f36e 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -101,7 +101,7 @@ def decode_video_frames_torchvision( keyframes_only = False torchvision.set_video_backend(backend) if backend == "pyav": - keyframes_only = True # pyav doesnt support accuracte seek + keyframes_only = True # pyav doesn't support accurate seek # set a video stream reader # TODO(rcadene): also load audio stream at the same time diff --git a/lerobot/common/policies/pi0/modeling_pi0.py b/lerobot/common/policies/pi0/modeling_pi0.py index aeb63398..1d8a5055 100644 --- a/lerobot/common/policies/pi0/modeling_pi0.py +++ b/lerobot/common/policies/pi0/modeling_pi0.py @@ -357,7 +357,7 @@ class PI0Policy(PreTrainedPolicy): if self.config.resize_imgs_with_padding is not None: img = resize_with_pad(img, *self.config.resize_imgs_with_padding, pad_value=0) - # Normalize from range [0,1] to [-1,1] as expacted by siglip + # Normalize from range [0,1] to [-1,1] as expected by siglip img = img * 2.0 - 1.0 bsize = img.shape[0] diff --git a/lerobot/common/policies/pi0fast/modeling_pi0fast.py b/lerobot/common/policies/pi0fast/modeling_pi0fast.py index 36aafce9..f19e8c83 100644 --- a/lerobot/common/policies/pi0fast/modeling_pi0fast.py +++ b/lerobot/common/policies/pi0fast/modeling_pi0fast.py @@ -516,7 +516,7 @@ class PI0FAST(nn.Module): interpolate_like_pi=self.config.interpolate_like_pi, ) - # Normalize from range [0,1] to [-1,1] as expacted by siglip + # Normalize from range [0,1] to [-1,1] as expected by siglip img = img * 2.0 - 1.0 bsize = img.shape[0] diff --git a/lerobot/common/utils/control_utils.py b/lerobot/common/utils/control_utils.py index d4790a57..012f9f27 100644 --- a/lerobot/common/utils/control_utils.py +++ b/lerobot/common/utils/control_utils.py @@ -24,6 +24,7 @@ from contextlib import nullcontext from copy import copy from functools import cache +import numpy as np import rerun as rr import torch from deepdiff import DeepDiff @@ -101,7 +102,9 @@ def is_headless(): return True -def predict_action(observation, policy, device, use_amp): +def predict_action( + observation: dict[str, np.ndarray], policy: PreTrainedPolicy, device: torch.device, use_amp: bool +): observation = copy(observation) with ( torch.inference_mode(), diff --git a/lerobot/record.py b/lerobot/record.py index 9d07f88b..733955b1 100644 --- a/lerobot/record.py +++ b/lerobot/record.py @@ -38,7 +38,6 @@ from dataclasses import asdict, dataclass from pathlib import Path from pprint import pformat -import draccus import numpy as np import rerun as rr @@ -151,6 +150,11 @@ class RecordConfig: self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides) self.policy.pretrained_path = policy_path + @classmethod + def __get_path_fields__(cls) -> list[str]: + """This enables the parser to load config from the policy using `--policy.path=local/dir`""" + return ["policy"] + @safe_stop_image_writer def record_loop( @@ -178,9 +182,12 @@ def record_loop( observation = robot.get_observation() + if policy is not None or dataset is not None: + observation_frame = build_dataset_frame(dataset.features, observation, prefix="observation") + if policy is not None: action = predict_action( - observation, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp + observation_frame, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp ) else: action = teleop.get_action() @@ -190,7 +197,6 @@ def record_loop( sent_action = robot.send_action(action) if dataset is not None: - observation_frame = build_dataset_frame(dataset.features, observation, prefix="observation") action_frame = build_dataset_frame(dataset.features, sent_action, prefix="action") frame = {**observation_frame, **action_frame} dataset.add_frame(frame, task=single_task) @@ -218,7 +224,7 @@ def record_loop( break -@draccus.wrap() +@parser.wrap() def record(cfg: RecordConfig) -> LeRobotDataset: init_logging() logging.info(pformat(asdict(cfg))) diff --git a/tests/cameras/test_realsense.py b/tests/cameras/test_realsense.py index 4b552c1e..6e04b6c5 100644 --- a/tests/cameras/test_realsense.py +++ b/tests/cameras/test_realsense.py @@ -57,12 +57,12 @@ def fixture_patch_realsense(): def test_abc_implementation(): """Instantiation should raise an error if the class doesn't implement abstract methods/properties.""" - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") _ = RealSenseCamera(config) def test_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -70,7 +70,7 @@ def test_connect(): def test_connect_already_connected(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -80,7 +80,7 @@ def test_connect_already_connected(): def test_connect_invalid_camera_path(patch_realsense): patch_realsense.side_effect = mock_rs_config_enable_device_bad_file - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) with pytest.raises(ConnectionError): @@ -88,7 +88,7 @@ def test_connect_invalid_camera_path(patch_realsense): def test_invalid_width_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=99999, height=480, fps=30) + config = RealSenseCameraConfig(serial_number_or_name="042", width=99999, height=480, fps=30) camera = RealSenseCamera(config) with pytest.raises(ConnectionError): @@ -96,7 +96,7 @@ def test_invalid_width_connect(): def test_read(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) + config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30) camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -105,7 +105,7 @@ def test_read(): def test_read_depth(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30, use_depth=True) + config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30, use_depth=True) camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -114,7 +114,7 @@ def test_read_depth(): def test_read_before_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) with pytest.raises(DeviceNotConnectedError): @@ -122,7 +122,7 @@ def test_read_before_connect(): def test_disconnect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -132,7 +132,7 @@ def test_disconnect(): def test_disconnect_before_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) with pytest.raises(DeviceNotConnectedError): @@ -140,7 +140,7 @@ def test_disconnect_before_connect(): def test_async_read(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) + config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30) camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -156,7 +156,7 @@ def test_async_read(): def test_async_read_timeout(): - config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) + config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30) camera = RealSenseCamera(config) camera.connect(warmup=False) @@ -171,7 +171,7 @@ def test_async_read_timeout(): def test_async_read_before_connect(): - config = RealSenseCameraConfig(serial_number_or_name=42) + config = RealSenseCameraConfig(serial_number_or_name="042") camera = RealSenseCamera(config) with pytest.raises(DeviceNotConnectedError): @@ -189,7 +189,7 @@ def test_async_read_before_connect(): ids=["no_rot", "rot90", "rot180", "rot270"], ) def test_rotation(rotation): - config = RealSenseCameraConfig(serial_number_or_name=42, rotation=rotation) + config = RealSenseCameraConfig(serial_number_or_name="042", rotation=rotation) camera = RealSenseCamera(config) camera.connect(warmup=False)