Merge remote-tracking branch 'origin/user/aliberts/2025_02_25_refactor_robots' into user/aliberts/2025_04_03_add_hope_jr

2025-05-26 17:40:15 +02:00
parent d5ebac067a 78e29f4f20
commit dfa96b4258
11 changed files with 70 additions and 57 deletions
--- a/lerobot/common/cameras/opencv/camera_opencv.py
+++ b/lerobot/common/cameras/opencv/camera_opencv.py
@@ -370,11 +370,14 @@ class OpenCVCamera(Camera):

    def _read_loop(self):
        """
-        Internal loop run by the background thread for asynchronous reading.
+        Internal loop for background thread for asynchronous reading.

-        Continuously reads frames from the camera using the synchronous `read()`
-        method and places the latest frame into the `frame_queue`. It overwrites
-        any previous frame in the queue.
+        On each iteration:
+        1. Reads a color frame
+        2. Stores result in latest_frame (thread-safe)
+        3. Sets new_frame_event to notify listeners
+
+        Stops on DeviceNotConnectedError, logs other errors and continues.
        """
        while not self.stop_event.is_set():
            try:
@@ -412,18 +415,17 @@ class OpenCVCamera(Camera):
        self.thread = None
        self.stop_event = None

-    def async_read(self, timeout_ms: float = 2000) -> np.ndarray:
+    def async_read(self, timeout_ms: float = 200) -> np.ndarray:
        """
        Reads the latest available frame asynchronously.

        This method retrieves the most recent frame captured by the background
        read thread. It does not block waiting for the camera hardware directly,
-        only waits for a frame to appear in the internal queue up to the specified
-        timeout.
+        but may wait up to timeout_ms for the background thread to provide a frame.

        Args:
            timeout_ms (float): Maximum time in milliseconds to wait for a frame
-                to become available in the queue. Defaults to 2000ms (2 seconds).
+                to become available. Defaults to 200ms (0.2 seconds).

        Returns:
            np.ndarray: The latest captured frame as a NumPy array in the format
@@ -432,7 +434,7 @@ class OpenCVCamera(Camera):
        Raises:
            DeviceNotConnectedError: If the camera is not connected.
            TimeoutError: If no frame becomes available within the specified timeout.
-            RuntimeError: If an unexpected error occurs while retrieving from the queue.
+            RuntimeError: If an unexpected error occurs.
        """
        if not self.is_connected:
            raise DeviceNotConnectedError(f"{self} is not connected.")
--- a/lerobot/common/cameras/realsense/camera_realsense.py
+++ b/lerobot/common/cameras/realsense/camera_realsense.py
@@ -67,7 +67,7 @@ class RealSenseCamera(Camera):
        from lerobot.common.cameras import ColorMode, Cv2Rotation

        # Basic usage with serial number
-        config = RealSenseCameraConfig(serial_number_or_name=1234567890) # Replace with actual SN
+        config = RealSenseCameraConfig(serial_number_or_name="0123456789") # Replace with actual SN
        camera = RealSenseCamera(config)
        camera.connect()

@@ -83,7 +83,7 @@ class RealSenseCamera(Camera):

        # Example with depth capture and custom settings
        custom_config = RealSenseCameraConfig(
-            serial_number_or_name=1234567890, # Replace with actual SN
+            serial_number_or_name="0123456789", # Replace with actual SN
            fps=30,
            width=1280,
            height=720,
@@ -116,8 +116,8 @@ class RealSenseCamera(Camera):

        self.config = config

-        if isinstance(config.serial_number_or_name, int):
-            self.serial_number = str(config.serial_number_or_name)
+        if config.serial_number_or_name.isdigit():
+            self.serial_number = config.serial_number_or_name
        else:
            self.serial_number = self._find_serial_number_from_name(config.serial_number_or_name)

@@ -310,7 +310,7 @@ class RealSenseCamera(Camera):
                self.width, self.height = actual_width, actual_height
                self.capture_width, self.capture_height = actual_width, actual_height

-    def read_depth(self, timeout_ms: int = 100) -> np.ndarray:
+    def read_depth(self, timeout_ms: int = 200) -> np.ndarray:
        """
        Reads a single frame (depth) synchronously from the camera.

@@ -318,7 +318,7 @@ class RealSenseCamera(Camera):
        from the camera hardware via the RealSense pipeline.

        Args:
-            timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms.
+            timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.

        Returns:
            np.ndarray: The depth map as a NumPy array (height, width)
@@ -353,7 +353,7 @@ class RealSenseCamera(Camera):

        return depth_map_processed

-    def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 100) -> np.ndarray:
+    def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 200) -> np.ndarray:
        """
        Reads a single frame (color) synchronously from the camera.

@@ -361,7 +361,7 @@ class RealSenseCamera(Camera):
        from the camera hardware via the RealSense pipeline.

        Args:
-            timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms.
+            timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.

        Returns:
            np.ndarray: The captured color frame as a NumPy array
@@ -442,11 +442,14 @@ class RealSenseCamera(Camera):

    def _read_loop(self):
        """
-        Internal loop run by the background thread for asynchronous reading.
+        Internal loop for background thread for asynchronous reading.

-        Continuously reads frames (color and optional depth) using `read()`
-        and places the latest result (single image or tuple) into the `frame_queue`.
-        It overwrites any previous frame in the queue.
+        On each iteration:
+        1. Reads a color frame with 500ms timeout
+        2. Stores result in latest_frame (thread-safe)
+        3. Sets new_frame_event to notify listeners
+
+        Stops on DeviceNotConnectedError, logs other errors and continues.
        """
        while not self.stop_event.is_set():
            try:
@@ -485,18 +488,17 @@ class RealSenseCamera(Camera):
        self.stop_event = None

    # NOTE(Steven): Missing implementation for depth for now
-    def async_read(self, timeout_ms: float = 100) -> np.ndarray:
+    def async_read(self, timeout_ms: float = 200) -> np.ndarray:
        """
-        Reads the latest available frame data (color or color+depth) asynchronously.
+        Reads the latest available frame data (color) asynchronously.

-        This method retrieves the most recent frame captured by the background
+        This method retrieves the most recent color frame captured by the background
        read thread. It does not block waiting for the camera hardware directly,
-        only waits for a frame to appear in the internal queue up to the specified
-        timeout.
+        but may wait up to timeout_ms for the background thread to provide a frame.

        Args:
            timeout_ms (float): Maximum time in milliseconds to wait for a frame
-                to become available in the queue. Defaults to 100ms (0.1 seconds).
+                to become available. Defaults to 200ms (0.2 seconds).

        Returns:
            np.ndarray:
@@ -505,7 +507,7 @@ class RealSenseCamera(Camera):
        Raises:
            DeviceNotConnectedError: If the camera is not connected.
            TimeoutError: If no frame data becomes available within the specified timeout.
-            RuntimeError: If the background thread died unexpectedly or another queue error occurs.
+            RuntimeError: If the background thread died unexpectedly or another error occurs.
        """
        if not self.is_connected:
            raise DeviceNotConnectedError(f"{self} is not connected.")
--- a/lerobot/common/cameras/realsense/configuration_realsense.py
+++ b/lerobot/common/cameras/realsense/configuration_realsense.py
@@ -28,12 +28,12 @@ class RealSenseCameraConfig(CameraConfig):
    Example configurations for Intel RealSense D405:
    ```python
    # Basic configurations
-    RealSenseCameraConfig(128422271347, 30, 1280, 720)   # 1280x720 @ 30FPS
-    RealSenseCameraConfig(128422271347, 60, 640, 480)   # 640x480 @ 60FPS
+    RealSenseCameraConfig("0123456789", 30, 1280, 720)   # 1280x720 @ 30FPS
+    RealSenseCameraConfig("0123456789", 60, 640, 480)   # 640x480 @ 60FPS

    # Advanced configurations
-    RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True)  # With depth sensing
-    RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90)     # With 90° rotation
+    RealSenseCameraConfig("0123456789", 30, 640, 480, use_depth=True)  # With depth sensing
+    RealSenseCameraConfig("0123456789", 30, 640, 480, rotation=Cv2Rotation.ROTATE_90)     # With 90° rotation
    ```

    Attributes:
@@ -53,7 +53,7 @@ class RealSenseCameraConfig(CameraConfig):
        - For `fps`, `width` and `height`, either all of them need to be set, or none of them.
    """

-    serial_number_or_name: int | str
+    serial_number_or_name: str
    color_mode: ColorMode = ColorMode.RGB
    use_depth: bool = False
    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
--- a/lerobot/common/datasets/image_writer.py
+++ b/lerobot/common/datasets/image_writer.py
@@ -106,7 +106,7 @@ def worker_process(queue: queue.Queue, num_threads: int):
 class AsyncImageWriter:
    """
    This class abstract away the initialisation of processes or/and threads to
-    save images on disk asynchrounously, which is critical to control a robot and record data
+    save images on disk asynchronously, which is critical to control a robot and record data
    at a high frame rate.

    When `num_processes=0`, it creates a threads pool of size `num_threads`.
--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
@@ -932,7 +932,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
    def stop_image_writer(self) -> None:
        """
        Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first to
-        remove the image_writer in order for the LeRobotDataset object to be pickleable and parallelized.
+        remove the image_writer in order for the LeRobotDataset object to be picklable and parallelized.
        """
        if self.image_writer is not None:
            self.image_writer.stop()
--- a/lerobot/common/datasets/video_utils.py
+++ b/lerobot/common/datasets/video_utils.py
@@ -101,7 +101,7 @@ def decode_video_frames_torchvision(
    keyframes_only = False
    torchvision.set_video_backend(backend)
    if backend == "pyav":
-        keyframes_only = True  # pyav doesnt support accuracte seek
+        keyframes_only = True  # pyav doesn't support accurate seek

    # set a video stream reader
    # TODO(rcadene): also load audio stream at the same time
--- a/lerobot/common/policies/pi0/modeling_pi0.py
+++ b/lerobot/common/policies/pi0/modeling_pi0.py
@@ -357,7 +357,7 @@ class PI0Policy(PreTrainedPolicy):
            if self.config.resize_imgs_with_padding is not None:
                img = resize_with_pad(img, *self.config.resize_imgs_with_padding, pad_value=0)

-            # Normalize from range [0,1] to [-1,1] as expacted by siglip
+            # Normalize from range [0,1] to [-1,1] as expected by siglip
            img = img * 2.0 - 1.0

            bsize = img.shape[0]
--- a/lerobot/common/policies/pi0fast/modeling_pi0fast.py
+++ b/lerobot/common/policies/pi0fast/modeling_pi0fast.py
@@ -516,7 +516,7 @@ class PI0FAST(nn.Module):
                        interpolate_like_pi=self.config.interpolate_like_pi,
                    )

-                # Normalize from range [0,1] to [-1,1] as expacted by siglip
+                # Normalize from range [0,1] to [-1,1] as expected by siglip
                img = img * 2.0 - 1.0

                bsize = img.shape[0]
--- a/lerobot/common/utils/control_utils.py
+++ b/lerobot/common/utils/control_utils.py
@@ -24,6 +24,7 @@ from contextlib import nullcontext
 from copy import copy
 from functools import cache

+import numpy as np
 import rerun as rr
 import torch
 from deepdiff import DeepDiff
@@ -101,7 +102,9 @@ def is_headless():
        return True


-def predict_action(observation, policy, device, use_amp):
+def predict_action(
+    observation: dict[str, np.ndarray], policy: PreTrainedPolicy, device: torch.device, use_amp: bool
+):
    observation = copy(observation)
    with (
        torch.inference_mode(),
--- a/lerobot/record.py
+++ b/lerobot/record.py
@@ -38,7 +38,6 @@ from dataclasses import asdict, dataclass
 from pathlib import Path
 from pprint import pformat

-import draccus
 import numpy as np
 import rerun as rr

@@ -151,6 +150,11 @@ class RecordConfig:
            self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides)
            self.policy.pretrained_path = policy_path

+    @classmethod
+    def __get_path_fields__(cls) -> list[str]:
+        """This enables the parser to load config from the policy using `--policy.path=local/dir`"""
+        return ["policy"]
+

@safe_stop_image_writer
 def record_loop(
@@ -178,9 +182,12 @@ def record_loop(

        observation = robot.get_observation()

+        if policy is not None or dataset is not None:
+            observation_frame = build_dataset_frame(dataset.features, observation, prefix="observation")
+
        if policy is not None:
            action = predict_action(
-                observation, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp
+                observation_frame, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp
            )
        else:
            action = teleop.get_action()
@@ -190,7 +197,6 @@ def record_loop(
        sent_action = robot.send_action(action)

        if dataset is not None:
-            observation_frame = build_dataset_frame(dataset.features, observation, prefix="observation")
            action_frame = build_dataset_frame(dataset.features, sent_action, prefix="action")
            frame = {**observation_frame, **action_frame}
            dataset.add_frame(frame, task=single_task)
@@ -218,7 +224,7 @@ def record_loop(
            break


-@draccus.wrap()
+@parser.wrap()
 def record(cfg: RecordConfig) -> LeRobotDataset:
    init_logging()
    logging.info(pformat(asdict(cfg)))
--- a/tests/cameras/test_realsense.py
+++ b/tests/cameras/test_realsense.py
@@ -57,12 +57,12 @@ def fixture_patch_realsense():

 def test_abc_implementation():
    """Instantiation should raise an error if the class doesn't implement abstract methods/properties."""
-    config = RealSenseCameraConfig(serial_number_or_name=42)
+    config = RealSenseCameraConfig(serial_number_or_name="042")
    _ = RealSenseCamera(config)


 def test_connect():
-    config = RealSenseCameraConfig(serial_number_or_name=42)
+    config = RealSenseCameraConfig(serial_number_or_name="042")
    camera = RealSenseCamera(config)

    camera.connect(warmup=False)
@@ -70,7 +70,7 @@ def test_connect():


 def test_connect_already_connected():
-    config = RealSenseCameraConfig(serial_number_or_name=42)
+    config = RealSenseCameraConfig(serial_number_or_name="042")
    camera = RealSenseCamera(config)
    camera.connect(warmup=False)

@@ -80,7 +80,7 @@ def test_connect_already_connected():

 def test_connect_invalid_camera_path(patch_realsense):
    patch_realsense.side_effect = mock_rs_config_enable_device_bad_file
-    config = RealSenseCameraConfig(serial_number_or_name=42)
+    config = RealSenseCameraConfig(serial_number_or_name="042")
    camera = RealSenseCamera(config)

    with pytest.raises(ConnectionError):
@@ -88,7 +88,7 @@ def test_connect_invalid_camera_path(patch_realsense):


 def test_invalid_width_connect():
-    config = RealSenseCameraConfig(serial_number_or_name=42, width=99999, height=480, fps=30)
+    config = RealSenseCameraConfig(serial_number_or_name="042", width=99999, height=480, fps=30)
    camera = RealSenseCamera(config)

    with pytest.raises(ConnectionError):
@@ -96,7 +96,7 @@ def test_invalid_width_connect():


 def test_read():
-    config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30)
+    config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30)
    camera = RealSenseCamera(config)
    camera.connect(warmup=False)

@@ -105,7 +105,7 @@ def test_read():


 def test_read_depth():
-    config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30, use_depth=True)
+    config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30, use_depth=True)
    camera = RealSenseCamera(config)
    camera.connect(warmup=False)

@@ -114,7 +114,7 @@ def test_read_depth():


 def test_read_before_connect():
-    config = RealSenseCameraConfig(serial_number_or_name=42)
+    config = RealSenseCameraConfig(serial_number_or_name="042")
    camera = RealSenseCamera(config)

    with pytest.raises(DeviceNotConnectedError):
@@ -122,7 +122,7 @@ def test_read_before_connect():


 def test_disconnect():
-    config = RealSenseCameraConfig(serial_number_or_name=42)
+    config = RealSenseCameraConfig(serial_number_or_name="042")
    camera = RealSenseCamera(config)
    camera.connect(warmup=False)

@@ -132,7 +132,7 @@ def test_disconnect():


 def test_disconnect_before_connect():
-    config = RealSenseCameraConfig(serial_number_or_name=42)
+    config = RealSenseCameraConfig(serial_number_or_name="042")
    camera = RealSenseCamera(config)

    with pytest.raises(DeviceNotConnectedError):
@@ -140,7 +140,7 @@ def test_disconnect_before_connect():


 def test_async_read():
-    config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30)
+    config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30)
    camera = RealSenseCamera(config)
    camera.connect(warmup=False)

@@ -156,7 +156,7 @@ def test_async_read():


 def test_async_read_timeout():
-    config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30)
+    config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30)
    camera = RealSenseCamera(config)
    camera.connect(warmup=False)

@@ -171,7 +171,7 @@ def test_async_read_timeout():


 def test_async_read_before_connect():
-    config = RealSenseCameraConfig(serial_number_or_name=42)
+    config = RealSenseCameraConfig(serial_number_or_name="042")
    camera = RealSenseCamera(config)

    with pytest.raises(DeviceNotConnectedError):
@@ -189,7 +189,7 @@ def test_async_read_before_connect():
    ids=["no_rot", "rot90", "rot180", "rot270"],
 )
 def test_rotation(rotation):
-    config = RealSenseCameraConfig(serial_number_or_name=42, rotation=rotation)
+    config = RealSenseCameraConfig(serial_number_or_name="042", rotation=rotation)
    camera = RealSenseCamera(config)
    camera.connect(warmup=False)