Merge remote-tracking branch 'origin/user/aliberts/2025_02_25_refactor_robots' into user/aliberts/2025_04_03_add_hope_jr

This commit is contained in:
Simon Alibert
2025-05-26 17:40:15 +02:00
11 changed files with 70 additions and 57 deletions

View File

@@ -370,11 +370,14 @@ class OpenCVCamera(Camera):
def _read_loop(self): def _read_loop(self):
""" """
Internal loop run by the background thread for asynchronous reading. Internal loop for background thread for asynchronous reading.
Continuously reads frames from the camera using the synchronous `read()` On each iteration:
method and places the latest frame into the `frame_queue`. It overwrites 1. Reads a color frame
any previous frame in the queue. 2. Stores result in latest_frame (thread-safe)
3. Sets new_frame_event to notify listeners
Stops on DeviceNotConnectedError, logs other errors and continues.
""" """
while not self.stop_event.is_set(): while not self.stop_event.is_set():
try: try:
@@ -412,18 +415,17 @@ class OpenCVCamera(Camera):
self.thread = None self.thread = None
self.stop_event = None self.stop_event = None
def async_read(self, timeout_ms: float = 2000) -> np.ndarray: def async_read(self, timeout_ms: float = 200) -> np.ndarray:
""" """
Reads the latest available frame asynchronously. Reads the latest available frame asynchronously.
This method retrieves the most recent frame captured by the background This method retrieves the most recent frame captured by the background
read thread. It does not block waiting for the camera hardware directly, read thread. It does not block waiting for the camera hardware directly,
only waits for a frame to appear in the internal queue up to the specified but may wait up to timeout_ms for the background thread to provide a frame.
timeout.
Args: Args:
timeout_ms (float): Maximum time in milliseconds to wait for a frame timeout_ms (float): Maximum time in milliseconds to wait for a frame
to become available in the queue. Defaults to 2000ms (2 seconds). to become available. Defaults to 200ms (0.2 seconds).
Returns: Returns:
np.ndarray: The latest captured frame as a NumPy array in the format np.ndarray: The latest captured frame as a NumPy array in the format
@@ -432,7 +434,7 @@ class OpenCVCamera(Camera):
Raises: Raises:
DeviceNotConnectedError: If the camera is not connected. DeviceNotConnectedError: If the camera is not connected.
TimeoutError: If no frame becomes available within the specified timeout. TimeoutError: If no frame becomes available within the specified timeout.
RuntimeError: If an unexpected error occurs while retrieving from the queue. RuntimeError: If an unexpected error occurs.
""" """
if not self.is_connected: if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.") raise DeviceNotConnectedError(f"{self} is not connected.")

View File

@@ -67,7 +67,7 @@ class RealSenseCamera(Camera):
from lerobot.common.cameras import ColorMode, Cv2Rotation from lerobot.common.cameras import ColorMode, Cv2Rotation
# Basic usage with serial number # Basic usage with serial number
config = RealSenseCameraConfig(serial_number_or_name=1234567890) # Replace with actual SN config = RealSenseCameraConfig(serial_number_or_name="0123456789") # Replace with actual SN
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect() camera.connect()
@@ -83,7 +83,7 @@ class RealSenseCamera(Camera):
# Example with depth capture and custom settings # Example with depth capture and custom settings
custom_config = RealSenseCameraConfig( custom_config = RealSenseCameraConfig(
serial_number_or_name=1234567890, # Replace with actual SN serial_number_or_name="0123456789", # Replace with actual SN
fps=30, fps=30,
width=1280, width=1280,
height=720, height=720,
@@ -116,8 +116,8 @@ class RealSenseCamera(Camera):
self.config = config self.config = config
if isinstance(config.serial_number_or_name, int): if config.serial_number_or_name.isdigit():
self.serial_number = str(config.serial_number_or_name) self.serial_number = config.serial_number_or_name
else: else:
self.serial_number = self._find_serial_number_from_name(config.serial_number_or_name) self.serial_number = self._find_serial_number_from_name(config.serial_number_or_name)
@@ -310,7 +310,7 @@ class RealSenseCamera(Camera):
self.width, self.height = actual_width, actual_height self.width, self.height = actual_width, actual_height
self.capture_width, self.capture_height = actual_width, actual_height self.capture_width, self.capture_height = actual_width, actual_height
def read_depth(self, timeout_ms: int = 100) -> np.ndarray: def read_depth(self, timeout_ms: int = 200) -> np.ndarray:
""" """
Reads a single frame (depth) synchronously from the camera. Reads a single frame (depth) synchronously from the camera.
@@ -318,7 +318,7 @@ class RealSenseCamera(Camera):
from the camera hardware via the RealSense pipeline. from the camera hardware via the RealSense pipeline.
Args: Args:
timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms. timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.
Returns: Returns:
np.ndarray: The depth map as a NumPy array (height, width) np.ndarray: The depth map as a NumPy array (height, width)
@@ -353,7 +353,7 @@ class RealSenseCamera(Camera):
return depth_map_processed return depth_map_processed
def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 100) -> np.ndarray: def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 200) -> np.ndarray:
""" """
Reads a single frame (color) synchronously from the camera. Reads a single frame (color) synchronously from the camera.
@@ -361,7 +361,7 @@ class RealSenseCamera(Camera):
from the camera hardware via the RealSense pipeline. from the camera hardware via the RealSense pipeline.
Args: Args:
timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms. timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.
Returns: Returns:
np.ndarray: The captured color frame as a NumPy array np.ndarray: The captured color frame as a NumPy array
@@ -442,11 +442,14 @@ class RealSenseCamera(Camera):
def _read_loop(self): def _read_loop(self):
""" """
Internal loop run by the background thread for asynchronous reading. Internal loop for background thread for asynchronous reading.
Continuously reads frames (color and optional depth) using `read()` On each iteration:
and places the latest result (single image or tuple) into the `frame_queue`. 1. Reads a color frame with 500ms timeout
It overwrites any previous frame in the queue. 2. Stores result in latest_frame (thread-safe)
3. Sets new_frame_event to notify listeners
Stops on DeviceNotConnectedError, logs other errors and continues.
""" """
while not self.stop_event.is_set(): while not self.stop_event.is_set():
try: try:
@@ -485,18 +488,17 @@ class RealSenseCamera(Camera):
self.stop_event = None self.stop_event = None
# NOTE(Steven): Missing implementation for depth for now # NOTE(Steven): Missing implementation for depth for now
def async_read(self, timeout_ms: float = 100) -> np.ndarray: def async_read(self, timeout_ms: float = 200) -> np.ndarray:
""" """
Reads the latest available frame data (color or color+depth) asynchronously. Reads the latest available frame data (color) asynchronously.
This method retrieves the most recent frame captured by the background This method retrieves the most recent color frame captured by the background
read thread. It does not block waiting for the camera hardware directly, read thread. It does not block waiting for the camera hardware directly,
only waits for a frame to appear in the internal queue up to the specified but may wait up to timeout_ms for the background thread to provide a frame.
timeout.
Args: Args:
timeout_ms (float): Maximum time in milliseconds to wait for a frame timeout_ms (float): Maximum time in milliseconds to wait for a frame
to become available in the queue. Defaults to 100ms (0.1 seconds). to become available. Defaults to 200ms (0.2 seconds).
Returns: Returns:
np.ndarray: np.ndarray:
@@ -505,7 +507,7 @@ class RealSenseCamera(Camera):
Raises: Raises:
DeviceNotConnectedError: If the camera is not connected. DeviceNotConnectedError: If the camera is not connected.
TimeoutError: If no frame data becomes available within the specified timeout. TimeoutError: If no frame data becomes available within the specified timeout.
RuntimeError: If the background thread died unexpectedly or another queue error occurs. RuntimeError: If the background thread died unexpectedly or another error occurs.
""" """
if not self.is_connected: if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.") raise DeviceNotConnectedError(f"{self} is not connected.")

View File

@@ -28,12 +28,12 @@ class RealSenseCameraConfig(CameraConfig):
Example configurations for Intel RealSense D405: Example configurations for Intel RealSense D405:
```python ```python
# Basic configurations # Basic configurations
RealSenseCameraConfig(128422271347, 30, 1280, 720) # 1280x720 @ 30FPS RealSenseCameraConfig("0123456789", 30, 1280, 720) # 1280x720 @ 30FPS
RealSenseCameraConfig(128422271347, 60, 640, 480) # 640x480 @ 60FPS RealSenseCameraConfig("0123456789", 60, 640, 480) # 640x480 @ 60FPS
# Advanced configurations # Advanced configurations
RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True) # With depth sensing RealSenseCameraConfig("0123456789", 30, 640, 480, use_depth=True) # With depth sensing
RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation RealSenseCameraConfig("0123456789", 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation
``` ```
Attributes: Attributes:
@@ -53,7 +53,7 @@ class RealSenseCameraConfig(CameraConfig):
- For `fps`, `width` and `height`, either all of them need to be set, or none of them. - For `fps`, `width` and `height`, either all of them need to be set, or none of them.
""" """
serial_number_or_name: int | str serial_number_or_name: str
color_mode: ColorMode = ColorMode.RGB color_mode: ColorMode = ColorMode.RGB
use_depth: bool = False use_depth: bool = False
rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION

View File

@@ -106,7 +106,7 @@ def worker_process(queue: queue.Queue, num_threads: int):
class AsyncImageWriter: class AsyncImageWriter:
""" """
This class abstract away the initialisation of processes or/and threads to This class abstract away the initialisation of processes or/and threads to
save images on disk asynchrounously, which is critical to control a robot and record data save images on disk asynchronously, which is critical to control a robot and record data
at a high frame rate. at a high frame rate.
When `num_processes=0`, it creates a threads pool of size `num_threads`. When `num_processes=0`, it creates a threads pool of size `num_threads`.

View File

@@ -932,7 +932,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
def stop_image_writer(self) -> None: def stop_image_writer(self) -> None:
""" """
Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first to Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first to
remove the image_writer in order for the LeRobotDataset object to be pickleable and parallelized. remove the image_writer in order for the LeRobotDataset object to be picklable and parallelized.
""" """
if self.image_writer is not None: if self.image_writer is not None:
self.image_writer.stop() self.image_writer.stop()

View File

@@ -101,7 +101,7 @@ def decode_video_frames_torchvision(
keyframes_only = False keyframes_only = False
torchvision.set_video_backend(backend) torchvision.set_video_backend(backend)
if backend == "pyav": if backend == "pyav":
keyframes_only = True # pyav doesnt support accuracte seek keyframes_only = True # pyav doesn't support accurate seek
# set a video stream reader # set a video stream reader
# TODO(rcadene): also load audio stream at the same time # TODO(rcadene): also load audio stream at the same time

View File

@@ -357,7 +357,7 @@ class PI0Policy(PreTrainedPolicy):
if self.config.resize_imgs_with_padding is not None: if self.config.resize_imgs_with_padding is not None:
img = resize_with_pad(img, *self.config.resize_imgs_with_padding, pad_value=0) img = resize_with_pad(img, *self.config.resize_imgs_with_padding, pad_value=0)
# Normalize from range [0,1] to [-1,1] as expacted by siglip # Normalize from range [0,1] to [-1,1] as expected by siglip
img = img * 2.0 - 1.0 img = img * 2.0 - 1.0
bsize = img.shape[0] bsize = img.shape[0]

View File

@@ -516,7 +516,7 @@ class PI0FAST(nn.Module):
interpolate_like_pi=self.config.interpolate_like_pi, interpolate_like_pi=self.config.interpolate_like_pi,
) )
# Normalize from range [0,1] to [-1,1] as expacted by siglip # Normalize from range [0,1] to [-1,1] as expected by siglip
img = img * 2.0 - 1.0 img = img * 2.0 - 1.0
bsize = img.shape[0] bsize = img.shape[0]

View File

@@ -24,6 +24,7 @@ from contextlib import nullcontext
from copy import copy from copy import copy
from functools import cache from functools import cache
import numpy as np
import rerun as rr import rerun as rr
import torch import torch
from deepdiff import DeepDiff from deepdiff import DeepDiff
@@ -101,7 +102,9 @@ def is_headless():
return True return True
def predict_action(observation, policy, device, use_amp): def predict_action(
observation: dict[str, np.ndarray], policy: PreTrainedPolicy, device: torch.device, use_amp: bool
):
observation = copy(observation) observation = copy(observation)
with ( with (
torch.inference_mode(), torch.inference_mode(),

View File

@@ -38,7 +38,6 @@ from dataclasses import asdict, dataclass
from pathlib import Path from pathlib import Path
from pprint import pformat from pprint import pformat
import draccus
import numpy as np import numpy as np
import rerun as rr import rerun as rr
@@ -151,6 +150,11 @@ class RecordConfig:
self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides) self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides)
self.policy.pretrained_path = policy_path self.policy.pretrained_path = policy_path
@classmethod
def __get_path_fields__(cls) -> list[str]:
"""This enables the parser to load config from the policy using `--policy.path=local/dir`"""
return ["policy"]
@safe_stop_image_writer @safe_stop_image_writer
def record_loop( def record_loop(
@@ -178,9 +182,12 @@ def record_loop(
observation = robot.get_observation() observation = robot.get_observation()
if policy is not None or dataset is not None:
observation_frame = build_dataset_frame(dataset.features, observation, prefix="observation")
if policy is not None: if policy is not None:
action = predict_action( action = predict_action(
observation, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp observation_frame, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp
) )
else: else:
action = teleop.get_action() action = teleop.get_action()
@@ -190,7 +197,6 @@ def record_loop(
sent_action = robot.send_action(action) sent_action = robot.send_action(action)
if dataset is not None: if dataset is not None:
observation_frame = build_dataset_frame(dataset.features, observation, prefix="observation")
action_frame = build_dataset_frame(dataset.features, sent_action, prefix="action") action_frame = build_dataset_frame(dataset.features, sent_action, prefix="action")
frame = {**observation_frame, **action_frame} frame = {**observation_frame, **action_frame}
dataset.add_frame(frame, task=single_task) dataset.add_frame(frame, task=single_task)
@@ -218,7 +224,7 @@ def record_loop(
break break
@draccus.wrap() @parser.wrap()
def record(cfg: RecordConfig) -> LeRobotDataset: def record(cfg: RecordConfig) -> LeRobotDataset:
init_logging() init_logging()
logging.info(pformat(asdict(cfg))) logging.info(pformat(asdict(cfg)))

View File

@@ -57,12 +57,12 @@ def fixture_patch_realsense():
def test_abc_implementation(): def test_abc_implementation():
"""Instantiation should raise an error if the class doesn't implement abstract methods/properties.""" """Instantiation should raise an error if the class doesn't implement abstract methods/properties."""
config = RealSenseCameraConfig(serial_number_or_name=42) config = RealSenseCameraConfig(serial_number_or_name="042")
_ = RealSenseCamera(config) _ = RealSenseCamera(config)
def test_connect(): def test_connect():
config = RealSenseCameraConfig(serial_number_or_name=42) config = RealSenseCameraConfig(serial_number_or_name="042")
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect(warmup=False) camera.connect(warmup=False)
@@ -70,7 +70,7 @@ def test_connect():
def test_connect_already_connected(): def test_connect_already_connected():
config = RealSenseCameraConfig(serial_number_or_name=42) config = RealSenseCameraConfig(serial_number_or_name="042")
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect(warmup=False) camera.connect(warmup=False)
@@ -80,7 +80,7 @@ def test_connect_already_connected():
def test_connect_invalid_camera_path(patch_realsense): def test_connect_invalid_camera_path(patch_realsense):
patch_realsense.side_effect = mock_rs_config_enable_device_bad_file patch_realsense.side_effect = mock_rs_config_enable_device_bad_file
config = RealSenseCameraConfig(serial_number_or_name=42) config = RealSenseCameraConfig(serial_number_or_name="042")
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
with pytest.raises(ConnectionError): with pytest.raises(ConnectionError):
@@ -88,7 +88,7 @@ def test_connect_invalid_camera_path(patch_realsense):
def test_invalid_width_connect(): def test_invalid_width_connect():
config = RealSenseCameraConfig(serial_number_or_name=42, width=99999, height=480, fps=30) config = RealSenseCameraConfig(serial_number_or_name="042", width=99999, height=480, fps=30)
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
with pytest.raises(ConnectionError): with pytest.raises(ConnectionError):
@@ -96,7 +96,7 @@ def test_invalid_width_connect():
def test_read(): def test_read():
config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30)
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect(warmup=False) camera.connect(warmup=False)
@@ -105,7 +105,7 @@ def test_read():
def test_read_depth(): def test_read_depth():
config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30, use_depth=True) config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30, use_depth=True)
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect(warmup=False) camera.connect(warmup=False)
@@ -114,7 +114,7 @@ def test_read_depth():
def test_read_before_connect(): def test_read_before_connect():
config = RealSenseCameraConfig(serial_number_or_name=42) config = RealSenseCameraConfig(serial_number_or_name="042")
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
with pytest.raises(DeviceNotConnectedError): with pytest.raises(DeviceNotConnectedError):
@@ -122,7 +122,7 @@ def test_read_before_connect():
def test_disconnect(): def test_disconnect():
config = RealSenseCameraConfig(serial_number_or_name=42) config = RealSenseCameraConfig(serial_number_or_name="042")
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect(warmup=False) camera.connect(warmup=False)
@@ -132,7 +132,7 @@ def test_disconnect():
def test_disconnect_before_connect(): def test_disconnect_before_connect():
config = RealSenseCameraConfig(serial_number_or_name=42) config = RealSenseCameraConfig(serial_number_or_name="042")
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
with pytest.raises(DeviceNotConnectedError): with pytest.raises(DeviceNotConnectedError):
@@ -140,7 +140,7 @@ def test_disconnect_before_connect():
def test_async_read(): def test_async_read():
config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30)
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect(warmup=False) camera.connect(warmup=False)
@@ -156,7 +156,7 @@ def test_async_read():
def test_async_read_timeout(): def test_async_read_timeout():
config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) config = RealSenseCameraConfig(serial_number_or_name="042", width=640, height=480, fps=30)
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect(warmup=False) camera.connect(warmup=False)
@@ -171,7 +171,7 @@ def test_async_read_timeout():
def test_async_read_before_connect(): def test_async_read_before_connect():
config = RealSenseCameraConfig(serial_number_or_name=42) config = RealSenseCameraConfig(serial_number_or_name="042")
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
with pytest.raises(DeviceNotConnectedError): with pytest.raises(DeviceNotConnectedError):
@@ -189,7 +189,7 @@ def test_async_read_before_connect():
ids=["no_rot", "rot90", "rot180", "rot270"], ids=["no_rot", "rot90", "rot180", "rot270"],
) )
def test_rotation(rotation): def test_rotation(rotation):
config = RealSenseCameraConfig(serial_number_or_name=42, rotation=rotation) config = RealSenseCameraConfig(serial_number_or_name="042", rotation=rotation)
camera = RealSenseCamera(config) camera = RealSenseCamera(config)
camera.connect(warmup=False) camera.connect(warmup=False)