From 859a369b29d49c77989b6025c1da35c62b8ae5e8 Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Wed, 14 May 2025 15:44:38 +0200 Subject: [PATCH] chore(docs): adress notes + add docs in camera code --- lerobot/common/cameras/__init__.py | 14 ++++++ lerobot/common/cameras/camera.py | 1 - lerobot/common/cameras/intel/__init__.py | 14 ++++++ .../common/cameras/intel/camera_realsense.py | 29 +++++------- .../cameras/intel/configuration_realsense.py | 41 ++++++++++++---- lerobot/common/cameras/opencv/__init__.py | 14 ++++++ .../common/cameras/opencv/camera_opencv.py | 9 ++-- .../cameras/opencv/configuration_opencv.py | 47 ++++++++++++++++--- lerobot/find_cameras.py | 30 ++++++------ tests/cameras/test_opencv.py | 2 +- tests/cameras/test_realsense.py | 4 +- 11 files changed, 149 insertions(+), 56 deletions(-) diff --git a/lerobot/common/cameras/__init__.py b/lerobot/common/cameras/__init__.py index f115e95f..272c4e9e 100644 --- a/lerobot/common/cameras/__init__.py +++ b/lerobot/common/cameras/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .camera import Camera from .configs import CameraConfig from .utils import make_cameras_from_configs diff --git a/lerobot/common/cameras/camera.py b/lerobot/common/cameras/camera.py index d25bedb1..8d88794c 100644 --- a/lerobot/common/cameras/camera.py +++ b/lerobot/common/cameras/camera.py @@ -21,7 +21,6 @@ import numpy as np from .configs import CameraConfig, ColorMode -# NOTE(Steven): Consider something like configure() if makes sense for both cameras class Camera(abc.ABC): def __init__(self, config: CameraConfig): self.fps: int | None = config.fps diff --git a/lerobot/common/cameras/intel/__init__.py b/lerobot/common/cameras/intel/__init__.py index 5786667c..67f2f400 100644 --- a/lerobot/common/cameras/intel/__init__.py +++ b/lerobot/common/cameras/intel/__init__.py @@ -1,2 +1,16 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .camera_realsense import RealSenseCamera from .configuration_realsense import RealSenseCameraConfig diff --git a/lerobot/common/cameras/intel/camera_realsense.py b/lerobot/common/cameras/intel/camera_realsense.py index 274eda84..1f39e299 100644 --- a/lerobot/common/cameras/intel/camera_realsense.py +++ b/lerobot/common/cameras/intel/camera_realsense.py @@ -22,7 +22,7 @@ import math import queue import time from threading import Event, Thread -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List import cv2 import numpy as np @@ -49,6 +49,11 @@ class RealSenseCamera(Camera): indices, especially on Linux. It also supports capturing depth maps alongside color frames. + Use the provided utility script to find available camera indices and default profiles: + ```bash + python -m lerobot.find_cameras + ``` + A `RealSenseCamera` instance requires a configuration object specifying the camera's serial number or a unique device name. If using the name, ensure only one camera with that name is connected. @@ -120,7 +125,7 @@ class RealSenseCamera(Camera): self.config = config - if config.name is not None: # TODO(Steven): Do we want to continue supporting this? + if config.name is not None: # NOTE(Steven): Do we want to continue supporting this? self.serial_number = self._find_serial_number_from_name(config.name) elif config.serial_number is not None: self.serial_number = str(config.serial_number) @@ -143,8 +148,6 @@ class RealSenseCamera(Camera): self.rotation: int | None = get_cv2_rotation(config.rotation) - # NOTE(Steven): What happens if rotation is specified but we leave width and height to None? - # NOTE(Steven): Should we enforce these parameters if rotation is set? if self.height and self.width: if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: self.prerotated_width, self.prerotated_height = self.height, self.width @@ -301,7 +304,6 @@ class RealSenseCamera(Camera): self.rs_profile.get_stream(rs.stream.depth).as_video_stream_profile() ) - # NOTE(Steven): Add a wamr-up period time config def connect(self, do_warmup_read: bool = True): """ Connects to the RealSense camera specified in the configuration. @@ -339,13 +341,12 @@ class RealSenseCamera(Camera): if do_warmup_read: logger.debug(f"Reading a warm-up frame for {self.serial_number}...") - self.read() # NOTE(Steven): For now we just read one frame, we could also loop for X secs + self.read() # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs logger.info(f"Camera {self.serial_number} connected and configured successfully.") def _validate_fps(self, stream) -> None: """Validates and sets the internal FPS based on actual stream FPS.""" - actual_fps = stream.fps() if self.fps is None: @@ -366,7 +367,6 @@ class RealSenseCamera(Camera): def _validate_width_and_height(self, stream) -> None: """Validates and sets the internal capture width and height based on actual stream width.""" - actual_width = int(round(stream.width())) actual_height = int(round(stream.height())) @@ -475,7 +475,7 @@ class RealSenseCamera(Camera): ret, frame = self.rs_pipeline.try_wait_for_frames( timeout_ms=timeout_ms - ) # NOTE(Steven): This read has a timeout + ) # NOTE(Steven): This read has a timeout while opencv doesn't if not ret or frame is None: raise RuntimeError( @@ -579,8 +579,8 @@ class RealSenseCamera(Camera): self.thread.start() logger.debug(f"Read thread started for {self}.") - # NOTE(Steven): Missing implementation for depth - def async_read(self, timeout_ms: float = 2000) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + # NOTE(Steven): Missing implementation for depth for now + def async_read(self, timeout_ms: float = 2000) -> np.ndarray: """ Reads the latest available frame data (color or color+depth) asynchronously. @@ -594,10 +594,8 @@ class RealSenseCamera(Camera): to become available in the queue. Defaults to 2000ms (2 seconds). Returns: - Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: - The latest captured frame data (color image or tuple of color image - and depth map), processed according to configuration. Format depends - on `self.use_depth`. + np.ndarray: + The latest captured frame data (color image), processed according to configuration. Raises: DeviceNotConnectedError: If the camera is not connected. @@ -628,7 +626,6 @@ class RealSenseCamera(Camera): f"Error getting frame data from queue for camera {self.serial_number}: {e}" ) from e - # NOTE(Steven): There are multiple functions that are the same between realsense and opencv. We should consider moving them to the parent class def _shutdown_read_thread(self): """Signals the background read thread to stop and waits for it to join.""" if self.stop_event is not None: diff --git a/lerobot/common/cameras/intel/configuration_realsense.py b/lerobot/common/cameras/intel/configuration_realsense.py index 2cb1f42d..5d9f8a80 100644 --- a/lerobot/common/cameras/intel/configuration_realsense.py +++ b/lerobot/common/cameras/intel/configuration_realsense.py @@ -20,17 +20,39 @@ from ..configs import CameraConfig, ColorMode, Cv2Rotation @CameraConfig.register_subclass("intelrealsense") @dataclass class RealSenseCameraConfig(CameraConfig): - """ - Example of tested options for Intel Real Sense D405: + """Configuration class for Intel RealSense cameras. + This class provides specialized configuration options for Intel RealSense cameras, + including support for depth sensing and device identification via serial number or name. + + Example configurations for Intel RealSense D405: ```python - RealSenseCameraConfig(128422271347, 30, 640, 480) - RealSenseCameraConfig(128422271347, 60, 640, 480) - RealSenseCameraConfig(128422271347, 90, 640, 480) - RealSenseCameraConfig(128422271347, 30, 1280, 720) - RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True) - RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=90) + # Basic configurations + RealSenseCameraConfig(128422271347, 30, 1280, 720) # 1280x720 @ 30FPS + RealSenseCameraConfig(128422271347, 60, 640, 480) # 640x480 @ 60FPS + + # Advanced configurations + RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True) # With depth sensing + RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation ``` + + Attributes: + fps: Requested frames per second for the color stream. + width: Requested frame width in pixels for the color stream. + height: Requested frame height in pixels for the color stream. + name: Optional human-readable name to identify the camera. + serial_number: Optional unique serial number to identify the camera. + Either name or serial_number must be provided. + color_mode: Color mode for image output (RGB or BGR). Defaults to RGB. + channels: Number of color channels (currently only 3 is supported). + use_depth: Whether to enable depth stream. Defaults to False. + rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation. + + Note: + - Either name or serial_number must be specified, but not both. + - Depth stream configuration (if enabled) will use the same FPS as the color stream. + - The actual resolution and FPS may be adjusted by the camera to the nearest supported mode. + - Only 3-channel color output (RGB/BGR) is currently supported. """ name: str | None = None @@ -38,8 +60,7 @@ class RealSenseCameraConfig(CameraConfig): color_mode: ColorMode = ColorMode.RGB channels: int | None = 3 use_depth: bool = False - # NOTE(Steven): Check how draccus would deal with this str -> enum - rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION + rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION # NOTE(Steven): Check if draccus can parse to an enum def __post_init__(self): if self.color_mode not in (ColorMode.RGB, ColorMode.BGR): diff --git a/lerobot/common/cameras/opencv/__init__.py b/lerobot/common/cameras/opencv/__init__.py index 29ae5175..11d3139f 100644 --- a/lerobot/common/cameras/opencv/__init__.py +++ b/lerobot/common/cameras/opencv/__init__.py @@ -1,2 +1,16 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .camera_opencv import OpenCVCamera from .configuration_opencv import OpenCVCameraConfig diff --git a/lerobot/common/cameras/opencv/camera_opencv.py b/lerobot/common/cameras/opencv/camera_opencv.py index f746bb34..117b8500 100644 --- a/lerobot/common/cameras/opencv/camera_opencv.py +++ b/lerobot/common/cameras/opencv/camera_opencv.py @@ -36,7 +36,7 @@ from ..camera import Camera from ..utils import IndexOrPath, get_cv2_backend, get_cv2_rotation from .configuration_opencv import ColorMode, OpenCVCameraConfig -# The maximum opencv device index depends on your operating system. For instance, +# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance, # if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case # on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23. # When you change the USB port or reboot the computer, the operating system might @@ -129,10 +129,8 @@ class OpenCVCamera(Camera): self.logs: dict = {} # NOTE(Steven): Might be removed in the future self.rotation: int | None = get_cv2_rotation(config.rotation) - self.backend: int = get_cv2_backend() # NOTE(Steven): If I specify backend the opencv open fails + self.backend: int = get_cv2_backend() # NOTE(Steven): If we specify backend the opencv open fails - # NOTE(Steven): What happens if rotation is specified but we leave width and height to None? - # NOTE(Steven): Should we enforce these parameters if rotation is set? if self.height and self.width: if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: self.prerotated_width, self.prerotated_height = self.height, self.width @@ -208,7 +206,7 @@ class OpenCVCamera(Camera): if do_warmup_read: logger.debug(f"Reading a warm-up frame for {self.index_or_path}...") - self.read() # NOTE(Steven): For now we just read one frame, we could also loop for X secs\ + self.read() # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs logger.debug(f"Camera {self.index_or_path} connected and configured successfully.") @@ -239,7 +237,6 @@ class OpenCVCamera(Camera): actual_width = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_WIDTH))) actual_height = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_HEIGHT))) - # NOTE(Steven): When do we constraint the possibility of only setting one? if self.width is None or self.height is None: if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: self.width, self.height = actual_height, actual_width diff --git a/lerobot/common/cameras/opencv/configuration_opencv.py b/lerobot/common/cameras/opencv/configuration_opencv.py index ab9fc2f7..5d7c4195 100644 --- a/lerobot/common/cameras/opencv/configuration_opencv.py +++ b/lerobot/common/cameras/opencv/configuration_opencv.py @@ -1,3 +1,17 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass from pathlib import Path @@ -7,15 +21,34 @@ from ..configs import CameraConfig, ColorMode, Cv2Rotation @CameraConfig.register_subclass("opencv") @dataclass class OpenCVCameraConfig(CameraConfig): - """ - Example of tested options for Intel Real Sense D405: - #NOTE(Steven): update this doc + """Configuration class for OpenCV-based camera devices or video files. + + This class provides configuration options for cameras accessed through OpenCV, + supporting both physical camera devices and video files. It includes settings + for resolution, frame rate, color mode, and image rotation. + + Example configurations: ```python - OpenCVCameraConfig(0, 30, 640, 480) - OpenCVCameraConfig(0, 60, 640, 480) - OpenCVCameraConfig(0, 90, 640, 480) - OpenCVCameraConfig(0, 30, 1280, 720) + # Basic configurations + OpenCVCameraConfig(0, 30, 1280, 720) # 1280x720 @ 30FPS + OpenCVCameraConfig(/dev/video4, 60, 640, 480) # 640x480 @ 60FPS + + # Advanced configurations + OpenCVCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation ``` + + Attributes: + index_or_path: Either an integer representing the camera device index, + or a Path object pointing to a video file. + fps: Requested frames per second for the color stream. + width: Requested frame width in pixels for the color stream. + height: Requested frame height in pixels for the color stream. + color_mode: Color mode for image output (RGB or BGR). Defaults to RGB. + channels: Number of color channels (currently only 3 is supported). + rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation. + + Note: + - Only 3-channel color output (RGB/BGR) is currently supported. """ index_or_path: int | Path diff --git a/lerobot/find_cameras.py b/lerobot/find_cameras.py index cd3195ca..6227eaba 100644 --- a/lerobot/find_cameras.py +++ b/lerobot/find_cameras.py @@ -20,7 +20,7 @@ import logging import shutil import time from pathlib import Path -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List import numpy as np from PIL import Image @@ -77,7 +77,7 @@ def find_all_realsense_cameras() -> List[Dict[str, Any]]: return all_realsense_cameras_info -def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dict[str, Any]]: +def find_and_print_cameras(camera_type_filter: str | None = None) -> List[Dict[str, Any]]: """ Finds available cameras based on an optional filter and prints their information. @@ -106,7 +106,7 @@ def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dic else: print("\n--- Detected Cameras ---") for i, cam_info in enumerate(all_cameras_info): - print(f"Camera #{i + 1}:") + print(f"Camera #{i}:") for key, value in cam_info.items(): if key == "default_stream_profile" and isinstance(value, dict): print(f" {key.replace('_', ' ').capitalize()}:") @@ -120,7 +120,7 @@ def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dic def save_image( img_array: np.ndarray, - camera_identifier: Union[str, int], + camera_identifier: str | int, images_dir: Path, camera_type: str, ): @@ -142,7 +142,7 @@ def save_image( logger.error(f"Failed to save image for camera {camera_identifier} (type {camera_type}): {e}") -def initialize_output_directory(output_dir: Union[str, Path]) -> Path: +def initialize_output_directory(output_dir: str | Path) -> Path: """Initialize and clean the output directory.""" output_dir = Path(output_dir) if output_dir.exists(): @@ -153,7 +153,7 @@ def initialize_output_directory(output_dir: Union[str, Path]) -> Path: return output_dir -def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]: +def create_camera_instance(cam_meta: Dict[str, Any]) -> Dict[str, Any] | None: """Create and connect to a camera instance based on metadata.""" cam_type = cam_meta.get("type") cam_id = cam_meta.get("id") @@ -180,7 +180,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]] if instance: logger.info(f"Connecting to {cam_type} camera: {cam_id}...") - instance.connect() + instance.connect(do_warmup_read=False) return {"instance": instance, "meta": cam_meta} except Exception as e: logger.error(f"Failed to connect or configure {cam_type} camera {cam_id}: {e}") @@ -191,7 +191,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]] def process_camera_image( cam_dict: Dict[str, Any], output_dir: Path, current_time: float -) -> Optional[concurrent.futures.Future]: +) -> concurrent.futures.Future | None: """Capture and process an image from a single camera.""" cam = cam_dict["instance"] meta = cam_dict["meta"] @@ -228,9 +228,9 @@ def cleanup_cameras(cameras_to_use: List[Dict[str, Any]]): def save_images_from_all_cameras( - output_dir: Union[str, Path], + output_dir: str | Path, record_time_s: float = 2.0, - camera_type_filter: Optional[str] = None, + camera_type_filter: str | None = None, ): """ Connects to detected cameras (optionally filtered by type) and saves images from each. @@ -249,7 +249,6 @@ def save_images_from_all_cameras( logger.warning("No cameras detected matching the criteria. Cannot save images.") return - # Create and connect to all cameras cameras_to_use = [] for cam_meta in all_camera_metadata: camera_instance = create_camera_instance(cam_meta) @@ -263,7 +262,6 @@ def save_images_from_all_cameras( logger.info(f"Starting image capture for {record_time_s} seconds from {len(cameras_to_use)} cameras.") start_time = time.perf_counter() - # NOTE(Steven): This seems like an overkill to me with concurrent.futures.ThreadPoolExecutor(max_workers=len(cameras_to_use) * 2) as executor: try: while time.perf_counter() - start_time < record_time_s: @@ -287,6 +285,10 @@ def save_images_from_all_cameras( logger.info(f"Image capture finished. Images saved to {output_dir}") +# NOTE(Steven): +# * realsense identified as opencv -> consistent in linux, we can even capture images +# * opencv mac cams reporting different fps at init, not an issue as we don't enforce fps here +# * opencv not opening in linux if we specify a backend if __name__ == "__main__": parser = argparse.ArgumentParser( description="Unified camera utility script for listing cameras and capturing images." @@ -329,8 +331,8 @@ if __name__ == "__main__": capture_parser.add_argument( "--record-time-s", type=float, - default=5.0, - help="Time duration to attempt capturing frames. Default: 0.5 seconds (usually enough for one frame).", + default=6.0, + help="Time duration to attempt capturing frames. Default: 6 seconds.", ) capture_parser.set_defaults( func=lambda args: save_images_from_all_cameras( diff --git a/tests/cameras/test_opencv.py b/tests/cameras/test_opencv.py index 3b141290..44cb9bb8 100644 --- a/tests/cameras/test_opencv.py +++ b/tests/cameras/test_opencv.py @@ -28,7 +28,7 @@ from lerobot.common.cameras.configs import Cv2Rotation from lerobot.common.cameras.opencv import OpenCVCamera, OpenCVCameraConfig from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError -# NOTE(Steven): Consider improving the assert coverage +# NOTE(Steven): more tests + assertions? TEST_ARTIFACTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "artifacts", "cameras") DEFAULT_PNG_FILE_PATH = os.path.join(TEST_ARTIFACTS_DIR, "fakecam_sd_160x120.png") TEST_IMAGE_PATHS = [ diff --git a/tests/cameras/test_realsense.py b/tests/cameras/test_realsense.py index 30e9e932..fb5e5009 100644 --- a/tests/cameras/test_realsense.py +++ b/tests/cameras/test_realsense.py @@ -36,7 +36,9 @@ except (ImportError, ModuleNotFoundError): TEST_ARTIFACTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "artifacts", "cameras") BAG_FILE_PATH = os.path.join(TEST_ARTIFACTS_DIR, "test_rs.bag") - +# NOTE(Steven): Missing tests for depth +# NOTE(Steven): Takes 20sec, the patch being the biggest bottleneck +# NOTE(Steven): more tests + assertions? if not os.path.exists(BAG_FILE_PATH): print(f"Warning: Bag file not found at {BAG_FILE_PATH}. Some tests might fail or be skipped.")