chore(docs): adress notes + add docs in camera code

2025-05-14 15:44:38 +02:00
parent cca647307b
commit 859a369b29
11 changed files with 149 additions and 56 deletions
--- a/lerobot/common/cameras/init.py
+++ b/lerobot/common/cameras/init.py
@@ -1,3 +1,17 @@
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .camera import Camera
 from .configs import CameraConfig
 from .utils import make_cameras_from_configs
--- a/lerobot/common/cameras/camera.py
+++ b/lerobot/common/cameras/camera.py
@@ -21,7 +21,6 @@ import numpy as np
 from .configs import CameraConfig, ColorMode
 # NOTE(Steven): Consider something like configure() if makes sense for both cameras
 class Camera(abc.ABC):
    def __init__(self, config: CameraConfig):
        self.fps: int | None = config.fps
--- a/lerobot/common/cameras/intel/init.py
+++ b/lerobot/common/cameras/intel/init.py
@@ -1,2 +1,16 @@
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .camera_realsense import RealSenseCamera
 from .configuration_realsense import RealSenseCameraConfig
--- a/lerobot/common/cameras/intel/camera_realsense.py
+++ b/lerobot/common/cameras/intel/camera_realsense.py
@@ -22,7 +22,7 @@ import math
 import queue
 import time
 from threading import Event, Thread
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Dict, List
 import cv2
 import numpy as np
@@ -49,6 +49,11 @@ class RealSenseCamera(Camera):
    indices, especially on Linux. It also supports capturing depth maps alongside
    color frames.
    Use the provided utility script to find available camera indices and default profiles:
    ```bash
    python -m lerobot.find_cameras
    ```
    A `RealSenseCamera` instance requires a configuration object specifying the
    camera's serial number or a unique device name. If using the name, ensure only
    one camera with that name is connected.
@@ -120,7 +125,7 @@ class RealSenseCamera(Camera):
        self.config = config
-        if config.name is not None:  # TODO(Steven): Do we want to continue supporting this?
+        if config.name is not None:  # NOTE(Steven): Do we want to continue supporting this?
            self.serial_number = self._find_serial_number_from_name(config.name)
        elif config.serial_number is not None:
            self.serial_number = str(config.serial_number)
@@ -143,8 +148,6 @@ class RealSenseCamera(Camera):
        self.rotation: int | None = get_cv2_rotation(config.rotation)
        # NOTE(Steven): What happens if rotation is specified but we leave width and height to None?
        # NOTE(Steven): Should we enforce these parameters if rotation is set?
        if self.height and self.width:
            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
                self.prerotated_width, self.prerotated_height = self.height, self.width
@@ -301,7 +304,6 @@ class RealSenseCamera(Camera):
                self.rs_profile.get_stream(rs.stream.depth).as_video_stream_profile()
            )
    # NOTE(Steven): Add a wamr-up period time config
    def connect(self, do_warmup_read: bool = True):
        """
        Connects to the RealSense camera specified in the configuration.
@@ -339,13 +341,12 @@ class RealSenseCamera(Camera):
        if do_warmup_read:
            logger.debug(f"Reading a warm-up frame for {self.serial_number}...")
-            self.read()  # NOTE(Steven): For now we just read one frame, we could also loop for X secs
+            self.read()  # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs
        logger.info(f"Camera {self.serial_number} connected and configured successfully.")
    def _validate_fps(self, stream) -> None:
        """Validates and sets the internal FPS based on actual stream FPS."""
        actual_fps = stream.fps()
        if self.fps is None:
@@ -366,7 +367,6 @@ class RealSenseCamera(Camera):
    def _validate_width_and_height(self, stream) -> None:
        """Validates and sets the internal capture width and height based on actual stream width."""
        actual_width = int(round(stream.width()))
        actual_height = int(round(stream.height()))
@@ -475,7 +475,7 @@ class RealSenseCamera(Camera):
        ret, frame = self.rs_pipeline.try_wait_for_frames(
            timeout_ms=timeout_ms
-        )  # NOTE(Steven): This read has a timeout
+        )  # NOTE(Steven): This read has a timeout while opencv doesn't
        if not ret or frame is None:
            raise RuntimeError(
@@ -579,8 +579,8 @@ class RealSenseCamera(Camera):
        self.thread.start()
        logger.debug(f"Read thread started for {self}.")
-    # NOTE(Steven): Missing implementation for depth
+    # NOTE(Steven): Missing implementation for depth for now
-    def async_read(self, timeout_ms: float = 2000) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+    def async_read(self, timeout_ms: float = 2000) -> np.ndarray:
        """
        Reads the latest available frame data (color or color+depth) asynchronously.
@@ -594,10 +594,8 @@ class RealSenseCamera(Camera):
                to become available in the queue. Defaults to 2000ms (2 seconds).
        Returns:
-            Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+            np.ndarray:
-            The latest captured frame data (color image or tuple of color image
+            The latest captured frame data (color image), processed according to configuration.
            and depth map), processed according to configuration. Format depends
            on `self.use_depth`.
        Raises:
            DeviceNotConnectedError: If the camera is not connected.
@@ -628,7 +626,6 @@ class RealSenseCamera(Camera):
                f"Error getting frame data from queue for camera {self.serial_number}: {e}"
            ) from e
    # NOTE(Steven): There are multiple functions that are the same between realsense and opencv. We should consider moving them to the parent class
    def _shutdown_read_thread(self):
        """Signals the background read thread to stop and waits for it to join."""
        if self.stop_event is not None:
--- a/lerobot/common/cameras/intel/configuration_realsense.py
+++ b/lerobot/common/cameras/intel/configuration_realsense.py
@@ -20,17 +20,39 @@ from ..configs import CameraConfig, ColorMode, Cv2Rotation
@CameraConfig.register_subclass("intelrealsense")
@dataclass
 class RealSenseCameraConfig(CameraConfig):
-    """
+    """Configuration class for Intel RealSense cameras.
    Example of tested options for Intel Real Sense D405:
    This class provides specialized configuration options for Intel RealSense cameras,
    including support for depth sensing and device identification via serial number or name.
    Example configurations for Intel RealSense D405:
    ```python
-    RealSenseCameraConfig(128422271347, 30, 640, 480)
+    # Basic configurations
-    RealSenseCameraConfig(128422271347, 60, 640, 480)
+    RealSenseCameraConfig(128422271347, 30, 1280, 720)   # 1280x720 @ 30FPS
-    RealSenseCameraConfig(128422271347, 90, 640, 480)
+    RealSenseCameraConfig(128422271347, 60, 640, 480)   # 640x480 @ 60FPS
-    RealSenseCameraConfig(128422271347, 30, 1280, 720)
+
-    RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True)
+    # Advanced configurations
-    RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=90)
+    RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True)  # With depth sensing
    RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90)     # With 90° rotation
    ```
    Attributes:
        fps: Requested frames per second for the color stream.
        width: Requested frame width in pixels for the color stream.
        height: Requested frame height in pixels for the color stream.
        name: Optional human-readable name to identify the camera.
        serial_number: Optional unique serial number to identify the camera.
                      Either name or serial_number must be provided.
        color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
        channels: Number of color channels (currently only 3 is supported).
        use_depth: Whether to enable depth stream. Defaults to False.
        rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
    Note:
        - Either name or serial_number must be specified, but not both.
        - Depth stream configuration (if enabled) will use the same FPS as the color stream.
        - The actual resolution and FPS may be adjusted by the camera to the nearest supported mode.
        - Only 3-channel color output (RGB/BGR) is currently supported.
    """
    name: str | None = None
@@ -38,8 +60,7 @@ class RealSenseCameraConfig(CameraConfig):
    color_mode: ColorMode = ColorMode.RGB
    channels: int | None = 3
    use_depth: bool = False
-    # NOTE(Steven): Check how draccus would deal with this str -> enum
+    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION  # NOTE(Steven): Check if draccus can parse to an enum
    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
    def __post_init__(self):
        if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
--- a/lerobot/common/cameras/opencv/init.py
+++ b/lerobot/common/cameras/opencv/init.py
@@ -1,2 +1,16 @@
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .camera_opencv import OpenCVCamera
 from .configuration_opencv import OpenCVCameraConfig
--- a/lerobot/common/cameras/opencv/camera_opencv.py
+++ b/lerobot/common/cameras/opencv/camera_opencv.py
@@ -36,7 +36,7 @@ from ..camera import Camera
 from ..utils import IndexOrPath, get_cv2_backend, get_cv2_rotation
 from .configuration_opencv import ColorMode, OpenCVCameraConfig
-# The maximum opencv device index depends on your operating system. For instance,
+# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance,
 # if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
 # on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23.
 # When you change the USB port or reboot the computer, the operating system might
@@ -129,10 +129,8 @@ class OpenCVCamera(Camera):
        self.logs: dict = {}  # NOTE(Steven): Might be removed in the future
        self.rotation: int | None = get_cv2_rotation(config.rotation)
-        self.backend: int = get_cv2_backend()  # NOTE(Steven): If I specify backend the opencv open fails
+        self.backend: int = get_cv2_backend()  # NOTE(Steven): If we specify backend the opencv open fails
        # NOTE(Steven): What happens if rotation is specified but we leave width and height to None?
        # NOTE(Steven): Should we enforce these parameters if rotation is set?
        if self.height and self.width:
            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
                self.prerotated_width, self.prerotated_height = self.height, self.width
@@ -208,7 +206,7 @@ class OpenCVCamera(Camera):
        if do_warmup_read:
            logger.debug(f"Reading a warm-up frame for {self.index_or_path}...")
-            self.read()  # NOTE(Steven): For now we just read one frame, we could also loop for X secs\
+            self.read()  # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs
        logger.debug(f"Camera {self.index_or_path} connected and configured successfully.")
@@ -239,7 +237,6 @@ class OpenCVCamera(Camera):
        actual_width = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_WIDTH)))
        actual_height = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        # NOTE(Steven): When do we constraint the possibility of only setting one?
        if self.width is None or self.height is None:
            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
                self.width, self.height = actual_height, actual_width
--- a/lerobot/common/cameras/opencv/configuration_opencv.py
+++ b/lerobot/common/cameras/opencv/configuration_opencv.py
@@ -1,3 +1,17 @@
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from dataclasses import dataclass
 from pathlib import Path
@@ -7,15 +21,34 @@ from ..configs import CameraConfig, ColorMode, Cv2Rotation
@CameraConfig.register_subclass("opencv")
@dataclass
 class OpenCVCameraConfig(CameraConfig):
-    """
+    """Configuration class for OpenCV-based camera devices or video files.
-    Example of tested options for Intel Real Sense D405:
+
-    #NOTE(Steven): update this doc
+    This class provides configuration options for cameras accessed through OpenCV,
    supporting both physical camera devices and video files. It includes settings
    for resolution, frame rate, color mode, and image rotation.
    Example configurations:
    ```python
-    OpenCVCameraConfig(0, 30, 640, 480)
+    # Basic configurations
-    OpenCVCameraConfig(0, 60, 640, 480)
+    OpenCVCameraConfig(0, 30, 1280, 720)   # 1280x720 @ 30FPS
-    OpenCVCameraConfig(0, 90, 640, 480)
+    OpenCVCameraConfig(/dev/video4, 60, 640, 480)   # 640x480 @ 60FPS
-    OpenCVCameraConfig(0, 30, 1280, 720)
+
    # Advanced configurations
    OpenCVCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90)     # With 90° rotation
    ```
    Attributes:
        index_or_path: Either an integer representing the camera device index,
                      or a Path object pointing to a video file.
        fps: Requested frames per second for the color stream.
        width: Requested frame width in pixels for the color stream.
        height: Requested frame height in pixels for the color stream.
        color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
        channels: Number of color channels (currently only 3 is supported).
        rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
    Note:
        - Only 3-channel color output (RGB/BGR) is currently supported.
    """
    index_or_path: int | Path
--- a/lerobot/find_cameras.py
+++ b/lerobot/find_cameras.py
@@ -20,7 +20,7 @@ import logging
 import shutil
 import time
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List
 import numpy as np
 from PIL import Image
@@ -77,7 +77,7 @@ def find_all_realsense_cameras() -> List[Dict[str, Any]]:
    return all_realsense_cameras_info
-def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dict[str, Any]]:
+def find_and_print_cameras(camera_type_filter: str | None = None) -> List[Dict[str, Any]]:
    """
    Finds available cameras based on an optional filter and prints their information.
@@ -106,7 +106,7 @@ def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dic
    else:
        print("\n--- Detected Cameras ---")
        for i, cam_info in enumerate(all_cameras_info):
-            print(f"Camera #{i + 1}:")
+            print(f"Camera #{i}:")
            for key, value in cam_info.items():
                if key == "default_stream_profile" and isinstance(value, dict):
                    print(f"  {key.replace('_', ' ').capitalize()}:")
@@ -120,7 +120,7 @@ def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dic
 def save_image(
    img_array: np.ndarray,
-    camera_identifier: Union[str, int],
+    camera_identifier: str | int,
    images_dir: Path,
    camera_type: str,
 ):
@@ -142,7 +142,7 @@ def save_image(
        logger.error(f"Failed to save image for camera {camera_identifier} (type {camera_type}): {e}")
-def initialize_output_directory(output_dir: Union[str, Path]) -> Path:
+def initialize_output_directory(output_dir: str | Path) -> Path:
    """Initialize and clean the output directory."""
    output_dir = Path(output_dir)
    if output_dir.exists():
@@ -153,7 +153,7 @@ def initialize_output_directory(output_dir: Union[str, Path]) -> Path:
    return output_dir
-def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+def create_camera_instance(cam_meta: Dict[str, Any]) -> Dict[str, Any] | None:
    """Create and connect to a camera instance based on metadata."""
    cam_type = cam_meta.get("type")
    cam_id = cam_meta.get("id")
@@ -180,7 +180,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]
        if instance:
            logger.info(f"Connecting to {cam_type} camera: {cam_id}...")
-            instance.connect()
+            instance.connect(do_warmup_read=False)
            return {"instance": instance, "meta": cam_meta}
    except Exception as e:
        logger.error(f"Failed to connect or configure {cam_type} camera {cam_id}: {e}")
@@ -191,7 +191,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]
 def process_camera_image(
    cam_dict: Dict[str, Any], output_dir: Path, current_time: float
-) -> Optional[concurrent.futures.Future]:
+) -> concurrent.futures.Future | None:
    """Capture and process an image from a single camera."""
    cam = cam_dict["instance"]
    meta = cam_dict["meta"]
@@ -228,9 +228,9 @@ def cleanup_cameras(cameras_to_use: List[Dict[str, Any]]):
 def save_images_from_all_cameras(
-    output_dir: Union[str, Path],
+    output_dir: str | Path,
    record_time_s: float = 2.0,
-    camera_type_filter: Optional[str] = None,
+    camera_type_filter: str | None = None,
 ):
    """
    Connects to detected cameras (optionally filtered by type) and saves images from each.
@@ -249,7 +249,6 @@ def save_images_from_all_cameras(
        logger.warning("No cameras detected matching the criteria. Cannot save images.")
        return
    # Create and connect to all cameras
    cameras_to_use = []
    for cam_meta in all_camera_metadata:
        camera_instance = create_camera_instance(cam_meta)
@@ -263,7 +262,6 @@ def save_images_from_all_cameras(
    logger.info(f"Starting image capture for {record_time_s} seconds from {len(cameras_to_use)} cameras.")
    start_time = time.perf_counter()
    # NOTE(Steven): This seems like an overkill to me
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(cameras_to_use) * 2) as executor:
        try:
            while time.perf_counter() - start_time < record_time_s:
@@ -287,6 +285,10 @@ def save_images_from_all_cameras(
            logger.info(f"Image capture finished. Images saved to {output_dir}")
 # NOTE(Steven):
 # * realsense identified as opencv -> consistent in linux, we can even capture images
 # * opencv mac cams reporting different fps at init, not an issue as we don't enforce fps here
 # * opencv not opening in linux if we specify a backend
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Unified camera utility script for listing cameras and capturing images."
@@ -329,8 +331,8 @@ if __name__ == "__main__":
    capture_parser.add_argument(
        "--record-time-s",
        type=float,
-        default=5.0,
+        default=6.0,
-        help="Time duration to attempt capturing frames. Default: 0.5 seconds (usually enough for one frame).",
+        help="Time duration to attempt capturing frames. Default: 6 seconds.",
    )
    capture_parser.set_defaults(
        func=lambda args: save_images_from_all_cameras(
--- a/tests/cameras/test_opencv.py
+++ b/tests/cameras/test_opencv.py
@@ -28,7 +28,7 @@ from lerobot.common.cameras.configs import Cv2Rotation
 from lerobot.common.cameras.opencv import OpenCVCamera, OpenCVCameraConfig
 from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
-# NOTE(Steven): Consider improving the assert coverage
+# NOTE(Steven): more tests + assertions?
 TEST_ARTIFACTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "artifacts", "cameras")
 DEFAULT_PNG_FILE_PATH = os.path.join(TEST_ARTIFACTS_DIR, "fakecam_sd_160x120.png")
 TEST_IMAGE_PATHS = [
--- a/tests/cameras/test_realsense.py
+++ b/tests/cameras/test_realsense.py
@@ -36,7 +36,9 @@ except (ImportError, ModuleNotFoundError):
 TEST_ARTIFACTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "artifacts", "cameras")
 BAG_FILE_PATH = os.path.join(TEST_ARTIFACTS_DIR, "test_rs.bag")
-
+# NOTE(Steven): Missing tests for depth
 # NOTE(Steven): Takes 20sec, the patch being the biggest bottleneck
 # NOTE(Steven): more tests + assertions?
 if not os.path.exists(BAG_FILE_PATH):
    print(f"Warning: Bag file not found at {BAG_FILE_PATH}. Some tests might fail or be skipped.")