chore(docs): adress notes + add docs in camera code

2025-05-14 15:44:38 +02:00
parent cca647307b
commit 859a369b29
11 changed files with 149 additions and 56 deletions
--- a/lerobot/common/cameras/init.py
+++ b/lerobot/common/cameras/init.py
@@ -1,3 +1,17 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from .camera import Camera
 from .configs import CameraConfig
 from .utils import make_cameras_from_configs
--- a/lerobot/common/cameras/camera.py
+++ b/lerobot/common/cameras/camera.py
@@ -21,7 +21,6 @@ import numpy as np
 from .configs import CameraConfig, ColorMode


-# NOTE(Steven): Consider something like configure() if makes sense for both cameras
 class Camera(abc.ABC):
    def __init__(self, config: CameraConfig):
        self.fps: int | None = config.fps
--- a/lerobot/common/cameras/intel/init.py
+++ b/lerobot/common/cameras/intel/init.py
@@ -1,2 +1,16 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from .camera_realsense import RealSenseCamera
 from .configuration_realsense import RealSenseCameraConfig
--- a/lerobot/common/cameras/intel/camera_realsense.py
+++ b/lerobot/common/cameras/intel/camera_realsense.py
@@ -22,7 +22,7 @@ import math
 import queue
 import time
 from threading import Event, Thread
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Dict, List

 import cv2
 import numpy as np
@@ -49,6 +49,11 @@ class RealSenseCamera(Camera):
    indices, especially on Linux. It also supports capturing depth maps alongside
    color frames.

+    Use the provided utility script to find available camera indices and default profiles:
+    ```bash
+    python -m lerobot.find_cameras
+    ```
+
    A `RealSenseCamera` instance requires a configuration object specifying the
    camera's serial number or a unique device name. If using the name, ensure only
    one camera with that name is connected.
@@ -120,7 +125,7 @@ class RealSenseCamera(Camera):

        self.config = config

-        if config.name is not None:  # TODO(Steven): Do we want to continue supporting this?
+        if config.name is not None:  # NOTE(Steven): Do we want to continue supporting this?
            self.serial_number = self._find_serial_number_from_name(config.name)
        elif config.serial_number is not None:
            self.serial_number = str(config.serial_number)
@@ -143,8 +148,6 @@ class RealSenseCamera(Camera):

        self.rotation: int | None = get_cv2_rotation(config.rotation)

-        # NOTE(Steven): What happens if rotation is specified but we leave width and height to None?
-        # NOTE(Steven): Should we enforce these parameters if rotation is set?
        if self.height and self.width:
            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
                self.prerotated_width, self.prerotated_height = self.height, self.width
@@ -301,7 +304,6 @@ class RealSenseCamera(Camera):
                self.rs_profile.get_stream(rs.stream.depth).as_video_stream_profile()
            )

-    # NOTE(Steven): Add a wamr-up period time config
    def connect(self, do_warmup_read: bool = True):
        """
        Connects to the RealSense camera specified in the configuration.
@@ -339,13 +341,12 @@ class RealSenseCamera(Camera):

        if do_warmup_read:
            logger.debug(f"Reading a warm-up frame for {self.serial_number}...")
-            self.read()  # NOTE(Steven): For now we just read one frame, we could also loop for X secs
+            self.read()  # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs

        logger.info(f"Camera {self.serial_number} connected and configured successfully.")

    def _validate_fps(self, stream) -> None:
        """Validates and sets the internal FPS based on actual stream FPS."""
-
        actual_fps = stream.fps()

        if self.fps is None:
@@ -366,7 +367,6 @@ class RealSenseCamera(Camera):

    def _validate_width_and_height(self, stream) -> None:
        """Validates and sets the internal capture width and height based on actual stream width."""
-
        actual_width = int(round(stream.width()))
        actual_height = int(round(stream.height()))

@@ -475,7 +475,7 @@ class RealSenseCamera(Camera):

        ret, frame = self.rs_pipeline.try_wait_for_frames(
            timeout_ms=timeout_ms
-        )  # NOTE(Steven): This read has a timeout
+        )  # NOTE(Steven): This read has a timeout while opencv doesn't

        if not ret or frame is None:
            raise RuntimeError(
@@ -579,8 +579,8 @@ class RealSenseCamera(Camera):
        self.thread.start()
        logger.debug(f"Read thread started for {self}.")

-    # NOTE(Steven): Missing implementation for depth
-    def async_read(self, timeout_ms: float = 2000) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+    # NOTE(Steven): Missing implementation for depth for now
+    def async_read(self, timeout_ms: float = 2000) -> np.ndarray:
        """
        Reads the latest available frame data (color or color+depth) asynchronously.

@@ -594,10 +594,8 @@ class RealSenseCamera(Camera):
                to become available in the queue. Defaults to 2000ms (2 seconds).

        Returns:
-            Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
-            The latest captured frame data (color image or tuple of color image
-            and depth map), processed according to configuration. Format depends
-            on `self.use_depth`.
+            np.ndarray:
+            The latest captured frame data (color image), processed according to configuration.

        Raises:
            DeviceNotConnectedError: If the camera is not connected.
@@ -628,7 +626,6 @@ class RealSenseCamera(Camera):
                f"Error getting frame data from queue for camera {self.serial_number}: {e}"
            ) from e

-    # NOTE(Steven): There are multiple functions that are the same between realsense and opencv. We should consider moving them to the parent class
    def _shutdown_read_thread(self):
        """Signals the background read thread to stop and waits for it to join."""
        if self.stop_event is not None:
--- a/lerobot/common/cameras/intel/configuration_realsense.py
+++ b/lerobot/common/cameras/intel/configuration_realsense.py
@@ -20,17 +20,39 @@ from ..configs import CameraConfig, ColorMode, Cv2Rotation
@CameraConfig.register_subclass("intelrealsense")
@dataclass
 class RealSenseCameraConfig(CameraConfig):
-    """
-    Example of tested options for Intel Real Sense D405:
+    """Configuration class for Intel RealSense cameras.

+    This class provides specialized configuration options for Intel RealSense cameras,
+    including support for depth sensing and device identification via serial number or name.
+
+    Example configurations for Intel RealSense D405:
    ```python
-    RealSenseCameraConfig(128422271347, 30, 640, 480)
-    RealSenseCameraConfig(128422271347, 60, 640, 480)
-    RealSenseCameraConfig(128422271347, 90, 640, 480)
-    RealSenseCameraConfig(128422271347, 30, 1280, 720)
-    RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True)
-    RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=90)
+    # Basic configurations
+    RealSenseCameraConfig(128422271347, 30, 1280, 720)   # 1280x720 @ 30FPS
+    RealSenseCameraConfig(128422271347, 60, 640, 480)   # 640x480 @ 60FPS
+
+    # Advanced configurations
+    RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True)  # With depth sensing
+    RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90)     # With 90° rotation
    ```
+
+    Attributes:
+        fps: Requested frames per second for the color stream.
+        width: Requested frame width in pixels for the color stream.
+        height: Requested frame height in pixels for the color stream.
+        name: Optional human-readable name to identify the camera.
+        serial_number: Optional unique serial number to identify the camera.
+                      Either name or serial_number must be provided.
+        color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
+        channels: Number of color channels (currently only 3 is supported).
+        use_depth: Whether to enable depth stream. Defaults to False.
+        rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
+
+    Note:
+        - Either name or serial_number must be specified, but not both.
+        - Depth stream configuration (if enabled) will use the same FPS as the color stream.
+        - The actual resolution and FPS may be adjusted by the camera to the nearest supported mode.
+        - Only 3-channel color output (RGB/BGR) is currently supported.
    """

    name: str | None = None
@@ -38,8 +60,7 @@ class RealSenseCameraConfig(CameraConfig):
    color_mode: ColorMode = ColorMode.RGB
    channels: int | None = 3
    use_depth: bool = False
-    # NOTE(Steven): Check how draccus would deal with this str -> enum
-    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
+    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION  # NOTE(Steven): Check if draccus can parse to an enum

    def __post_init__(self):
        if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
--- a/lerobot/common/cameras/opencv/init.py
+++ b/lerobot/common/cameras/opencv/init.py
@@ -1,2 +1,16 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from .camera_opencv import OpenCVCamera
 from .configuration_opencv import OpenCVCameraConfig
--- a/lerobot/common/cameras/opencv/camera_opencv.py
+++ b/lerobot/common/cameras/opencv/camera_opencv.py
@@ -36,7 +36,7 @@ from ..camera import Camera
 from ..utils import IndexOrPath, get_cv2_backend, get_cv2_rotation
 from .configuration_opencv import ColorMode, OpenCVCameraConfig

-# The maximum opencv device index depends on your operating system. For instance,
+# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance,
 # if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
 # on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23.
 # When you change the USB port or reboot the computer, the operating system might
@@ -129,10 +129,8 @@ class OpenCVCamera(Camera):
        self.logs: dict = {}  # NOTE(Steven): Might be removed in the future

        self.rotation: int | None = get_cv2_rotation(config.rotation)
-        self.backend: int = get_cv2_backend()  # NOTE(Steven): If I specify backend the opencv open fails
+        self.backend: int = get_cv2_backend()  # NOTE(Steven): If we specify backend the opencv open fails

-        # NOTE(Steven): What happens if rotation is specified but we leave width and height to None?
-        # NOTE(Steven): Should we enforce these parameters if rotation is set?
        if self.height and self.width:
            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
                self.prerotated_width, self.prerotated_height = self.height, self.width
@@ -208,7 +206,7 @@ class OpenCVCamera(Camera):

        if do_warmup_read:
            logger.debug(f"Reading a warm-up frame for {self.index_or_path}...")
-            self.read()  # NOTE(Steven): For now we just read one frame, we could also loop for X secs\
+            self.read()  # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs

        logger.debug(f"Camera {self.index_or_path} connected and configured successfully.")

@@ -239,7 +237,6 @@ class OpenCVCamera(Camera):
        actual_width = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_WIDTH)))
        actual_height = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))

-        # NOTE(Steven): When do we constraint the possibility of only setting one?
        if self.width is None or self.height is None:
            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
                self.width, self.height = actual_height, actual_width
--- a/lerobot/common/cameras/opencv/configuration_opencv.py
+++ b/lerobot/common/cameras/opencv/configuration_opencv.py
@@ -1,3 +1,17 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from dataclasses import dataclass
 from pathlib import Path

@@ -7,15 +21,34 @@ from ..configs import CameraConfig, ColorMode, Cv2Rotation
@CameraConfig.register_subclass("opencv")
@dataclass
 class OpenCVCameraConfig(CameraConfig):
-    """
-    Example of tested options for Intel Real Sense D405:
-    #NOTE(Steven): update this doc
+    """Configuration class for OpenCV-based camera devices or video files.
+
+    This class provides configuration options for cameras accessed through OpenCV,
+    supporting both physical camera devices and video files. It includes settings
+    for resolution, frame rate, color mode, and image rotation.
+
+    Example configurations:
    ```python
-    OpenCVCameraConfig(0, 30, 640, 480)
-    OpenCVCameraConfig(0, 60, 640, 480)
-    OpenCVCameraConfig(0, 90, 640, 480)
-    OpenCVCameraConfig(0, 30, 1280, 720)
+    # Basic configurations
+    OpenCVCameraConfig(0, 30, 1280, 720)   # 1280x720 @ 30FPS
+    OpenCVCameraConfig(/dev/video4, 60, 640, 480)   # 640x480 @ 60FPS
+
+    # Advanced configurations
+    OpenCVCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90)     # With 90° rotation
    ```
+
+    Attributes:
+        index_or_path: Either an integer representing the camera device index,
+                      or a Path object pointing to a video file.
+        fps: Requested frames per second for the color stream.
+        width: Requested frame width in pixels for the color stream.
+        height: Requested frame height in pixels for the color stream.
+        color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
+        channels: Number of color channels (currently only 3 is supported).
+        rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
+
+    Note:
+        - Only 3-channel color output (RGB/BGR) is currently supported.
    """

    index_or_path: int | Path
--- a/lerobot/find_cameras.py
+++ b/lerobot/find_cameras.py
@@ -20,7 +20,7 @@ import logging
 import shutil
 import time
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List

 import numpy as np
 from PIL import Image
@@ -77,7 +77,7 @@ def find_all_realsense_cameras() -> List[Dict[str, Any]]:
    return all_realsense_cameras_info


-def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dict[str, Any]]:
+def find_and_print_cameras(camera_type_filter: str | None = None) -> List[Dict[str, Any]]:
    """
    Finds available cameras based on an optional filter and prints their information.

@@ -106,7 +106,7 @@ def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dic
    else:
        print("\n--- Detected Cameras ---")
        for i, cam_info in enumerate(all_cameras_info):
-            print(f"Camera #{i + 1}:")
+            print(f"Camera #{i}:")
            for key, value in cam_info.items():
                if key == "default_stream_profile" and isinstance(value, dict):
                    print(f"  {key.replace('_', ' ').capitalize()}:")
@@ -120,7 +120,7 @@ def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dic

 def save_image(
    img_array: np.ndarray,
-    camera_identifier: Union[str, int],
+    camera_identifier: str | int,
    images_dir: Path,
    camera_type: str,
 ):
@@ -142,7 +142,7 @@ def save_image(
        logger.error(f"Failed to save image for camera {camera_identifier} (type {camera_type}): {e}")


-def initialize_output_directory(output_dir: Union[str, Path]) -> Path:
+def initialize_output_directory(output_dir: str | Path) -> Path:
    """Initialize and clean the output directory."""
    output_dir = Path(output_dir)
    if output_dir.exists():
@@ -153,7 +153,7 @@ def initialize_output_directory(output_dir: Union[str, Path]) -> Path:
    return output_dir


-def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+def create_camera_instance(cam_meta: Dict[str, Any]) -> Dict[str, Any] | None:
    """Create and connect to a camera instance based on metadata."""
    cam_type = cam_meta.get("type")
    cam_id = cam_meta.get("id")
@@ -180,7 +180,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]

        if instance:
            logger.info(f"Connecting to {cam_type} camera: {cam_id}...")
-            instance.connect()
+            instance.connect(do_warmup_read=False)
            return {"instance": instance, "meta": cam_meta}
    except Exception as e:
        logger.error(f"Failed to connect or configure {cam_type} camera {cam_id}: {e}")
@@ -191,7 +191,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]

 def process_camera_image(
    cam_dict: Dict[str, Any], output_dir: Path, current_time: float
-) -> Optional[concurrent.futures.Future]:
+) -> concurrent.futures.Future | None:
    """Capture and process an image from a single camera."""
    cam = cam_dict["instance"]
    meta = cam_dict["meta"]
@@ -228,9 +228,9 @@ def cleanup_cameras(cameras_to_use: List[Dict[str, Any]]):


 def save_images_from_all_cameras(
-    output_dir: Union[str, Path],
+    output_dir: str | Path,
    record_time_s: float = 2.0,
-    camera_type_filter: Optional[str] = None,
+    camera_type_filter: str | None = None,
 ):
    """
    Connects to detected cameras (optionally filtered by type) and saves images from each.
@@ -249,7 +249,6 @@ def save_images_from_all_cameras(
        logger.warning("No cameras detected matching the criteria. Cannot save images.")
        return

-    # Create and connect to all cameras
    cameras_to_use = []
    for cam_meta in all_camera_metadata:
        camera_instance = create_camera_instance(cam_meta)
@@ -263,7 +262,6 @@ def save_images_from_all_cameras(
    logger.info(f"Starting image capture for {record_time_s} seconds from {len(cameras_to_use)} cameras.")
    start_time = time.perf_counter()

-    # NOTE(Steven): This seems like an overkill to me
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(cameras_to_use) * 2) as executor:
        try:
            while time.perf_counter() - start_time < record_time_s:
@@ -287,6 +285,10 @@ def save_images_from_all_cameras(
            logger.info(f"Image capture finished. Images saved to {output_dir}")


+# NOTE(Steven):
+# * realsense identified as opencv -> consistent in linux, we can even capture images
+# * opencv mac cams reporting different fps at init, not an issue as we don't enforce fps here
+# * opencv not opening in linux if we specify a backend
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Unified camera utility script for listing cameras and capturing images."
@@ -329,8 +331,8 @@ if __name__ == "__main__":
    capture_parser.add_argument(
        "--record-time-s",
        type=float,
-        default=5.0,
-        help="Time duration to attempt capturing frames. Default: 0.5 seconds (usually enough for one frame).",
+        default=6.0,
+        help="Time duration to attempt capturing frames. Default: 6 seconds.",
    )
    capture_parser.set_defaults(
        func=lambda args: save_images_from_all_cameras(
--- a/tests/cameras/test_opencv.py
+++ b/tests/cameras/test_opencv.py
@@ -28,7 +28,7 @@ from lerobot.common.cameras.configs import Cv2Rotation
 from lerobot.common.cameras.opencv import OpenCVCamera, OpenCVCameraConfig
 from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError

-# NOTE(Steven): Consider improving the assert coverage
+# NOTE(Steven): more tests + assertions?
 TEST_ARTIFACTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "artifacts", "cameras")
 DEFAULT_PNG_FILE_PATH = os.path.join(TEST_ARTIFACTS_DIR, "fakecam_sd_160x120.png")
 TEST_IMAGE_PATHS = [
--- a/tests/cameras/test_realsense.py
+++ b/tests/cameras/test_realsense.py
@@ -36,7 +36,9 @@ except (ImportError, ModuleNotFoundError):
 TEST_ARTIFACTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "artifacts", "cameras")
 BAG_FILE_PATH = os.path.join(TEST_ARTIFACTS_DIR, "test_rs.bag")

-
+# NOTE(Steven): Missing tests for depth
+# NOTE(Steven): Takes 20sec, the patch being the biggest bottleneck
+# NOTE(Steven): more tests + assertions?
 if not os.path.exists(BAG_FILE_PATH):
    print(f"Warning: Bag file not found at {BAG_FILE_PATH}. Some tests might fail or be skipped.")