chore(cameras): address review comments + make test pass again

2025-05-20 14:42:05 +02:00
parent 3a08eddeab
commit 41179d0996
13 changed files with 150 additions and 127 deletions
--- a/lerobot/common/cameras/camera.py
+++ b/lerobot/common/cameras/camera.py
@@ -22,7 +22,35 @@ from .configs import CameraConfig, ColorMode


 class Camera(abc.ABC):
+    """Base class for camera implementations.
+
+    Defines a standard interface for camera operations across different backends.
+    Subclasses must implement all abstract methods.
+
+    Manages basic camera properties (FPS, resolution) and core operations:
+    - Connection/disconnection
+    - Frame capture (sync/async)
+
+    Attributes:
+        fps (int | None): Configured frames per second
+        width (int | None): Frame width in pixels
+        height (int | None): Frame height in pixels
+
+    Example:
+        class MyCamera(Camera):
+            def __init__(self, config): ...
+            @property
+            def is_connected(self) -> bool: ...
+            def connect(self, warmup=True): ...
+            # Plus other required methods
+    """
+
    def __init__(self, config: CameraConfig):
+        """Initialize the camera with the given configuration.
+
+        Args:
+            config: Camera configuration containing FPS and resolution.
+        """
        self.fps: int | None = config.fps
        self.width: int | None = config.width
        self.height: int | None = config.height
@@ -30,20 +58,50 @@ class Camera(abc.ABC):
    @property
    @abc.abstractmethod
    def is_connected(self) -> bool:
+        """Check if the camera is currently connected.
+
+        Returns:
+            bool: True if the camera is connected and ready to capture frames,
+                  False otherwise.
+        """
        pass

    @abc.abstractmethod
    def connect(self, warmup: bool = True) -> None:
+        """Establish connection to the camera.
+
+        Args:
+            warmup: If True (default), captures a warmup frame before returning.
+        """
        pass

    @abc.abstractmethod
    def read(self, color_mode: ColorMode | None = None) -> np.ndarray:
+        """Capture and return a single frame from the camera.
+
+        Args:
+            color_mode: Desired color mode for the output frame. If None,
+                        uses the camera's default color mode.
+
+        Returns:
+            np.ndarray: Captured frame as a numpy array.
+        """
        pass

    @abc.abstractmethod
    def async_read(self, timeout_ms: float = ...) -> np.ndarray:
+        """Asynchronously capture and return a single frame from the camera.
+
+        Args:
+            timeout_ms: Maximum time to wait for a frame in milliseconds.
+                        Defaults to implementation-specific timeout.
+
+        Returns:
+            np.ndarray: Captured frame as a numpy array.
+        """
        pass

    @abc.abstractmethod
    def disconnect(self) -> None:
+        """Disconnect from the camera and release resources."""
        pass
--- a/lerobot/common/cameras/intel/camera_realsense.py
+++ b/lerobot/common/cameras/intel/camera_realsense.py
@@ -29,7 +29,6 @@ import numpy as np
 import pyrealsense2 as rs

 from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
-from lerobot.common.utils.utils import capture_timestamp_utc

 from ..camera import Camera
 from ..configs import ColorMode
@@ -131,7 +130,6 @@ class RealSenseCamera(Camera):
            raise ValueError("RealSenseCameraConfig must provide either 'serial_number' or 'name'.")

        self.fps = config.fps
-        self.channels = config.channels
        self.color_mode = config.color_mode
        self.use_depth = config.use_depth

@@ -213,7 +211,7 @@ class RealSenseCamera(Camera):

    def _find_serial_number_from_name(self, name: str) -> str:
        """Finds the serial number for a given unique camera name."""
-        camera_infos = self.find_cameras(raise_when_empty=True)
+        camera_infos = self.find_cameras()
        found_devices = [cam for cam in camera_infos if str(cam["name"]) == name]

        if not found_devices:
@@ -304,7 +302,7 @@ class RealSenseCamera(Camera):
            raise DeviceAlreadyConnectedError(f"{self} is already connected.")

        self.rs_pipeline = rs.pipeline()
-        rs_config = self._configure_realsense_settings()
+        rs_config = self._make_rs_pipeline_config()

        try:
            self.rs_profile = self.rs_pipeline.start(rs_config)
@@ -313,7 +311,7 @@ class RealSenseCamera(Camera):
            self.rs_profile = None
            self.rs_pipeline = None
            raise ConnectionError(
-                f"Failed to open {self} camera. Run 'python -m find_cameras list-cameras' for details."
+                f"Failed to open {self} camera. Run 'python -m find_cameras' for details about the available cameras in your system."
            ) from e

        logger.debug(f"Validating stream configuration for {self}...")
@@ -416,7 +414,6 @@ class RealSenseCamera(Camera):
        read_duration_ms = (time.perf_counter() - start_time) * 1e3
        logger.debug(f"{self} synchronous read took: {read_duration_ms:.1f}ms")

-        self.logs["timestamp_utc"] = capture_timestamp_utc()
        return depth_map_processed

    def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 100) -> np.ndarray:
@@ -461,7 +458,6 @@ class RealSenseCamera(Camera):
        read_duration_ms = (time.perf_counter() - start_time) * 1e3
        logger.debug(f"{self} synchronous read took: {read_duration_ms:.1f}ms")

-        self.logs["timestamp_utc"] = capture_timestamp_utc()
        return color_image_processed

    def _postprocess_image(
@@ -492,11 +488,7 @@ class RealSenseCamera(Camera):
        if depth_frame:
            h, w = image.shape
        else:
-            h, w, c = image.shape
-            if c != self.channels:
-                logger.warning(
-                    f"Captured frame channels ({c}) do not match configured channels ({self.channels}) for {self}."
-                )
+            h, w, _c = image.shape

        if h != self.prerotated_height or w != self.prerotated_width:
            raise RuntimeError(
@@ -580,7 +572,7 @@ class RealSenseCamera(Camera):
            raise DeviceNotConnectedError(f"{self} is not connected.")

        if self.thread is None or not self.thread.is_alive():
-            self._ensure_read_thread_running()
+            self._start_read_thread()

        try:
            return self.frame_queue.get(timeout=timeout_ms / 1000.0)
@@ -600,7 +592,7 @@ class RealSenseCamera(Camera):
                f"Error getting frame data from queue for camera {self.serial_number}: {e}"
            ) from e

-    def _shutdown_read_thread(self):
+    def _stop_read_thread(self):
        """Signals the background read thread to stop and waits for it to join."""
        if self.stop_event is not None:
            logger.debug(f"Signaling stop event for read thread of {self}.")
@@ -633,7 +625,7 @@ class RealSenseCamera(Camera):
            )

        if self.thread is not None:
-            self._shutdown_read_thread()
+            self._stop_read_thread()

        if self.rs_pipeline is not None:
            logger.debug(f"Stopping RealSense pipeline object for {self}.")
--- a/lerobot/common/cameras/intel/configuration_realsense.py
+++ b/lerobot/common/cameras/intel/configuration_realsense.py
@@ -44,7 +44,6 @@ class RealSenseCameraConfig(CameraConfig):
        serial_number: Optional unique serial number to identify the camera.
                      Either name or serial_number must be provided.
        color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
-        channels: Number of color channels (currently only 3 is supported).
        use_depth: Whether to enable depth stream. Defaults to False.
        rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.

@@ -58,7 +57,6 @@ class RealSenseCameraConfig(CameraConfig):
    name: str | None = None
    serial_number: int | None = None
    color_mode: ColorMode = ColorMode.RGB
-    channels: int | None = 3
    use_depth: bool = False
    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION  # NOTE(Steven): Check if draccus can parse to an enum

@@ -78,9 +76,6 @@ class RealSenseCameraConfig(CameraConfig):
                f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
            )

-        if self.channels != 3:
-            raise NotImplementedError(f"Unsupported number of channels: {self.channels}")
-
        if bool(self.name) and bool(self.serial_number):
            raise ValueError(
                f"One of them must be set: name or serial_number, but {self.name=} and {self.serial_number=} provided."
--- a/lerobot/common/cameras/opencv/camera_opencv.py
+++ b/lerobot/common/cameras/opencv/camera_opencv.py
@@ -30,7 +30,6 @@ import cv2
 import numpy as np

 from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
-from lerobot.common.utils.utils import capture_timestamp_utc

 from ..camera import Camera
 from ..utils import get_cv2_backend, get_cv2_rotation
@@ -117,7 +116,6 @@ class OpenCVCamera(Camera):
        self.index_or_path = config.index_or_path

        self.fps = config.fps
-        self.channels = config.channels
        self.color_mode = config.color_mode

        self.videocapture: cv2.VideoCapture | None = None
@@ -141,7 +139,7 @@ class OpenCVCamera(Camera):
    @property
    def is_connected(self) -> bool:
        """Checks if the camera is currently connected and opened."""
-        return isinstance(self.videocapture_camera, cv2.VideoCapture) and self.videocapture_camera.isOpened()
+        return isinstance(self.videocapture, cv2.VideoCapture) and self.videocapture.isOpened()

    def _configure_capture_settings(self) -> None:
        """
@@ -187,19 +185,19 @@ class OpenCVCamera(Camera):
        # blocking in multi-threaded applications, especially during data collection.
        cv2.setNumThreads(1)

-        self.videocapture_camera = cv2.VideoCapture(self.index_or_path)
+        self.videocapture = cv2.VideoCapture(self.index_or_path)

-        if not self.videocapture_camera.isOpened():
-            self.videocapture_camera.release()
-            self.videocapture_camera = None
+        if not self.videocapture.isOpened():
+            self.videocapture.release()
+            self.videocapture = None
            raise ConnectionError(
                f"Failed to open OpenCV camera {self.index_or_path}."
-                f"Run 'python -m find_cameras list-cameras' for details."
+                f"Run 'python -m find_cameras Run 'python -m find_cameras' for details about the available cameras in your system."
            )

        self._configure_capture_settings()

-        if do_warmup_read:
+        if warmup:
            logger.debug(f"Reading a warm-up frame for {self.index_or_path}...")
            self.read()  # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs

@@ -209,12 +207,12 @@ class OpenCVCamera(Camera):
        """Validates and sets the camera's frames per second (FPS)."""

        if self.fps is None:
-            self.fps = self.videocapture_camera.get(cv2.CAP_PROP_FPS)
+            self.fps = self.videocapture.get(cv2.CAP_PROP_FPS)
            logger.info(f"FPS set to camera default: {self.fps}.")
            return

-        success = self.videocapture_camera.set(cv2.CAP_PROP_FPS, float(self.fps))
-        actual_fps = self.videocapture_camera.get(cv2.CAP_PROP_FPS)
+        success = self.videocapture.set(cv2.CAP_PROP_FPS, float(self.fps))
+        actual_fps = self.videocapture.get(cv2.CAP_PROP_FPS)
        # Use math.isclose for robust float comparison
        if not success or not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
            logger.warning(
@@ -229,8 +227,8 @@ class OpenCVCamera(Camera):
    def _validate_width_and_height(self) -> None:
        """Validates and sets the camera's frame capture width and height."""

-        default_width = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_WIDTH)))
-        default_height = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+        default_width = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_WIDTH)))
+        default_height = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        if self.width is None or self.height is None:
            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
@@ -243,8 +241,8 @@ class OpenCVCamera(Camera):
            logger.info(f"Capture height set to camera default: {self.height}.")
            return

-        success = self.videocapture_camera.set(cv2.CAP_PROP_FRAME_WIDTH, float(self.prerotated_width))
-        actual_width = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_WIDTH)))
+        success = self.videocapture.set(cv2.CAP_PROP_FRAME_WIDTH, float(self.prerotated_width))
+        actual_width = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_WIDTH)))
        if not success or self.prerotated_width != actual_width:
            logger.warning(
                f"Requested capture width {self.prerotated_width} for {self}, but camera reported {actual_width} (set success: {success})."
@@ -254,8 +252,8 @@ class OpenCVCamera(Camera):
            )
        logger.debug(f"Capture width set to {actual_width} for {self}.")

-        success = self.videocapture_camera.set(cv2.CAP_PROP_FRAME_HEIGHT, float(self.prerotated_height))
-        actual_height = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+        success = self.videocapture.set(cv2.CAP_PROP_FRAME_HEIGHT, float(self.prerotated_height))
+        actual_height = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        if not success or self.prerotated_height != actual_height:
            logger.warning(
                f"Requested capture height {self.prerotated_height} for {self}, but camera reported {actual_height} (set success: {success})."
@@ -275,7 +273,6 @@ class OpenCVCamera(Camera):

        Args:
            max_index_search_range (int): The maximum index to check on non-Linux systems.
-            raise_when_empty (bool): If True, raises an OSError if no cameras are found.

        Returns:
            List[Dict[str, Any]]: A list of dictionaries,
@@ -321,8 +318,6 @@ class OpenCVCamera(Camera):

        if not found_cameras_info:
            logger.warning("No OpenCV devices detected.")
-            if raise_when_empty:
-                raise OSError("No OpenCV devices detected. Ensure cameras are connected.")

        logger.info(f"Detected OpenCV cameras: {[cam['id'] for cam in found_cameras_info]}")
        return found_cameras_info
@@ -356,7 +351,7 @@ class OpenCVCamera(Camera):
        start_time = time.perf_counter()

        # NOTE(Steven): Are we okay with this blocking an undefined amount of time?
-        ret, frame = self.videocapture_camera.read()
+        ret, frame = self.videocapture.read()

        if not ret or frame is None:
            raise RuntimeError(
@@ -369,7 +364,6 @@ class OpenCVCamera(Camera):
        read_duration_ms = (time.perf_counter() - start_time) * 1e3
        logger.debug(f"{self} synchronous read took: {read_duration_ms:.1f}ms")

-        self.logs["timestamp_utc"] = capture_timestamp_utc()
        return processed_frame

    def _postprocess_image(self, image: np.ndarray, color_mode: ColorMode | None = None) -> np.ndarray:
@@ -402,10 +396,6 @@ class OpenCVCamera(Camera):
            raise RuntimeError(
                f"Captured frame dimensions ({h}x{w}) do not match configured capture dimensions ({self.prerotated_height}x{self.prerotated_width}) for {self}."
            )
-        if c != self.channels:
-            logger.warning(
-                f"Captured frame channels ({c}) do not match configured channels ({self.channels}) for {self}."
-            )

        processed_image = image
        if requested_color_mode == ColorMode.RGB:
@@ -485,7 +475,7 @@ class OpenCVCamera(Camera):
            raise DeviceNotConnectedError(f"{self} is not connected.")

        if self.thread is None or not self.thread.is_alive():
-            self._ensure_read_thread_running()
+            self._start_read_thread()

        try:
            return self.frame_queue.get(timeout=timeout_ms / 1000.0)
@@ -534,10 +524,10 @@ class OpenCVCamera(Camera):
            raise DeviceNotConnectedError(f"{self} not connected.")

        if self.thread is not None:
-            self._shutdown_read_thread()
+            self._stop_read_thread()

-        if self.videocapture_camera is not None:
-            self.videocapture_camera.release()
-            self.videocapture_camera = None
+        if self.videocapture is not None:
+            self.videocapture.release()
+            self.videocapture = None

        logger.info(f"{self} disconnected.")
--- a/lerobot/common/cameras/opencv/configuration_opencv.py
+++ b/lerobot/common/cameras/opencv/configuration_opencv.py
@@ -44,7 +44,6 @@ class OpenCVCameraConfig(CameraConfig):
        width: Requested frame width in pixels for the color stream.
        height: Requested frame height in pixels for the color stream.
        color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
-        channels: Number of color channels (currently only 3 is supported).
        rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.

    Note:
@@ -70,6 +69,3 @@ class OpenCVCameraConfig(CameraConfig):
            raise ValueError(
                f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
            )
-
-        if self.channels != 3:
-            raise NotImplementedError(f"Unsupported number of channels: {self.channels}")
--- a/lerobot/find_cameras.py
+++ b/lerobot/find_cameras.py
@@ -14,6 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+"""
+Helper to find the camera devices available in your system.
+
+Example:
+
+```shell
+python -m lerobot.find_cameras
+```
+"""
+
 import argparse
 import concurrent.futures
 import logging
@@ -44,7 +54,7 @@ def find_all_opencv_cameras() -> List[Dict[str, Any]]:
    all_opencv_cameras_info: List[Dict[str, Any]] = []
    logger.info("Searching for OpenCV cameras...")
    try:
-        opencv_cameras = OpenCVCamera.find_cameras(raise_when_empty=False)
+        opencv_cameras = OpenCVCamera.find_cameras()
        for cam_info in opencv_cameras:
            all_opencv_cameras_info.append(cam_info)
        logger.info(f"Found {len(opencv_cameras)} OpenCV cameras.")
@@ -64,7 +74,7 @@ def find_all_realsense_cameras() -> List[Dict[str, Any]]:
    all_realsense_cameras_info: List[Dict[str, Any]] = []
    logger.info("Searching for RealSense cameras...")
    try:
-        realsense_cameras = RealSenseCamera.find_cameras(raise_when_empty=False)
+        realsense_cameras = RealSenseCamera.find_cameras()
        for cam_info in realsense_cameras:
            all_realsense_cameras_info.append(cam_info)
        logger.info(f"Found {len(realsense_cameras)} RealSense cameras.")
@@ -179,7 +189,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Dict[str, Any] | None:

        if instance:
            logger.info(f"Connecting to {cam_type} camera: {cam_id}...")
-            instance.connect(do_warmup_read=False)
+            instance.connect(warmup=False)
            return {"instance": instance, "meta": cam_meta}
    except Exception as e:
        logger.error(f"Failed to connect or configure {cam_type} camera {cam_id}: {e}")
@@ -292,28 +302,8 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Unified camera utility script for listing cameras and capturing images."
    )
-    subparsers = parser.add_subparsers(dest="command", help="Available commands")

-    # List cameras command
-    list_parser = subparsers.add_parser(
-        "list-cameras", help="Shows connected cameras. Optionally filter by type (realsense or opencv)."
-    )
-    list_parser.add_argument(
-        "camera_type",
-        type=str,
-        nargs="?",
-        default=None,
-        choices=["realsense", "opencv"],
-        help="Specify camera type to list (e.g., 'realsense', 'opencv'). Lists all if omitted.",
-    )
-    list_parser.set_defaults(func=lambda args: find_and_print_cameras(args.camera_type))
-
-    # Capture images command
-    capture_parser = subparsers.add_parser(
-        "capture-images",
-        help="Saves images from detected cameras (optionally filtered by type) using their default stream profiles.",
-    )
-    capture_parser.add_argument(
+    parser.add_argument(
        "camera_type",
        type=str,
        nargs="?",
@@ -321,19 +311,19 @@ if __name__ == "__main__":
        choices=["realsense", "opencv"],
        help="Specify camera type to capture from (e.g., 'realsense', 'opencv'). Captures from all if omitted.",
    )
-    capture_parser.add_argument(
+    parser.add_argument(
        "--output-dir",
        type=Path,
        default="outputs/captured_images",
        help="Directory to save images. Default: outputs/captured_images",
    )
-    capture_parser.add_argument(
+    parser.add_argument(
        "--record-time-s",
        type=float,
        default=6.0,
        help="Time duration to attempt capturing frames. Default: 6 seconds.",
    )
-    capture_parser.set_defaults(
+    parser.set_defaults(
        func=lambda args: save_images_from_all_cameras(
            output_dir=args.output_dir,
            record_time_s=args.record_time_s,
@@ -343,14 +333,4 @@ if __name__ == "__main__":

    args = parser.parse_args()

-    if args.command is None:
-        default_output_dir = capture_parser.get_default("output_dir")
-        default_record_time_s = capture_parser.get_default("record_time_s")
-
-        save_images_from_all_cameras(
-            output_dir=default_output_dir,
-            record_time_s=default_record_time_s,
-            camera_type_filter=None,
-        )
-    else:
-        args.func(args)
+    args.func(args)