refactor(cameras): improvements realsense cam v0.1

2025-05-02 15:09:38 +02:00
parent 35c4b01752
commit 15b5d28f45
5 changed files with 782 additions and 551 deletions
--- a/lerobot/common/cameras/intel/camera_realsense.py
+++ b/lerobot/common/cameras/intel/camera_realsense.py
--- a/lerobot/common/cameras/intel/configuration_realsense.py
+++ b/lerobot/common/cameras/intel/configuration_realsense.py
@@ -14,7 +14,7 @@

 from dataclasses import dataclass

-from ..configs import CameraConfig
+from ..configs import CameraConfig, ColorMode, Cv2Rotation


@CameraConfig.register_subclass("intelrealsense")
@@ -38,26 +38,39 @@ class RealSenseCameraConfig(CameraConfig):
    fps: int | None = None
    width: int | None = None
    height: int | None = None
-    color_mode: str = "rgb"
-    channels: int | None = None
+    color_mode: ColorMode = ColorMode.RGB
+    channels: int | None = 3
    use_depth: bool = False
    force_hardware_reset: bool = True
-    rotation: int | None = None
+    rotation: Cv2Rotation = (
+        Cv2Rotation.NO_ROTATION
+    )  # NOTE(Steven): Check how draccus would deal with this str -> enum

    def __post_init__(self):
+        if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
+            raise ValueError(
+                f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
+            )
+
+        if self.rotation not in (
+            Cv2Rotation.NO_ROTATION,
+            Cv2Rotation.ROTATE_90,
+            Cv2Rotation.ROTATE_180,
+            Cv2Rotation.ROTATE_270,
+        ):
+            raise ValueError(
+                f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
+            )
+
+        if self.channels != 3:
+            raise NotImplementedError(f"Unsupported number of channels: {self.channels}")
+
        # bool is stronger than is None, since it works with empty strings
        if bool(self.name) and bool(self.serial_number):
            raise ValueError(
                f"One of them must be set: name or serial_number, but {self.name=} and {self.serial_number=} provided."
            )

-        if self.color_mode not in ["rgb", "bgr"]:
-            raise ValueError(
-                f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
-            )
-
-        self.channels = 3
-
        at_least_one_is_not_none = self.fps is not None or self.width is not None or self.height is not None
        at_least_one_is_none = self.fps is None or self.width is None or self.height is None
        if at_least_one_is_not_none and at_least_one_is_none:
@@ -65,6 +78,3 @@ class RealSenseCameraConfig(CameraConfig):
                "For `fps`, `width` and `height`, either all of them need to be set, or none of them, "
                f"but {self.fps=}, {self.width=}, {self.height=} were provided."
            )
-
-        if self.rotation not in [-90, None, 90, 180]:
-            raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
--- a/lerobot/common/cameras/opencv/camera_opencv.py
+++ b/lerobot/common/cameras/opencv/camera_opencv.py
@@ -115,12 +115,15 @@ class OpenCVCamera(Camera):

        self.capture_width: int | None = config.width
        self.capture_height: int | None = config.height
+        self.width: int | None = None
+        self.height: int | None = None

        self.fps: int | None = config.fps
        self.channels: int = config.channels
        self.color_mode: ColorMode = config.color_mode

        self.videocapture_camera: cv2.VideoCapture | None = None
+
        self.thread: Thread | None = None
        self.stop_event: Event | None = None
        self.frame_queue: queue.Queue = queue.Queue(maxsize=1)
@@ -188,18 +191,19 @@ class OpenCVCamera(Camera):
        if not self.is_connected:
            raise DeviceNotConnectedError(f"Cannot configure settings for {self} as it is not connected.")

-        self._set_fps()
-        self._set_capture_width()
-        self._set_capture_height()
+        self._validate_fps()
+        self._validate_capture_width()
+        self._validate_capture_height()

        if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
            self.width, self.height = self.capture_height, self.capture_width
        else:
            self.width, self.height = self.capture_width, self.capture_height
+        logger.debug(f"Final image dimensions set to: {self.width}x{self.height} (after rotation if any)")

    def connect(self):
        """
-        Connects to the camera device specified in the configuration.
+        Connects to the OpenCV camera specified in the configuration.

        Initializes the OpenCV VideoCapture object, sets desired camera properties
        (FPS, width, height), and performs initial checks.
@@ -231,8 +235,8 @@ class OpenCVCamera(Camera):
        self._configure_capture_settings()
        logger.debug(f"Camera {self.index_or_path} connected and configured successfully.")

-    def _set_fps(self) -> None:
-        """Sets the camera's frames per second (FPS)."""
+    def _validate_fps(self) -> None:
+        """Validates and sets the camera's frames per second (FPS)."""

        if self.fps is None:
            self.fps = self.videocapture_camera.get(cv2.CAP_PROP_FPS)
@@ -252,8 +256,8 @@ class OpenCVCamera(Camera):
            )
        logger.debug(f"FPS set to {actual_fps} for {self}.")

-    def _set_capture_width(self) -> None:
-        """Sets the camera's frame capture width."""
+    def _validate_capture_width(self) -> None:
+        """Validates and sets the camera's frame capture width."""

        if self.capture_width is None:
            self.capture_width = self.videocapture_camera.get(cv2.CAP_PROP_FRAME_WIDTH)
@@ -271,8 +275,8 @@ class OpenCVCamera(Camera):
            )
        logger.debug(f"Capture width set to {actual_width} for {self}.")

-    def _set_capture_height(self) -> None:
-        """Sets the camera's frame capture height."""
+    def _validate_capture_height(self) -> None:
+        """Validates and sets the camera's frame capture height."""

        if self.capture_height is None:
            self.capture_height = self.videocapture_camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
@@ -365,6 +369,7 @@ class OpenCVCamera(Camera):

        start_time = time.perf_counter()

+        # NOTE(Steven): Are we okay with this blocking an undefined amount of time?
        ret, frame = self.videocapture_camera.read()

        if not ret or frame is None:
@@ -423,7 +428,7 @@ class OpenCVCamera(Camera):

        if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
            processed_image = cv2.rotate(processed_image, self.rotation)
-        logger.debug(f"Rotated frame by {self.config.rotation} degrees for {self}.")
+            logger.debug(f"Rotated frame by {self.config.rotation} degrees for {self}.")

        return processed_image

@@ -433,7 +438,7 @@ class OpenCVCamera(Camera):

        Continuously reads frames from the camera using the synchronous `read()`
        method and places the latest frame into the `frame_queue`. It overwrites
-        any previous frame in the queue to ensure only the most recent one is available.
+        any previous frame in the queue.
        """
        logger.debug(f"Starting read loop thread for {self}.")
        while not self.stop_event.is_set():
@@ -455,7 +460,6 @@ class OpenCVCamera(Camera):

    def _ensure_read_thread_running(self):
        """Starts or restarts the background read thread if it's not running."""
-        logger.debug(f"Read thread for {self} is not running. Starting...")
        if self.thread is not None:
            self.thread.join(timeout=0.1)
        if self.stop_event is not None:
@@ -495,11 +499,9 @@ class OpenCVCamera(Camera):
            raise DeviceNotConnectedError(f"{self} is not connected.")

        if self.thread is None or not self.thread.is_alive():
-            # Ensure the background reading thread is operational
            self._ensure_read_thread_running()

        try:
-            # Get the latest frame from the queue, waiting up to the timeout
            return self.frame_queue.get(timeout=timeout_ms / 1000.0)
        except queue.Empty as e:
            thread_alive = self.thread is not None and self.thread.is_alive()
@@ -516,19 +518,23 @@ class OpenCVCamera(Camera):
            raise RuntimeError(f"Error getting frame from queue for camera {self.index_or_path}: {e}") from e

    def _shutdown_read_thread(self):
-        """Cleans the background read thread if it's running."""
+        """Signals the background read thread to stop and waits for it to join."""
        if self.stop_event is not None:
            logger.debug(f"Signaling stop event for read thread of {self}.")
            self.stop_event.set()
        else:
            logger.warning(f"Stop event not found for thread of {self}, cannot signal.")

-        logger.debug(f"Waiting for read thread of {self} to join...")
-        self.thread.join(timeout=2.0)
        if self.thread.is_alive():
-            logger.warning(f"Read thread for {self} did not terminate gracefully after 2 seconds.")
+            logger.debug(f"Waiting for read thread of {self} to join...")
+            self.thread.join(timeout=2.0)
+            if self.thread.is_alive():
+                logger.warning(f"Read thread for {self} did not terminate gracefully after 2 seconds.")
+            else:
+                logger.debug(f"Read thread for {self} joined successfully.")
        else:
-            logger.debug(f"Read thread for {self} joined successfully.")
+            logger.debug(f"Read thread for {self} was already stopped.")
+
        self.thread = None
        self.stop_event = None

@@ -556,9 +562,5 @@ class OpenCVCamera(Camera):
            logger.debug(f"Releasing OpenCV VideoCapture object for {self}.")
            self.videocapture_camera.release()
            self.videocapture_camera = None
-        else:
-            logger.debug(
-                f"No OpenCV VideoCapture object to release for {self} (was already None or failed connection)."
-            )

-        logger.debug(f"Camera {self.index_or_path} disconnected successfully.")
+        logger.info(f"Camera {self.index_or_path} disconnected successfully.")
--- a/lerobot/common/cameras/utils.py
+++ b/lerobot/common/cameras/utils.py
@@ -192,3 +192,180 @@ def save_image(img_array: np.ndarray, camera_index: int, frame_index: int, image
 #     )
 #     args = parser.parse_args()
 #     save_images_from_cameras(**vars(args))
+
+
+### Realsense
+
+# def find_realsense_cameras(raise_when_empty: bool = True) -> list[dict]:
+#     """
+#     Find the names and the serial numbers of the Intel RealSense cameras
+#     connected to the computer.
+#     """
+#     import pyrealsense2 as rs
+
+#     cameras = []
+#     for device in rs.context().query_devices():
+#         serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX)))
+#         name = device.get_info(rs.camera_info.name)
+#         cameras.append(
+#             {
+#                 "serial_number": serial_number,
+#                 "name": name,
+#             }
+#         )
+
+#     if raise_when_empty and len(cameras) == 0:
+#         raise OSError(
+#             "Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware."
+#         )
+
+#     return cameras
+
+
+# def save_image(img_array, serial_number, frame_index, images_dir):
+#     try:
+#         img = Image.fromarray(img_array)
+#         path = images_dir / f"camera_{serial_number}_frame_{frame_index:06d}.png"
+#         path.parent.mkdir(parents=True, exist_ok=True)
+#         img.save(str(path), quality=100)
+#         logging.info(f"Saved image: {path}")
+#     except Exception as e:
+#         logging.error(f"Failed to save image for camera {serial_number} frame {frame_index}: {e}")
+
+
+# def save_images_from_cameras(
+#     images_dir: Path,
+#     serial_numbers: list[int] | None = None,
+#     fps=None,
+#     width=None,
+#     height=None,
+#     record_time_s=2,
+# ):
+#     """
+#     Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
+#     associated to a given serial number.
+#     """
+#     if serial_numbers is None or len(serial_numbers) == 0:
+#         camera_infos = find_realsense_cameras()
+#         serial_numbers = [cam["serial_number"] for cam in camera_infos]
+
+#     import cv2
+
+#     print("Connecting cameras")
+#     cameras = []
+#     for cam_sn in serial_numbers:
+#         print(f"{cam_sn=}")
+#         config = RealSenseCameraConfig(serial_number=cam_sn, fps=fps, width=width, height=height)
+#         camera = RealSenseCamera(config)
+#         camera.connect()
+#         print(
+#             f"RealSenseCamera({camera.serial_number}, fps={camera.fps}, width={camera.capture_width}, height={camera.capture_height}, color_mode={camera.color_mode})"
+#         )
+#         cameras.append(camera)
+
+#     images_dir = Path(images_dir)
+#     if images_dir.exists():
+#         shutil.rmtree(
+#             images_dir,
+#         )
+#     images_dir.mkdir(parents=True, exist_ok=True)
+
+#     print(f"Saving images to {images_dir}")
+#     frame_index = 0
+#     start_time = time.perf_counter()
+#     try:
+#         with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+#             while True:
+#                 now = time.perf_counter()
+
+#                 for camera in cameras:
+#                     # If we use async_read when fps is None, the loop will go full speed, and we will end up
+#                     # saving the same images from the cameras multiple times until the RAM/disk is full.
+#                     image = camera.read() if fps is None else camera.async_read()
+#                     if image is None:
+#                         print("No Frame")
+
+#                     bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+#                     executor.submit(
+#                         save_image,
+#                         bgr_converted_image,
+#                         camera.serial_number,
+#                         frame_index,
+#                         images_dir,
+#                     )
+
+#                 if fps is not None:
+#                     dt_s = time.perf_counter() - now
+#                     busy_wait(1 / fps - dt_s)
+
+#                 if time.perf_counter() - start_time > record_time_s:
+#                     break
+
+#                 print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
+
+#                 frame_index += 1
+#     finally:
+#         print(f"Images have been saved to {images_dir}")
+#         for camera in cameras:
+#             camera.disconnect()
+
+
+# def find_serial_number_from_name(name):
+#     camera_infos = find_realsense_cameras()
+#     camera_names = [cam["name"] for cam in camera_infos]
+#     this_name_count = Counter(camera_names)[name]
+#     if this_name_count > 1:
+#         # TODO(aliberts): Test this with multiple identical cameras (Aloha)
+#         raise ValueError(
+#             f"Multiple {name} cameras have been detected. Please use their serial number to instantiate them."
+#         )
+
+#     name_to_serial_dict = {cam["name"]: cam["serial_number"] for cam in camera_infos}
+#     cam_sn = name_to_serial_dict[name]
+
+#     return cam_sn
+
+# if __name__ == "__main__":
+#     parser = argparse.ArgumentParser(
+#         description="Save a few frames using `RealSenseCamera` for all cameras connected to the computer, or a selected subset."
+#     )
+#     parser.add_argument(
+#         "--serial-numbers",
+#         type=int,
+#         nargs="*",
+#         default=None,
+#         help="List of serial numbers used to instantiate the `RealSenseCamera`. If not provided, find and use all available camera indices.",
+#     )
+#     parser.add_argument(
+#         "--fps",
+#         type=int,
+#         default=30,
+#         help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
+#     )
+#     parser.add_argument(
+#         "--width",
+#         type=str,
+#         default=640,
+#         help="Set the width for all cameras. If not provided, use the default width of each camera.",
+#     )
+#     parser.add_argument(
+#         "--height",
+#         type=str,
+#         default=480,
+#         help="Set the height for all cameras. If not provided, use the default height of each camera.",
+#     )
+#     parser.add_argument(
+#         "--images-dir",
+#         type=Path,
+#         default="outputs/images_from_intelrealsense_cameras",
+#         help="Set directory to save a few frames for each camera.",
+#     )
+#     parser.add_argument(
+#         "--record-time-s",
+#         type=float,
+#         default=2.0,
+#         help="Set the number of seconds used to record the frames. By default, 2 seconds.",
+#     )
+#     args = parser.parse_args()
+#     save_images_from_cameras(**vars(args))
--- a/tests/cameras/mock_cv2.py
+++ b/tests/cameras/mock_cv2.py
@@ -1,101 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from functools import cache
-
-import numpy as np
-
-CAP_V4L2 = 200
-CAP_DSHOW = 700
-CAP_AVFOUNDATION = 1200
-CAP_ANY = -1
-
-CAP_PROP_FPS = 5
-CAP_PROP_FRAME_WIDTH = 3
-CAP_PROP_FRAME_HEIGHT = 4
-COLOR_RGB2BGR = 4
-COLOR_BGR2RGB = 4
-
-ROTATE_90_COUNTERCLOCKWISE = 2
-ROTATE_90_CLOCKWISE = 0
-ROTATE_180 = 1
-
-
-@cache
-def _generate_image(width: int, height: int):
-    return np.random.randint(0, 256, size=(height, width, 3), dtype=np.uint8)
-
-
-def cvtColor(color_image, color_conversion):  # noqa: N802
-    if color_conversion in [COLOR_RGB2BGR, COLOR_BGR2RGB]:
-        return color_image[:, :, [2, 1, 0]]
-    else:
-        raise NotImplementedError(color_conversion)
-
-
-def rotate(color_image, rotation):
-    if rotation is None:
-        return color_image
-    elif rotation == ROTATE_90_CLOCKWISE:
-        return np.rot90(color_image, k=1)
-    elif rotation == ROTATE_180:
-        return np.rot90(color_image, k=2)
-    elif rotation == ROTATE_90_COUNTERCLOCKWISE:
-        return np.rot90(color_image, k=3)
-    else:
-        raise NotImplementedError(rotation)
-
-
-class VideoCapture:
-    def __init__(self, *args, **kwargs):
-        self._mock_dict = {
-            CAP_PROP_FPS: 30,
-            CAP_PROP_FRAME_WIDTH: 640,
-            CAP_PROP_FRAME_HEIGHT: 480,
-        }
-        self._is_opened = True
-
-    def isOpened(self):  # noqa: N802
-        return self._is_opened
-
-    def set(self, propId: int, value: float) -> bool:  # noqa: N803
-        if not self._is_opened:
-            raise RuntimeError("Camera is not opened")
-        self._mock_dict[propId] = value
-        return True
-
-    def get(self, propId: int) -> float:  # noqa: N803
-        if not self._is_opened:
-            raise RuntimeError("Camera is not opened")
-        value = self._mock_dict[propId]
-        if value == 0:
-            if propId == CAP_PROP_FRAME_HEIGHT:
-                value = 480
-            elif propId == CAP_PROP_FRAME_WIDTH:
-                value = 640
-        return value
-
-    def read(self):
-        if not self._is_opened:
-            raise RuntimeError("Camera is not opened")
-        h = self.get(CAP_PROP_FRAME_HEIGHT)
-        w = self.get(CAP_PROP_FRAME_WIDTH)
-        ret = True
-        return ret, _generate_image(width=w, height=h)
-
-    def release(self):
-        self._is_opened = False
-
-    def __del__(self):
-        if self._is_opened:
-            self.release()