refactor(cameras): improvements opencv cam v0.1

2025-04-28 15:05:41 +02:00
parent f705dd8cec
commit c452d844ec
8 changed files with 658 additions and 320 deletions
--- a/lerobot/common/cameras/camera.py
+++ b/lerobot/common/cameras/camera.py
@@ -1,10 +1,30 @@
 #!/usr/bin/env python
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import abc
 import numpy as np
 from .configs import ColorMode
 # NOTE(Steven): Consider something like configure() if makes sense for both cameras
 class Camera(abc.ABC):
-    @abc.abstractproperty
+    @property
    @abc.abstractmethod
    def is_connected(self) -> bool:
        pass
@@ -13,11 +33,11 @@ class Camera(abc.ABC):
        pass
    @abc.abstractmethod
-    def read(self, temporary_color: str | None = None) -> np.ndarray:
+    def read(self, color_mode: ColorMode | None = None) -> np.ndarray:
        pass
    @abc.abstractmethod
-    def async_read(self) -> np.ndarray:
+    def async_read(self, timeout_ms: float = 2000) -> np.ndarray:
        pass
    @abc.abstractmethod
--- a/lerobot/common/cameras/configs.py
+++ b/lerobot/common/cameras/configs.py
@@ -1,3 +1,19 @@
 #!/usr/bin/env python
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import abc
 from dataclasses import dataclass
 from enum import Enum
@@ -6,8 +22,15 @@ import draccus
 class ColorMode(Enum):
-    RGB = 0
+    RGB = "rgb"
-    BGR = 1
+    BGR = "bgr"
 class Cv2Rotation(Enum):
    NO_ROTATION = 0
    ROTATE_90 = 90
    ROTATE_180 = 180
    ROTATE_270 = -90
@dataclass
--- a/lerobot/common/cameras/opencv/camera_opencv.py
+++ b/lerobot/common/cameras/opencv/camera_opencv.py
@@ -13,35 +13,27 @@
 # limitations under the License.
 """
-This file contains utilities for recording frames from cameras. For more info look at `OpenCVCamera` docstring.
+Provides the OpenCVCamera class for capturing frames from cameras using OpenCV.
 """
-import argparse
+import contextlib
 import concurrent.futures
 import logging
 import math
 import platform
-import shutil
+import queue
 import time
 from pathlib import Path
 from threading import Event, Thread
 from typing import TypeAlias
 import cv2
 import numpy as np
 from PIL import Image
 from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
 from lerobot.common.utils.robot_utils import (
    busy_wait,
 )
 from lerobot.common.utils.utils import capture_timestamp_utc
 from ..camera import Camera
-from ..utils import get_cv2_backend, get_cv2_rotation
+from ..utils import IndexOrPath, get_cv2_backend, get_cv2_rotation
-from .configuration_opencv import OpenCVCameraConfig
+from .configuration_opencv import ColorMode, OpenCVCameraConfig
 IndexOrPath: TypeAlias = int | Path
 # The maximum opencv device index depends on your operating system. For instance,
 # if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
@@ -53,376 +45,505 @@ MAX_OPENCV_INDEX = 60
 logger = logging.getLogger(__name__)
 def is_valid_unix_path(path: str) -> bool:
    """Note: if 'path' points to a symlink, this will return True only if the target exists"""
    p = Path(path)
    return p.is_absolute() and p.exists()
 def get_camera_index_from_unix_port(port: Path) -> int:
    return int(str(port.resolve()).removeprefix("/dev/video"))
 def save_image(img_array: np.ndarray, camera_index: int, frame_index: int, images_dir: Path):
    img = Image.fromarray(img_array)
    path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
    path.parent.mkdir(parents=True, exist_ok=True)
    img.save(str(path), quality=100)
 def save_images_from_cameras(
    images_dir: Path,
    camera_idx_or_paths: list[IndexOrPath] | None = None,
    fps: int | None = None,
    width: int | None = None,
    height: int | None = None,
    record_time_s: int = 2,
 ):
    """
    Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
    associated to a given camera index.
    """
    if not camera_idx_or_paths:
        camera_idx_or_paths = OpenCVCamera.find_cameras()
        if len(camera_idx_or_paths) == 0:
            raise RuntimeError(
                "Not a single camera was detected. Try re-plugging, or re-installing `opencv-python`, "
                "or your camera driver, or make sure your camera is compatible with opencv."
            )
    print("Connecting cameras")
    cameras = []
    for idx_or_path in camera_idx_or_paths:
        config = OpenCVCameraConfig(index_or_path=idx_or_path, fps=fps, width=width, height=height)
        camera = OpenCVCamera(config)
        camera.connect()
        print(
            f"OpenCVCamera({camera.index_or_path}, fps={camera.fps}, width={camera.capture_width}, "
            f"height={camera.capture_height}, color_mode={camera.color_mode})"
        )
        cameras.append(camera)
    images_dir = Path(images_dir)
    if images_dir.exists():
        shutil.rmtree(
            images_dir,
        )
    images_dir.mkdir(parents=True, exist_ok=True)
    print(f"Saving images to {images_dir}")
    frame_index = 0
    start_time = time.perf_counter()
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        while True:
            now = time.perf_counter()
            for camera in cameras:
                # If we use async_read when fps is None, the loop will go full speed, and we will endup
                # saving the same images from the cameras multiple times until the RAM/disk is full.
                image = camera.read() if fps is None else camera.async_read()
                executor.submit(
                    save_image,
                    image,
                    camera.camera_index,
                    frame_index,
                    images_dir,
                )
            if fps is not None:
                dt_s = time.perf_counter() - now
                busy_wait(1 / fps - dt_s)
            print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
            if time.perf_counter() - start_time > record_time_s:
                break
            frame_index += 1
    print(f"Images have been saved to {images_dir}")
 class OpenCVCamera(Camera):
    """
-    The OpenCVCamera class allows to efficiently record images from cameras. It relies on opencv2 to communicate
+    Manages camera interactions using OpenCV for efficient frame recording.
    with the cameras. Most cameras are compatible. For more info, see the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
-    An OpenCVCamera instance requires a camera index (e.g. `OpenCVCamera(camera_index=0)`). When you only have one camera
+    This class provides a high-level interface to connect to, configure, and read
-    like a webcam of a laptop, the camera index is expected to be 0, but it might also be very different, and the camera index
+    frames from cameras compatible with OpenCV's VideoCapture. It supports both
-    might change if you reboot your computer or re-plug your camera. This behavior depends on your operation system.
+    synchronous and asynchronous frame reading.
-    To find the camera indices of your cameras, you can run our utility script that will be save a few frames for each camera:
+    An OpenCVCamera instance requires a camera index (e.g., 0) or a device path
    (e.g., '/dev/video0' on Linux). Camera indices can be unstable across reboots
    or port changes, especially on Linux. Use the provided utility script to find
    available camera indices or paths:
    ```bash
-    python lerobot/common/robot_devices/cameras/opencv.py --images-dir outputs/images_from_opencv_cameras
+    NOTE(Steven): Point to future util
    ```
-    When an OpenCVCamera is instantiated, if no specific config is provided, the default fps, width, height and color_mode
+    The camera's default settings (FPS, resolution, color mode) are used unless
-    of the given camera will be used.
+    overridden in the configuration.
-    Example of usage:
+    Args:
-    ```python
+        config (OpenCVCameraConfig): Configuration object containing settings like
-    from lerobot.common.robot_devices.cameras.configs import OpenCVCameraConfig
+            camera index/path, desired FPS, width, height, color mode, and rotation.
-    config = OpenCVCameraConfig(index_or_path=0)
+    Example:
-    camera = OpenCVCamera(config)
+        ```python
-    camera.connect()
+        from lerobot.common.cameras.opencv import OpenCVCamera
-    color_image = camera.read()
+        from lerobot.common.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode
    # when done using the camera, consider disconnecting
    camera.disconnect()
    ```
-    Example of changing default fps, width, height and color_mode:
+        # Basic usage with camera index 0
-    ```python
+        config = OpenCVCameraConfig(index_or_path=0)
-    config = OpenCVCameraConfig(index_or_path=0, fps=30, width=1280, height=720)
+        camera = OpenCVCamera(config)
-    config = OpenCVCameraConfig(index_or_path=0, fps=90, width=640, height=480)
+        try:
-    config = OpenCVCameraConfig(index_or_path=0, fps=90, width=640, height=480, color_mode="bgr")
+            camera.connect()
-    # Note: might error out open `camera.connect()` if these settings are not compatible with the camera
+            print(f"Connected to {camera}")
-    ```
+            color_image = camera.read() # Synchronous read
            print(f"Read frame shape: {color_image.shape}")
            async_image = camera.async_read() # Asynchronous read
            print(f"Async read frame shape: {async_image.shape}")
        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            camera.disconnect()
            print(f"Disconnected from {camera}")
        # Example with custom settings
        custom_config = OpenCVCameraConfig(
            index_or_path='/dev/video0', # Or use an index
            fps=30,
            width=1280,
            height=720,
            color_mode=ColorMode.RGB,
            rotation=90
        )
        custom_camera = OpenCVCamera(custom_config)
        # ... connect, read, disconnect ...
        ```
    """
    def __init__(self, config: OpenCVCameraConfig):
        """
        Initializes the OpenCVCamera instance.
        Args:
            config: The configuration settings for the camera.
        """
        self.config = config
-        self.index_or_path = config.index_or_path
+        self.index_or_path: IndexOrPath = config.index_or_path
-        # Store the raw (capture) resolution from the config.
+        self.capture_width: int | None = config.width
-        self.capture_width = config.width
+        self.capture_height: int | None = config.height
        self.capture_height = config.height
        # If rotated by ±90, swap width and height.
        if config.rotation in [-90, 90]:
-            self.width = config.height
+            self.width: int | None = config.height
-            self.height = config.width
+            self.height: int | None = config.width
        else:
-            self.width = config.width
+            self.width: int | None = config.width
-            self.height = config.height
+            self.height: int | None = config.height
-        self.fps = config.fps
+        self.fps: int | None = config.fps
-        self.channels = config.channels
+        self.channels: int = config.channels
-        self.color_mode = config.color_mode
+        self.color_mode: ColorMode = config.color_mode
-        self.camera: cv2.VideoCapture | None = None
+        self.videocapture_camera: cv2.VideoCapture | None = None
        self.thread: Thread | None = None
        self.stop_event: Event | None = None
-        self.color_image = None
+        self.frame_queue: queue.Queue = queue.Queue(maxsize=1)
        self.logs = {}
-        self.rotation = get_cv2_rotation(config.rotation)
+        self.logs: dict = {}  # NOTE(Steven): Might be removed in the future
-        self.backend = get_cv2_backend()
+
        self.rotation: int | None = get_cv2_rotation(config.rotation)
        self.backend: int = get_cv2_backend()
    def __str__(self) -> str:
        """Returns a string representation of the camera instance."""
        return f"{self.__class__.__name__}({self.index_or_path})"
    @property
    def is_connected(self) -> bool:
-        return self.camera.isOpened() if isinstance(self.camera, cv2.VideoCapture) else False
+        """Checks if the camera is currently connected and opened."""
        return isinstance(self.videocapture_camera, cv2.VideoCapture) and self.videocapture_camera.isOpened()
    def _scan_available_cameras_and_raise(self, index_or_path: IndexOrPath) -> None:
        """
        Scans for available cameras and raises an error if the specified
        camera cannot be found or accessed.
        Args:
            index_or_path: The camera identifier that failed to connect.
        Raises:
            ValueError: If the specified `index_or_path` is not found among the
                list of detected, usable cameras.
            ConnectionError: If the camera connection fails (this specific method
                is called within the connection failure context).
        """
        cameras_info = self.find_cameras()
        available_cam_ids = [cam["index"] for cam in cameras_info]
        if index_or_path not in available_cam_ids:
            raise ValueError(
                f"Camera '{index_or_path}' not found or not accessible among available cameras: {available_cam_ids}. "
                "Ensure the camera is properly connected and drivers are installed. "
                "Run 'NOTE(Steven): Add script path' to list detected cameras."
            )
        raise ConnectionError(
            f"Failed to connect to camera {self}. Even though it might be listed, it could not be opened."
        )
    # NOTE(Steven): Moving it to a different function for now. To be evaluated later if it is worth it and if it makes sense to have it as an abstract method
    def _configure_capture_settings(self, fps: int | None, width: int | None, height: int | None) -> None:
        """
        Applies the specified FPS, width, and height settings to the connected camera.
        This method attempts to set the camera properties via OpenCV. It checks if
        the camera successfully applied the settings and raises an error if not.
        Args:
            fps: The desired frames per second. If None, the setting is skipped.
            width: The desired capture width. If None, the setting is skipped.
            height: The desired capture height. If None, the setting is skipped.
        Raises:
            RuntimeError: If the camera fails to set any of the specified properties
                          to the requested value.
            DeviceNotConnectedError: If the camera is not connected when attempting
                                     to configure settings.
        """
        if not self.is_connected:
            raise DeviceNotConnectedError(f"Cannot configure settings for {self} as it is not connected.")
        if fps is not None:
            self._set_fps(fps)
        if width is not None:
            self._set_capture_width(width)
        if height is not None:
            self._set_capture_height(height)
    def connect(self):
        """
        Connects to the camera device specified in the configuration.
        Initializes the OpenCV VideoCapture object, sets desired camera properties
        (FPS, width, height), and performs initial checks.
        Raises:
            DeviceAlreadyConnectedError: If the camera is already connected.
            ValueError: If the specified camera index/path is not found or accessible.
            ConnectionError: If the camera is found but fails to open.
            RuntimeError: If the camera opens but fails to apply requested FPS/resolution settings.
        """
        if self.is_connected:
            raise DeviceAlreadyConnectedError(f"{self} is already connected.")
-        # Use 1 thread to avoid blocking the main thread. Especially useful during data collection
+        # Use 1 thread for OpenCV operations to avoid potential conflicts or
-        # when other threads are used to save the images.
+        # blocking in multi-threaded applications, especially during data collection.
        cv2.setNumThreads(1)
-        self.camera = cv2.VideoCapture(self.index_or_path, self.backend)
+        logger.debug(f"Attempting to connect to camera {self.index_or_path} using backend {self.backend}...")
        self.videocapture_camera = cv2.VideoCapture(self.index_or_path, self.backend)
-        # If the camera doesn't work, display the camera indices corresponding to valid cameras.
+        if not self.videocapture_camera.isOpened():
-        if not self.camera.isOpened():
+            logger.error(f"Failed to open camera {self.index_or_path}.")
-            # Release camera to make it accessible for `find_camera_indices`
+            self.videocapture_camera.release()
-            self.camera.release()
+            self.videocapture_camera = None
-            # Verify that the provided `camera_index` is valid before printing the traceback
+            self._scan_available_cameras_and_raise(self.index_or_path)  # This raises an exception everytime
            cameras_info = self.find_cameras()
            available_cam_ids = [cam["index"] for cam in cameras_info]
            if self.index_or_path not in available_cam_ids:
                raise ValueError(
                    f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, "
                    f"but {self.index_or_path} is provided instead. To find the camera index you should use, "
                    "run `python lerobot/common/robot_devices/cameras/opencv.py`."
                )
-            raise ConnectionError(f"Can't access {self}.")
+        logger.debug(f"Successfully opened camera {self.index_or_path}. Applying configuration...")
-
+        self._configure_capture_settings(self.fps, self.capture_width, self.capture_height)
-        if self.fps is not None:
+        logger.debug(f"Camera {self.index_or_path} connected and configured successfully.")
            self._set_fps(self.fps)
        if self.capture_width is not None:
            self._set_capture_width(self.capture_width)
        if self.capture_height is not None:
            self._set_capture_height(self.capture_height)
    def _set_fps(self, fps: int) -> None:
-        self.camera.set(cv2.CAP_PROP_FPS, fps)
+        """Sets the camera's frames per second (FPS)."""
-        actual_fps = self.camera.get(cv2.CAP_PROP_FPS)
+        success = self.videocapture_camera.set(cv2.CAP_PROP_FPS, float(fps))
-        # Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
+        actual_fps = self.videocapture_camera.get(cv2.CAP_PROP_FPS)
-        if not math.isclose(fps, actual_fps, rel_tol=1e-3):
+        # Use math.isclose for robust float comparison
-            raise RuntimeError(f"Can't set {fps=} for {self}. Actual value is {actual_fps}.")
+        if not success or not math.isclose(fps, actual_fps, rel_tol=1e-3):
            logger.warning(
                f"Requested FPS {fps} for {self}, but camera reported {actual_fps} (set success: {success}). "
                "This might be due to camera limitations."
            )
            raise RuntimeError(
                f"Failed to set requested FPS {fps} for {self}. Actual value reported: {actual_fps}."
            )
        logger.debug(f"FPS set to {actual_fps} for {self}.")
    def _set_capture_width(self, capture_width: int) -> None:
-        self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, capture_width)
+        """Sets the camera's frame capture width."""
-        actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
+        success = self.videocapture_camera.set(cv2.CAP_PROP_FRAME_WIDTH, float(capture_width))
-        if not math.isclose(self.capture_width, actual_width, rel_tol=1e-3):
+        actual_width = round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_WIDTH))
-            raise RuntimeError(f"Can't set {capture_width=} for {self}. Actual value is {actual_width}.")
+        if not success or self.capture_width != actual_width:
            logger.warning(
                f"Requested capture width {capture_width} for {self}, but camera reported {actual_width} (set success: {success})."
            )
            raise RuntimeError(
                f"Failed to set requested capture width {capture_width} for {self}. Actual value: {actual_width}."
            )
        logger.debug(f"Capture width set to {actual_width} for {self}.")
    def _set_capture_height(self, capture_height: int) -> None:
-        self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, capture_height)
+        """Sets the camera's frame capture height."""
-        actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
+        success = self.videocapture_camera.set(cv2.CAP_PROP_FRAME_HEIGHT, float(capture_height))
-        if not math.isclose(self.capture_height, actual_height, rel_tol=1e-3):
+        actual_height = round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
-            raise RuntimeError(f"Can't set {capture_height=} for {self}. Actual value is {actual_height}.")
+        if not success or self.capture_height != actual_height:
            logger.warning(
                f"Requested capture height {capture_height} for {self}, but camera reported {actual_height} (set success: {success})."
            )
            raise RuntimeError(
                f"Failed to set requested capture height {capture_height} for {self}. Actual value: {actual_height}."
            )
        logger.debug(f"Capture height set to {actual_height} for {self}.")
    @staticmethod
    def find_cameras(max_index_search_range=MAX_OPENCV_INDEX) -> list[IndexOrPath]:
        """
        Detects available cameras connected to the system.
        On Linux, it scans '/dev/video*' paths. On other systems (like macOS, Windows),
        it checks indices from 0 up to `max_index_search_range`.
        Args:
            max_index_search_range (int): The maximum index to check on non-Linux systems.
        Returns:
            list[dict]: A list of dictionaries, where each dictionary contains information
                       about a found camera, primarily its 'index' (which can be an
                       integer index or a string path).
        Note:
            This method temporarily opens each potential camera to check availability,
            which might briefly interfere with other applications using cameras.
        """
        if platform.system() == "Linux":
-            print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
+            logger.info("Linux detected. Scanning '/dev/video*' device paths...")
-            possible_idx_or_paths = [str(port) for port in Path("/dev").glob("video*")]
+            possible_paths = sorted(Path("/dev").glob("video*"), key=lambda p: p.name)
            targets_to_scan = [str(p) for p in possible_paths]
            logger.debug(f"Found potential paths: {targets_to_scan}")
        else:
-            print(
+            logger.info(
-                f"{platform.system()} system detected. Finding available camera indices through "
+                f"{platform.system()} system detected. Scanning indices from 0 to {max_index_search_range}..."
                f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
            )
-            possible_idx_or_paths = range(max_index_search_range)
+            targets_to_scan = list(range(max_index_search_range))
-        found_idx_or_paths = []
+        found_cameras = []
-        for target in possible_idx_or_paths:
+        for target in targets_to_scan:
-            camera = cv2.VideoCapture(target)
+            camera = cv2.VideoCapture(target, get_cv2_backend())
-            is_open = camera.isOpened()
+            if camera.isOpened():
-            camera.release()
+                logger.debug(f"Camera found and accessible at '{target}'")
-            if is_open:
+                found_cameras.append(target)
-                print(f"Camera found at {target}")
+                camera.release()
                found_idx_or_paths.append(target)
-        return found_idx_or_paths
+        if not found_cameras:
            logger.warning("No OpenCV cameras detected.")
        else:
            logger.info(f"Detected accessible cameras: {found_cameras}")
-    def read(self, temporary_color_mode: str | None = None) -> np.ndarray:
+        return found_cameras
        """Read a frame from the camera returned in the format (height, width, channels)
        (e.g. 480 x 640 x 3), contrarily to the pytorch format which is channel first.
-        Note: Reading a frame is done every `camera.fps` times per second, and it is blocking.
+    def read(self, color_mode: ColorMode | None = None) -> np.ndarray:
-        If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
+        """
        Reads a single frame synchronously from the camera.
        This is a blocking call. It waits for the next available frame from the
        camera hardware via OpenCV.
        Args:
            color_mode (Optional[ColorMode]): If specified, overrides the default
                color mode (`self.color_mode`) for this read operation (e.g.,
                request RGB even if default is BGR).
        Returns:
            np.ndarray: The captured frame as a NumPy array in the format
                       (height, width, channels), using the specified or default
                       color mode and applying any configured rotation.
        Raises:
            DeviceNotConnectedError: If the camera is not connected.
            RuntimeError: If reading the frame from the camera fails or if the
                          received frame dimensions don't match expectations before rotation.
            ValueError: If an invalid `color_mode` is requested.
        """
        if not self.is_connected:
            raise DeviceNotConnectedError(f"{self} is not connected.")
-        start = time.perf_counter()
+        start_time = time.perf_counter()
-        ret, color_image = self.camera.read()
+        ret, frame = self.videocapture_camera.read()
        if not ret:
            raise RuntimeError(f"Can't capture color image from {self}.")
-        self.color_image = self._postprocess_image(color_image, temporary_color_mode)
+        if not ret or frame is None:
-
+            raise RuntimeError(
-        dt_ms = (time.perf_counter() - start) * 1e3
+                f"Failed to capture frame from {self}. '.read()' returned status={ret} and frame is None."
        logger.debug(f"{self} read: {dt_ms:.1f}ms")
        # log the utc time at which the image was received
        self.logs["timestamp_utc"] = capture_timestamp_utc()
    def _postprocess_image(self, image, temporary_color_mode: str | None = None):
        requested_color_mode = self.color_mode if temporary_color_mode is None else temporary_color_mode
        if requested_color_mode not in ["rgb", "bgr"]:
            raise ValueError(
                f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided."
            )
-        # OpenCV uses BGR format as default (blue, green, red) for all operations, including displaying images.
+        # Post-process the frame (color conversion, dimension check, rotation)
-        # However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks,
+        processed_frame = self._postprocess_image(frame, color_mode)
-        # so we convert the image color from BGR to RGB.
+
-        if requested_color_mode == "rgb":
+        read_duration_ms = (time.perf_counter() - start_time) * 1e3
-            color_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        logger.debug(f"{self} synchronous read took: {read_duration_ms:.1f}ms")
        self.logs["timestamp_utc"] = capture_timestamp_utc()
        return processed_frame
    def _postprocess_image(self, image: np.ndarray, color_mode: ColorMode | None = None) -> np.ndarray:
        """
        Applies color conversion, dimension validation, and rotation to a raw frame.
        Args:
            image (np.ndarray): The raw image frame (expected BGR format from OpenCV).
            color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None,
                                             uses the instance's default `self.color_mode`.
        Returns:
            np.ndarray: The processed image frame.
        Raises:
            ValueError: If the requested `color_mode` is invalid.
            RuntimeError: If the raw frame dimensions do not match the configured
                          `capture_width` and `capture_height`.
        """
        requested_color_mode = self.color_mode if color_mode is None else color_mode
        if requested_color_mode not in (ColorMode.RGB, ColorMode.BGR):
            raise ValueError(
                f"Invalid requested color mode '{requested_color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
            )
        h, w, c = image.shape
        h, w, _ = color_image.shape
        if h != self.capture_height or w != self.capture_width:
            raise RuntimeError(
-                f"Can't capture color image with expected height and width ({self.height} x {self.width}). "
+                f"Captured frame dimensions ({h}x{w}) do not match configured capture dimensions ({self.capture_height}x{self.capture_width}) for {self}."
-                f"({h} x {w}) returned instead."
+            )
        if c != self.channels:
            logger.warning(
                f"Captured frame channels ({c}) do not match configured channels ({self.channels}) for {self}."
            )
-        if self.rotation is not None:
+        processed_image = image
-            color_image = cv2.rotate(color_image, self.rotation)
+        if requested_color_mode == ColorMode.RGB:
            processed_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            logger.debug(f"Converted frame from BGR to RGB for {self}.")
-        return color_image
+        if self.rotation is not None:
            processed_image = cv2.rotate(processed_image, self.rotation)
            logger.debug(f"Rotated frame by {self.config.rotation} degrees for {self}.")
        return processed_image
    def _read_loop(self):
        """
        Internal loop run by the background thread for asynchronous reading.
        Continuously reads frames from the camera using the synchronous `read()`
        method and places the latest frame into the `frame_queue`. It overwrites
        any previous frame in the queue to ensure only the most recent one is available.
        """
        logger.debug(f"Starting read loop thread for {self}.")
        while not self.stop_event.is_set():
            try:
-                self.color_image = self.read()
+                color_image = self.read()
            except Exception as e:
                print(f"Error reading in thread: {e}")
-    def async_read(self):
+                with contextlib.suppress(queue.Empty):
                    _ = self.frame_queue.get_nowait()
                self.frame_queue.put(color_image)
                logger.debug(f"Frame placed in queue for {self}.")
            except DeviceNotConnectedError:
                logger.error(f"Read loop for {self} stopped: Camera disconnected.")
                break
            except Exception as e:
                logger.warning(f"Error reading frame in background thread for {self}: {e}")
        logger.debug(f"Stopping read loop thread for {self}.")
    def _ensure_read_thread_running(self):
        """Starts or restarts the background read thread if it's not running."""
        logger.debug(f"Read thread for {self} is not running. Starting...")
        if self.thread is not None:
            self.thread.join(timeout=0.1)
        if self.stop_event is not None:
            self.stop_event.set()
        self.stop_event = Event()
        self.thread = Thread(
            target=self._read_loop, args=(), name=f"OpenCVCameraReadLoop-{self}-{self.index_or_path}"
        )
        self.thread.daemon = True
        self.thread.start()
        logger.debug(f"Read thread started for {self}.")
    def async_read(self, timeout_ms: float = 2000) -> np.ndarray:
        """
        Reads the latest available frame asynchronously.
        This method retrieves the most recent frame captured by the background
        read thread. It does not block waiting for the camera hardware directly,
        only waits for a frame to appear in the internal queue up to the specified
        timeout.
        Args:
            timeout_ms (float): Maximum time in milliseconds to wait for a frame
                to become available in the queue. Defaults to 2000ms (2 seconds).
        Returns:
            np.ndarray: The latest captured frame as a NumPy array in the format
                       (height, width, channels), processed according to configuration.
        Raises:
            DeviceNotConnectedError: If the camera is not connected.
            TimeoutError: If no frame becomes available within the specified timeout.
            RuntimeError: If an unexpected error occurs while retrieving from the queue.
        """
        if not self.is_connected:
            raise DeviceNotConnectedError(f"{self} is not connected.")
-        if self.thread is None:
+        if self.thread is None or not self.thread.is_alive():
-            self.stop_event = Event()
+            # Ensure the background reading thread is operational
-            self.thread = Thread(target=self._read_loop, args=())
+            self._ensure_read_thread_running()
            self.thread.daemon = True
            self.thread.start()
-        num_tries = 0
+        try:
-        while True:
+            # Get the latest frame from the queue, waiting up to the timeout
-            if self.color_image is not None:
+            return self.frame_queue.get(timeout=timeout_ms / 1000.0)
-                return self.color_image
+        except queue.Empty as e:
            thread_alive = self.thread is not None and self.thread.is_alive()
            logger.error(
                f"Timeout waiting for frame from {self} queue after {timeout_ms}ms. "
                f"(Read thread alive: {thread_alive})"
            )
            raise TimeoutError(
                f"Timed out waiting for frame from camera {self.index_or_path} after {timeout_ms} ms. "
                f"Read thread alive: {thread_alive}."
            ) from e
        except Exception as e:
            logger.exception(f"Unexpected error getting frame from queue for {self}: {e}")
            raise RuntimeError(f"Error getting frame from queue for camera {self.index_or_path}: {e}") from e
-            time.sleep(1 / self.fps)
+    def _shutdown_read_thread(self):
-            num_tries += 1
+        """Cleans the background read thread if it's running."""
-            if num_tries > self.fps * 2:
+        if self.stop_event is not None:
-                raise TimeoutError("Timed out waiting for async_read() to start.")
+            logger.debug(f"Signaling stop event for read thread of {self}.")
            self.stop_event.set()
        else:
            logger.warning(f"Stop event not found for thread of {self}, cannot signal.")
        logger.debug(f"Waiting for read thread of {self} to join...")
        self.thread.join(timeout=2.0)
        if self.thread.is_alive():
            logger.warning(f"Read thread for {self} did not terminate gracefully after 2 seconds.")
        else:
            logger.debug(f"Read thread for {self} joined successfully.")
        self.thread = None
        self.stop_event = None
    def disconnect(self):
-        if not self.is_connected:
+        """
-            raise DeviceNotConnectedError(f"{self} is not connected.")
+        Disconnects from the camera and cleans up resources.
        Stops the background read thread (if running) and releases the OpenCV
        VideoCapture object.
        Raises:
            DeviceNotConnectedError: If the camera is already disconnected.
        """
        if not self.is_connected and self.thread is None:
            raise DeviceNotConnectedError(
                f"Attempted to disconnect {self}, but it appears already disconnected."
            )
        logger.debug(f"Disconnecting from camera {self.index_or_path}...")
        if self.thread is not None:
-            self.stop_event.set()
+            self._shutdown_read_thread()
            self.thread.join()  # wait for the thread to finish
            self.thread = None
            self.stop_event = None
-        self.camera.release()
+        if self.videocapture_camera is not None:
-        self.camera = None
+            logger.debug(f"Releasing OpenCV VideoCapture object for {self}.")
            self.videocapture_camera.release()
            self.videocapture_camera = None
        else:
            logger.debug(
                f"No OpenCV VideoCapture object to release for {self} (was already None or failed connection)."
            )
-
+        logger.debug(f"Camera {self.index_or_path} disconnected successfully.")
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Save a few frames using `OpenCVCamera` for all cameras connected to the computer, or a selected subset."
    )
    parser.add_argument(
        "--camera-ids",
        type=int,
        nargs="*",
        default=None,
        help="List of camera indices used to instantiate the `OpenCVCamera`. If not provided, find and use all available camera indices.",
    )
    parser.add_argument(
        "--fps",
        type=int,
        default=None,
        help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
    )
    parser.add_argument(
        "--width",
        type=int,
        default=None,
        help="Set the width for all cameras. If not provided, use the default width of each camera.",
    )
    parser.add_argument(
        "--height",
        type=int,
        default=None,
        help="Set the height for all cameras. If not provided, use the default height of each camera.",
    )
    parser.add_argument(
        "--images-dir",
        type=Path,
        default="outputs/images_from_opencv_cameras",
        help="Set directory to save a few frames for each camera.",
    )
    parser.add_argument(
        "--record-time-s",
        type=float,
        default=4.0,
        help="Set the number of seconds used to record the frames. By default, 2 seconds.",
    )
    args = parser.parse_args()
    save_images_from_cameras(**vars(args))
--- a/lerobot/common/cameras/opencv/configuration_opencv.py
+++ b/lerobot/common/cameras/opencv/configuration_opencv.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 from pathlib import Path
-from ..configs import CameraConfig, ColorMode
+from ..configs import CameraConfig, ColorMode, Cv2Rotation
@CameraConfig.register_subclass("opencv")
@@ -23,17 +23,24 @@ class OpenCVCameraConfig(CameraConfig):
    width: int | None = None
    height: int | None = None
    color_mode: ColorMode = ColorMode.RGB
-    channels: int = 3
+    channels: int = 3  # NOTE(Steven): Why is this a config?
-    rotation: int | None = None
+    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
    def __post_init__(self):
-        if self.color_mode not in [ColorMode.RGB, ColorMode.BGR]:
+        if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
            raise ValueError(
-                f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
+                f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
            )
        if self.rotation not in (
            Cv2Rotation.NO_ROTATION,
            Cv2Rotation.ROTATE_90,
            Cv2Rotation.ROTATE_180,
            Cv2Rotation.ROTATE_270,
        ):
            raise ValueError(
                f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
            )
        if self.channels != 3:
            raise NotImplementedError(f"Unsupported number of channels: {self.channels}")
        if self.rotation not in [-90, None, 90, 180]:
            raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
--- a/lerobot/common/cameras/utils.py
+++ b/lerobot/common/cameras/utils.py
@@ -1,7 +1,30 @@
 #!/usr/bin/env python
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import platform
 from pathlib import Path
 from typing import TypeAlias
 import numpy as np
 from PIL import Image
 from .camera import Camera
-from .configs import CameraConfig
+from .configs import CameraConfig, Cv2Rotation
 IndexOrPath: TypeAlias = int | Path
 def make_cameras_from_configs(camera_configs: dict[str, CameraConfig]) -> dict[str, Camera]:
@@ -23,13 +46,14 @@ def make_cameras_from_configs(camera_configs: dict[str, CameraConfig]) -> dict[s
    return cameras
-def get_cv2_rotation(rotation: int) -> int:
+def get_cv2_rotation(rotation: Cv2Rotation) -> int:
    import cv2
    return {
-        -90: cv2.ROTATE_90_COUNTERCLOCKWISE,
+        Cv2Rotation.ROTATE_270: cv2.ROTATE_90_COUNTERCLOCKWISE,
-        90: cv2.ROTATE_90_CLOCKWISE,
+        Cv2Rotation.ROTATE_90: cv2.ROTATE_90_CLOCKWISE,
-        180: cv2.ROTATE_180,
+        Cv2Rotation.ROTATE_180: cv2.ROTATE_180,
        Cv2Rotation.NO_ROTATION: 0,
    }.get(rotation)
@@ -41,3 +65,131 @@ def get_cv2_backend() -> int:
        "Windows": cv2.CAP_AVFOUNDATION,
        "Darwin": cv2.CAP_ANY,
    }.get(platform.system(), cv2.CAP_V4L2)
 def save_image(img_array: np.ndarray, camera_index: int, frame_index: int, images_dir: Path):
    img = Image.fromarray(img_array)
    path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
    path.parent.mkdir(parents=True, exist_ok=True)
    img.save(str(path), quality=100)
 # NOTE(Steven): This should be use with both cameras implementations
 # def save_images_from_cameras(
 #     images_dir: Path,
 #     camera_idx_or_paths: list[IndexOrPath] | None = None,
 #     fps: int | None = None,
 #     width: int | None = None,
 #     height: int | None = None,
 #     record_time_s: int = 2,
 # ):
 #     """
 #     Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
 #     associated to a given camera index.
 #     """
 #     if not camera_idx_or_paths:
 #         camera_idx_or_paths = OpenCVCamera.find_cameras()
 #         if len(camera_idx_or_paths) == 0:
 #             raise RuntimeError(
 #                 "Not a single camera was detected. Try re-plugging, or re-installing `opencv-python`, "
 #                 "or your camera driver, or make sure your camera is compatible with opencv."
 #             )
 #     print("Connecting cameras")
 #     cameras = []
 #     for idx_or_path in camera_idx_or_paths:
 #         config = OpenCVCameraConfig(index_or_path=idx_or_path, fps=fps, width=width, height=height)
 #         camera = OpenCVCamera(config)
 #         camera.connect()
 #         print(
 #             f"OpenCVCamera({camera.index_or_path}, fps={camera.fps}, width={camera.capture_width}, "
 #             f"height={camera.capture_height}, color_mode={camera.color_mode})"
 #         )
 #         cameras.append(camera)
 #     images_dir = Path(images_dir)
 #     if images_dir.exists():
 #         shutil.rmtree(
 #             images_dir,
 #         )
 #     images_dir.mkdir(parents=True, exist_ok=True)
 #     print(f"Saving images to {images_dir}")
 #     frame_index = 0
 #     start_time = time.perf_counter()
 #     with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
 #         while True:
 #             now = time.perf_counter()
 #             for camera in cameras:
 #                 # If we use async_read when fps is None, the loop will go full speed, and we will endup
 #                 # saving the same images from the cameras multiple times until the RAM/disk is full.
 #                 image = camera.read() if fps is None else camera.async_read()
 #                 executor.submit(
 #                     save_image,
 #                     image,
 #                     camera.camera_index,
 #                     frame_index,
 #                     images_dir,
 #                 )
 #             if fps is not None:
 #                 dt_s = time.perf_counter() - now
 #                 busy_wait(1 / fps - dt_s)
 #             print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
 #             if time.perf_counter() - start_time > record_time_s:
 #                 break
 #             frame_index += 1
 #     print(f"Images have been saved to {images_dir}")
 #     # NOTE(Steven): Cameras don't get disconnected
 # # NOTE(Steven): Update this to be valid for both cameras type
 # if __name__ == "__main__":
 #     parser = argparse.ArgumentParser(
 #         description="Save a few frames using `OpenCVCamera` for all cameras connected to the computer, or a selected subset."
 #     )
 #     parser.add_argument(
 #         "--camera-ids",
 #         type=int,
 #         nargs="*",
 #         default=None,
 #         help="List of camera indices used to instantiate the `OpenCVCamera`. If not provided, find and use all available camera indices.",
 #     )
 #     parser.add_argument(
 #         "--fps",
 #         type=int,
 #         default=None,
 #         help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
 #     )
 #     parser.add_argument(
 #         "--width",
 #         type=str,
 #         default=None,
 #         help="Set the width for all cameras. If not provided, use the default width of each camera.",
 #     )
 #     parser.add_argument(
 #         "--height",
 #         type=str,
 #         default=None,
 #         help="Set the height for all cameras. If not provided, use the default height of each camera.",
 #     )
 #     parser.add_argument(
 #         "--images-dir",
 #         type=Path,
 #         default="outputs/images_from_opencv_cameras",
 #         help="Set directory to save a few frames for each camera.",
 #     )
 #     parser.add_argument(
 #         "--record-time-s",
 #         type=float,
 #         default=4.0,
 #         help="Set the number of seconds used to record the frames. By default, 2 seconds.",
 #     )
 #     args = parser.parse_args()
 #     save_images_from_cameras(**vars(args))
--- a/lerobot/common/utils/control_utils.py
+++ b/lerobot/common/utils/control_utils.py
@@ -38,6 +38,7 @@ from lerobot.common.utils.robot_utils import busy_wait
 from lerobot.common.utils.utils import get_safe_torch_device, has_method
 # NOTE(Steven): Consider integrating this in camera class
 def log_control_info(robot: Robot, dt_s, episode_index=None, frame_index=None, fps=None):
    log_items = []
    if episode_index is not None:
--- a/lerobot/find_cameras.py
+++ b/lerobot/find_cameras.py
@@ -0,0 +1 @@
 # NOTE(Steven): Place here the code for save_img (valid for both imnplementations:: python -m lerobot.find_cameras)
--- a/tests/cameras/test_opencv.py
+++ b/tests/cameras/test_opencv.py
@@ -0,0 +1,13 @@
 # from lerobot.common.cameras.opencv import OpenCVCamera, OpenCVCameraConfig
 # class TestCameras(unittest.TestCase):
 # # NOTE(Steven): Independent test func
 # def test_connect():
 #     config = OpenCVCameraConfig(camera_id=0)
 #     camera = OpenCVCamera(config)
 #     camera.connect()
 #     assert camera.is_connected
 # # 1. Refactor -> opencv , write some test , think about realsese, iterate
		`@@ -0,0 +1 @@`
							`# NOTE(Steven): Place here the code for save_img (valid for both imnplementations:: python -m lerobot.find_cameras)`