refactor(cameras): opencv camera init

2025-04-21 16:14:06 +02:00
parent 8a6412b0db
commit 0b5b438f50
2 changed files with 139 additions and 208 deletions
--- a/lerobot/common/cameras/opencv/camera_opencv.py
+++ b/lerobot/common/cameras/opencv/camera_opencv.py
@@ -18,14 +18,16 @@ This file contains utilities for recording frames from cameras. For more info lo
 import argparse
 import concurrent.futures
 import logging
 import math
 import platform
 import shutil
 import threading
 import time
 from pathlib import Path
-from threading import Thread
+from threading import Event, Thread
 from typing import TypeAlias
 import cv2
 import numpy as np
 from PIL import Image
@@ -36,8 +38,11 @@ from lerobot.common.utils.robot_utils import (
 from lerobot.common.utils.utils import capture_timestamp_utc
 from ..camera import Camera
 from ..utils import get_cv2_backend, get_cv2_rotation
 from .configuration_opencv import OpenCVCameraConfig
 IndexOrPath: TypeAlias = int | Path
 # The maximum opencv device index depends on your operating system. For instance,
 # if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
 # on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23.
@@ -45,63 +50,7 @@ from .configuration_opencv import OpenCVCameraConfig
 # treat the same cameras as new devices. Thus we select a higher bound to search indices.
 MAX_OPENCV_INDEX = 60
-
+logger = logging.getLogger(__name__)
 def find_cameras(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False) -> list[dict]:
    cameras = []
    if platform.system() == "Linux":
        print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
        possible_ports = [str(port) for port in Path("/dev").glob("video*")]
        ports = _find_cameras(possible_ports, mock=mock)
        for port in ports:
            cameras.append(
                {
                    "port": port,
                    "index": int(port.removeprefix("/dev/video")),
                }
            )
    else:
        print(
            "Mac or Windows detected. Finding available camera indices through "
            f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
        )
        possible_indices = range(max_index_search_range)
        indices = _find_cameras(possible_indices, mock=mock)
        for index in indices:
            cameras.append(
                {
                    "port": None,
                    "index": index,
                }
            )
    return cameras
 def _find_cameras(
    possible_camera_ids: list[int | str], raise_when_empty=False, mock=False
 ) -> list[int | str]:
    if mock:
        import tests.cameras.mock_cv2 as cv2
    else:
        import cv2
    camera_ids = []
    for camera_idx in possible_camera_ids:
        camera = cv2.VideoCapture(camera_idx)
        is_open = camera.isOpened()
        camera.release()
        if is_open:
            print(f"Camera found at index {camera_idx}")
            camera_ids.append(camera_idx)
    if raise_when_empty and len(camera_ids) == 0:
        raise OSError(
            "Not a single camera was detected. Try re-plugging, or re-installing `opencv2`, "
            "or your camera driver, or make sure your camera is compatible with opencv2."
        )
    return camera_ids
 def is_valid_unix_path(path: str) -> bool:
@@ -114,7 +63,7 @@ def get_camera_index_from_unix_port(port: Path) -> int:
    return int(str(port.resolve()).removeprefix("/dev/video"))
-def save_image(img_array, camera_index, frame_index, images_dir):
+def save_image(img_array: np.ndarray, camera_index: int, frame_index: int, images_dir: Path):
    img = Image.fromarray(img_array)
    path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
    path.parent.mkdir(parents=True, exist_ok=True)
@@ -123,29 +72,32 @@ def save_image(img_array, camera_index, frame_index, images_dir):
 def save_images_from_cameras(
    images_dir: Path,
-    camera_ids: list | None = None,
+    camera_idx_or_paths: list[IndexOrPath] | None = None,
-    fps=None,
+    fps: int | None = None,
-    width=None,
+    width: int | None = None,
-    height=None,
+    height: int | None = None,
-    record_time_s=2,
+    record_time_s: int = 2,
    mock=False,
 ):
    """
    Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
    associated to a given camera index.
    """
-    if camera_ids is None or len(camera_ids) == 0:
+    if not camera_idx_or_paths:
-        camera_infos = find_cameras(mock=mock)
+        camera_idx_or_paths = OpenCVCamera.find_cameras()
-        camera_ids = [cam["index"] for cam in camera_infos]
+        if len(camera_idx_or_paths) == 0:
            raise RuntimeError(
                "Not a single camera was detected. Try re-plugging, or re-installing `opencv-python`, "
                "or your camera driver, or make sure your camera is compatible with opencv."
            )
    print("Connecting cameras")
    cameras = []
-    for cam_idx in camera_ids:
+    for idx_or_path in camera_idx_or_paths:
-        config = OpenCVCameraConfig(camera_index=cam_idx, fps=fps, width=width, height=height, mock=mock)
+        config = OpenCVCameraConfig(index_or_path=idx_or_path, fps=fps, width=width, height=height)
        camera = OpenCVCamera(config)
        camera.connect()
        print(
-            f"OpenCVCamera({camera.camera_index}, fps={camera.fps}, width={camera.capture_width}, "
+            f"OpenCVCamera({camera.index_or_path}, fps={camera.fps}, width={camera.capture_width}, "
            f"height={camera.capture_height}, color_mode={camera.color_mode})"
        )
        cameras.append(camera)
@@ -212,7 +164,7 @@ class OpenCVCamera(Camera):
    ```python
    from lerobot.common.robot_devices.cameras.configs import OpenCVCameraConfig
-    config = OpenCVCameraConfig(camera_index=0)
+    config = OpenCVCameraConfig(index_or_path=0)
    camera = OpenCVCamera(config)
    camera.connect()
    color_image = camera.read()
@@ -222,28 +174,16 @@ class OpenCVCamera(Camera):
    Example of changing default fps, width, height and color_mode:
    ```python
-    config = OpenCVCameraConfig(camera_index=0, fps=30, width=1280, height=720)
+    config = OpenCVCameraConfig(index_or_path=0, fps=30, width=1280, height=720)
-    config = OpenCVCameraConfig(camera_index=0, fps=90, width=640, height=480)
+    config = OpenCVCameraConfig(index_or_path=0, fps=90, width=640, height=480)
-    config = OpenCVCameraConfig(camera_index=0, fps=90, width=640, height=480, color_mode="bgr")
+    config = OpenCVCameraConfig(index_or_path=0, fps=90, width=640, height=480, color_mode="bgr")
    # Note: might error out open `camera.connect()` if these settings are not compatible with the camera
    ```
    """
    def __init__(self, config: OpenCVCameraConfig):
        self.config = config
-        self.camera_index = config.camera_index
+        self.index_or_path = config.index_or_path
        self.port = None
        # Linux uses ports for connecting to cameras
        if platform.system() == "Linux":
            if isinstance(self.camera_index, int):
                self.port = Path(f"/dev/video{self.camera_index}")
            elif isinstance(self.camera_index, str) and is_valid_unix_path(self.camera_index):
                self.port = Path(self.camera_index)
                # Retrieve the camera index from a potentially symlinked path
                self.camera_index = get_camera_index_from_unix_port(self.port)
            else:
                raise ValueError(f"Please check the provided camera_index: {self.camera_index}")
        # Store the raw (capture) resolution from the config.
        self.capture_width = config.width
@@ -260,113 +200,97 @@ class OpenCVCamera(Camera):
        self.fps = config.fps
        self.channels = config.channels
        self.color_mode = config.color_mode
        self.mock = config.mock
-        self.camera = None
+        self.camera: cv2.VideoCapture | None = None
-        self.is_connected = False
+        self.thread: Thread | None = None
-        self.thread = None
+        self.stop_event: Event | None = None
        self.stop_event = None
        self.color_image = None
        self.logs = {}
-        if self.mock:
+        self.rotation = get_cv2_rotation(config.rotation)
-            import tests.cameras.mock_cv2 as cv2
+        self.backend = get_cv2_backend()
        else:
            import cv2
-        self.rotation = None
+    def __str__(self) -> str:
-        if config.rotation == -90:
+        return f"{self.__class__.__name__}({self.index_or_path})"
-            self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
+
-        elif config.rotation == 90:
+    @property
-            self.rotation = cv2.ROTATE_90_CLOCKWISE
+    def is_connected(self) -> bool:
-        elif config.rotation == 180:
+        return self.camera.isOpened() if isinstance(self.camera, cv2.VideoCapture) else False
            self.rotation = cv2.ROTATE_180
    def connect(self):
        if self.is_connected:
-            raise DeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.")
+            raise DeviceAlreadyConnectedError(f"{self} is already connected.")
-        if self.mock:
+        # Use 1 thread to avoid blocking the main thread. Especially useful during data collection
-            import tests.cameras.mock_cv2 as cv2
+        # when other threads are used to save the images.
-        else:
+        cv2.setNumThreads(1)
            import cv2
-            # Use 1 thread to avoid blocking the main thread. Especially useful during data collection
+        self.camera = cv2.VideoCapture(self.index_or_path, self.backend)
            # when other threads are used to save the images.
            cv2.setNumThreads(1)
-        backend = (
+        # If the camera doesn't work, display the camera indices corresponding to valid cameras.
-            cv2.CAP_V4L2
+        if not self.camera.isOpened():
-            if platform.system() == "Linux"
+            # Release camera to make it accessible for `find_camera_indices`
-            else cv2.CAP_DSHOW
+            self.camera.release()
            if platform.system() == "Windows"
            else cv2.CAP_AVFOUNDATION
            if platform.system() == "Darwin"
            else cv2.CAP_ANY
        )
        camera_idx = f"/dev/video{self.camera_index}" if platform.system() == "Linux" else self.camera_index
        # First create a temporary camera trying to access `camera_index`,
        # and verify it is a valid camera by calling `isOpened`.
        tmp_camera = cv2.VideoCapture(camera_idx, backend)
        is_camera_open = tmp_camera.isOpened()
        # Release camera to make it accessible for `find_camera_indices`
        tmp_camera.release()
        del tmp_camera
        # If the camera doesn't work, display the camera indices corresponding to
        # valid cameras.
        if not is_camera_open:
            # Verify that the provided `camera_index` is valid before printing the traceback
-            cameras_info = find_cameras()
+            cameras_info = self.find_cameras()
            available_cam_ids = [cam["index"] for cam in cameras_info]
-            if self.camera_index not in available_cam_ids:
+            if self.index_or_path not in available_cam_ids:
                raise ValueError(
-                    f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
+                    f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, "
-                    "To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/opencv.py`."
+                    f"but {self.index_or_path} is provided instead. To find the camera index you should use, "
                    "run `python lerobot/common/robot_devices/cameras/opencv.py`."
                )
-            raise OSError(f"Can't access OpenCVCamera({camera_idx}).")
+            raise ConnectionError(f"Can't access {self}.")
        # Secondly, create the camera that will be used downstream.
        # Note: For some unknown reason, calling `isOpened` blocks the camera which then
        # needs to be re-created.
        self.camera = cv2.VideoCapture(camera_idx, backend)
        if self.fps is not None:
-            self.camera.set(cv2.CAP_PROP_FPS, self.fps)
+            self._set_fps(self.fps)
        if self.capture_width is not None:
-            self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, self.capture_width)
+            self._set_capture_width(self.capture_width)
        if self.capture_height is not None:
-            self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, self.capture_height)
+            self._set_capture_height(self.capture_height)
    def _set_fps(self, fps: int) -> None:
        self.camera.set(cv2.CAP_PROP_FPS, fps)
        actual_fps = self.camera.get(cv2.CAP_PROP_FPS)
        actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
        actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
        # Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
-        if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
+        if not math.isclose(fps, actual_fps, rel_tol=1e-3):
-            # Using `OSError` since it's a broad that encompasses issues related to device communication
+            raise RuntimeError(f"Can't set {fps=} for {self}. Actual value is {actual_fps}.")
            raise OSError(
                f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}."
            )
        if self.capture_width is not None and not math.isclose(
            self.capture_width, actual_width, rel_tol=1e-3
        ):
            raise OSError(
                f"Can't set {self.capture_width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}."
            )
        if self.capture_height is not None and not math.isclose(
            self.capture_height, actual_height, rel_tol=1e-3
        ):
            raise OSError(
                f"Can't set {self.capture_height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}."
            )
-        self.fps = round(actual_fps)
+    def _set_capture_width(self, capture_width: int) -> None:
-        self.capture_width = round(actual_width)
+        self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, capture_width)
-        self.capture_height = round(actual_height)
+        actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
-        self.is_connected = True
+        if not math.isclose(self.capture_width, actual_width, rel_tol=1e-3):
            raise RuntimeError(f"Can't set {capture_width=} for {self}. Actual value is {actual_width}.")
    def _set_capture_height(self, capture_height: int) -> None:
        self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, capture_height)
        actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
        if not math.isclose(self.capture_height, actual_height, rel_tol=1e-3):
            raise RuntimeError(f"Can't set {capture_height=} for {self}. Actual value is {actual_height}.")
    @staticmethod
    def find_cameras(max_index_search_range=MAX_OPENCV_INDEX) -> list[IndexOrPath]:
        if platform.system() == "Linux":
            print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
            possible_idx_or_paths = [str(port) for port in Path("/dev").glob("video*")]
        else:
            print(
                f"{platform.system()} system detected. Finding available camera indices through "
                f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
            )
            possible_idx_or_paths = range(max_index_search_range)
        found_idx_or_paths = []
        for target in possible_idx_or_paths:
            camera = cv2.VideoCapture(target)
            is_open = camera.isOpened()
            camera.release()
            if is_open:
                print(f"Camera found at {target}")
                found_idx_or_paths.append(target)
        return found_idx_or_paths
    def read(self, temporary_color_mode: str | None = None) -> np.ndarray:
        """Read a frame from the camera returned in the format (height, width, channels)
@@ -376,17 +300,23 @@ class OpenCVCamera(Camera):
        If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
        """
        if not self.is_connected:
-            raise DeviceNotConnectedError(
+            raise DeviceNotConnectedError(f"{self} is not connected.")
                f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
            )
-        start_time = time.perf_counter()
+        start = time.perf_counter()
        ret, color_image = self.camera.read()
        if not ret:
-            raise OSError(f"Can't capture color image from camera {self.camera_index}.")
+            raise RuntimeError(f"Can't capture color image from {self}.")
        self.color_image = self._postprocess_image(color_image, temporary_color_mode)
        dt_ms = (time.perf_counter() - start) * 1e3
        logger.debug(f"{self} read: {dt_ms:.1f}ms")
        # log the utc time at which the image was received
        self.logs["timestamp_utc"] = capture_timestamp_utc()
    def _postprocess_image(self, image, temporary_color_mode: str | None = None):
        requested_color_mode = self.color_mode if temporary_color_mode is None else temporary_color_mode
        if requested_color_mode not in ["rgb", "bgr"]:
@@ -398,33 +328,21 @@ class OpenCVCamera(Camera):
        # However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks,
        # so we convert the image color from BGR to RGB.
        if requested_color_mode == "rgb":
-            if self.mock:
+            color_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                import tests.cameras.mock_cv2 as cv2
            else:
                import cv2
            color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
        h, w, _ = color_image.shape
        if h != self.capture_height or w != self.capture_width:
-            raise OSError(
+            raise RuntimeError(
-                f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
+                f"Can't capture color image with expected height and width ({self.height} x {self.width}). "
                f"({h} x {w}) returned instead."
            )
        if self.rotation is not None:
            color_image = cv2.rotate(color_image, self.rotation)
        # log the number of seconds it took to read the image
        self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
        # log the utc time at which the image was received
        self.logs["timestamp_utc"] = capture_timestamp_utc()
        self.color_image = color_image
        return color_image
-    def read_loop(self):
+    def _read_loop(self):
        while not self.stop_event.is_set():
            try:
                self.color_image = self.read()
@@ -433,13 +351,11 @@ class OpenCVCamera(Camera):
    def async_read(self):
        if not self.is_connected:
-            raise DeviceNotConnectedError(
+            raise DeviceNotConnectedError(f"{self} is not connected.")
                f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
            )
        if self.thread is None:
-            self.stop_event = threading.Event()
+            self.stop_event = Event()
-            self.thread = Thread(target=self.read_loop, args=())
+            self.thread = Thread(target=self._read_loop, args=())
            self.thread.daemon = True
            self.thread.start()
@@ -455,9 +371,7 @@ class OpenCVCamera(Camera):
    def disconnect(self):
        if not self.is_connected:
-            raise DeviceNotConnectedError(
+            raise DeviceNotConnectedError(f"{self} is not connected.")
                f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
            )
        if self.thread is not None:
            self.stop_event.set()
@@ -467,11 +381,6 @@ class OpenCVCamera(Camera):
        self.camera.release()
        self.camera = None
        self.is_connected = False
    def __del__(self):
        if getattr(self, "is_connected", False):
            self.disconnect()
 if __name__ == "__main__":
--- a/lerobot/common/cameras/utils.py
+++ b/lerobot/common/cameras/utils.py
@@ -1,3 +1,5 @@
 import platform
 from .camera import Camera
 from .configs import CameraConfig
@@ -19,3 +21,23 @@ def make_cameras_from_configs(camera_configs: dict[str, CameraConfig]) -> dict[s
            raise ValueError(f"The motor type '{cfg.type}' is not valid.")
    return cameras
 def get_cv2_rotation(rotation: int) -> int:
    import cv2
    return {
        -90: cv2.ROTATE_90_COUNTERCLOCKWISE,
        90: cv2.ROTATE_90_CLOCKWISE,
        180: cv2.ROTATE_180,
    }.get(rotation)
 def get_cv2_backend() -> int:
    import cv2
    return {
        "Linux": cv2.CAP_DSHOW,
        "Windows": cv2.CAP_AVFOUNDATION,
        "Darwin": cv2.CAP_ANY,
    }.get(platform.system(), cv2.CAP_V4L2)