refactor(cameras): opencv camera init

This commit is contained in:
Simon Alibert
2025-04-21 16:14:06 +02:00
committed by Steven Palma
parent 8a6412b0db
commit 0b5b438f50
2 changed files with 139 additions and 208 deletions

View File

@@ -18,14 +18,16 @@ This file contains utilities for recording frames from cameras. For more info lo
import argparse import argparse
import concurrent.futures import concurrent.futures
import logging
import math import math
import platform import platform
import shutil import shutil
import threading
import time import time
from pathlib import Path from pathlib import Path
from threading import Thread from threading import Event, Thread
from typing import TypeAlias
import cv2
import numpy as np import numpy as np
from PIL import Image from PIL import Image
@@ -36,8 +38,11 @@ from lerobot.common.utils.robot_utils import (
from lerobot.common.utils.utils import capture_timestamp_utc from lerobot.common.utils.utils import capture_timestamp_utc
from ..camera import Camera from ..camera import Camera
from ..utils import get_cv2_backend, get_cv2_rotation
from .configuration_opencv import OpenCVCameraConfig from .configuration_opencv import OpenCVCameraConfig
IndexOrPath: TypeAlias = int | Path
# The maximum opencv device index depends on your operating system. For instance, # The maximum opencv device index depends on your operating system. For instance,
# if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case # if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
# on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23. # on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23.
@@ -45,63 +50,7 @@ from .configuration_opencv import OpenCVCameraConfig
# treat the same cameras as new devices. Thus we select a higher bound to search indices. # treat the same cameras as new devices. Thus we select a higher bound to search indices.
MAX_OPENCV_INDEX = 60 MAX_OPENCV_INDEX = 60
logger = logging.getLogger(__name__)
def find_cameras(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False) -> list[dict]:
cameras = []
if platform.system() == "Linux":
print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
possible_ports = [str(port) for port in Path("/dev").glob("video*")]
ports = _find_cameras(possible_ports, mock=mock)
for port in ports:
cameras.append(
{
"port": port,
"index": int(port.removeprefix("/dev/video")),
}
)
else:
print(
"Mac or Windows detected. Finding available camera indices through "
f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
)
possible_indices = range(max_index_search_range)
indices = _find_cameras(possible_indices, mock=mock)
for index in indices:
cameras.append(
{
"port": None,
"index": index,
}
)
return cameras
def _find_cameras(
possible_camera_ids: list[int | str], raise_when_empty=False, mock=False
) -> list[int | str]:
if mock:
import tests.cameras.mock_cv2 as cv2
else:
import cv2
camera_ids = []
for camera_idx in possible_camera_ids:
camera = cv2.VideoCapture(camera_idx)
is_open = camera.isOpened()
camera.release()
if is_open:
print(f"Camera found at index {camera_idx}")
camera_ids.append(camera_idx)
if raise_when_empty and len(camera_ids) == 0:
raise OSError(
"Not a single camera was detected. Try re-plugging, or re-installing `opencv2`, "
"or your camera driver, or make sure your camera is compatible with opencv2."
)
return camera_ids
def is_valid_unix_path(path: str) -> bool: def is_valid_unix_path(path: str) -> bool:
@@ -114,7 +63,7 @@ def get_camera_index_from_unix_port(port: Path) -> int:
return int(str(port.resolve()).removeprefix("/dev/video")) return int(str(port.resolve()).removeprefix("/dev/video"))
def save_image(img_array, camera_index, frame_index, images_dir): def save_image(img_array: np.ndarray, camera_index: int, frame_index: int, images_dir: Path):
img = Image.fromarray(img_array) img = Image.fromarray(img_array)
path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png" path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
@@ -123,29 +72,32 @@ def save_image(img_array, camera_index, frame_index, images_dir):
def save_images_from_cameras( def save_images_from_cameras(
images_dir: Path, images_dir: Path,
camera_ids: list | None = None, camera_idx_or_paths: list[IndexOrPath] | None = None,
fps=None, fps: int | None = None,
width=None, width: int | None = None,
height=None, height: int | None = None,
record_time_s=2, record_time_s: int = 2,
mock=False,
): ):
""" """
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
associated to a given camera index. associated to a given camera index.
""" """
if camera_ids is None or len(camera_ids) == 0: if not camera_idx_or_paths:
camera_infos = find_cameras(mock=mock) camera_idx_or_paths = OpenCVCamera.find_cameras()
camera_ids = [cam["index"] for cam in camera_infos] if len(camera_idx_or_paths) == 0:
raise RuntimeError(
"Not a single camera was detected. Try re-plugging, or re-installing `opencv-python`, "
"or your camera driver, or make sure your camera is compatible with opencv."
)
print("Connecting cameras") print("Connecting cameras")
cameras = [] cameras = []
for cam_idx in camera_ids: for idx_or_path in camera_idx_or_paths:
config = OpenCVCameraConfig(camera_index=cam_idx, fps=fps, width=width, height=height, mock=mock) config = OpenCVCameraConfig(index_or_path=idx_or_path, fps=fps, width=width, height=height)
camera = OpenCVCamera(config) camera = OpenCVCamera(config)
camera.connect() camera.connect()
print( print(
f"OpenCVCamera({camera.camera_index}, fps={camera.fps}, width={camera.capture_width}, " f"OpenCVCamera({camera.index_or_path}, fps={camera.fps}, width={camera.capture_width}, "
f"height={camera.capture_height}, color_mode={camera.color_mode})" f"height={camera.capture_height}, color_mode={camera.color_mode})"
) )
cameras.append(camera) cameras.append(camera)
@@ -212,7 +164,7 @@ class OpenCVCamera(Camera):
```python ```python
from lerobot.common.robot_devices.cameras.configs import OpenCVCameraConfig from lerobot.common.robot_devices.cameras.configs import OpenCVCameraConfig
config = OpenCVCameraConfig(camera_index=0) config = OpenCVCameraConfig(index_or_path=0)
camera = OpenCVCamera(config) camera = OpenCVCamera(config)
camera.connect() camera.connect()
color_image = camera.read() color_image = camera.read()
@@ -222,28 +174,16 @@ class OpenCVCamera(Camera):
Example of changing default fps, width, height and color_mode: Example of changing default fps, width, height and color_mode:
```python ```python
config = OpenCVCameraConfig(camera_index=0, fps=30, width=1280, height=720) config = OpenCVCameraConfig(index_or_path=0, fps=30, width=1280, height=720)
config = OpenCVCameraConfig(camera_index=0, fps=90, width=640, height=480) config = OpenCVCameraConfig(index_or_path=0, fps=90, width=640, height=480)
config = OpenCVCameraConfig(camera_index=0, fps=90, width=640, height=480, color_mode="bgr") config = OpenCVCameraConfig(index_or_path=0, fps=90, width=640, height=480, color_mode="bgr")
# Note: might error out open `camera.connect()` if these settings are not compatible with the camera # Note: might error out open `camera.connect()` if these settings are not compatible with the camera
``` ```
""" """
def __init__(self, config: OpenCVCameraConfig): def __init__(self, config: OpenCVCameraConfig):
self.config = config self.config = config
self.camera_index = config.camera_index self.index_or_path = config.index_or_path
self.port = None
# Linux uses ports for connecting to cameras
if platform.system() == "Linux":
if isinstance(self.camera_index, int):
self.port = Path(f"/dev/video{self.camera_index}")
elif isinstance(self.camera_index, str) and is_valid_unix_path(self.camera_index):
self.port = Path(self.camera_index)
# Retrieve the camera index from a potentially symlinked path
self.camera_index = get_camera_index_from_unix_port(self.port)
else:
raise ValueError(f"Please check the provided camera_index: {self.camera_index}")
# Store the raw (capture) resolution from the config. # Store the raw (capture) resolution from the config.
self.capture_width = config.width self.capture_width = config.width
@@ -260,113 +200,97 @@ class OpenCVCamera(Camera):
self.fps = config.fps self.fps = config.fps
self.channels = config.channels self.channels = config.channels
self.color_mode = config.color_mode self.color_mode = config.color_mode
self.mock = config.mock
self.camera = None self.camera: cv2.VideoCapture | None = None
self.is_connected = False self.thread: Thread | None = None
self.thread = None self.stop_event: Event | None = None
self.stop_event = None
self.color_image = None self.color_image = None
self.logs = {} self.logs = {}
if self.mock: self.rotation = get_cv2_rotation(config.rotation)
import tests.cameras.mock_cv2 as cv2 self.backend = get_cv2_backend()
else:
import cv2
self.rotation = None def __str__(self) -> str:
if config.rotation == -90: return f"{self.__class__.__name__}({self.index_or_path})"
self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
elif config.rotation == 90: @property
self.rotation = cv2.ROTATE_90_CLOCKWISE def is_connected(self) -> bool:
elif config.rotation == 180: return self.camera.isOpened() if isinstance(self.camera, cv2.VideoCapture) else False
self.rotation = cv2.ROTATE_180
def connect(self): def connect(self):
if self.is_connected: if self.is_connected:
raise DeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.") raise DeviceAlreadyConnectedError(f"{self} is already connected.")
if self.mock: # Use 1 thread to avoid blocking the main thread. Especially useful during data collection
import tests.cameras.mock_cv2 as cv2 # when other threads are used to save the images.
else: cv2.setNumThreads(1)
import cv2
# Use 1 thread to avoid blocking the main thread. Especially useful during data collection self.camera = cv2.VideoCapture(self.index_or_path, self.backend)
# when other threads are used to save the images.
cv2.setNumThreads(1)
backend = ( # If the camera doesn't work, display the camera indices corresponding to valid cameras.
cv2.CAP_V4L2 if not self.camera.isOpened():
if platform.system() == "Linux" # Release camera to make it accessible for `find_camera_indices`
else cv2.CAP_DSHOW self.camera.release()
if platform.system() == "Windows"
else cv2.CAP_AVFOUNDATION
if platform.system() == "Darwin"
else cv2.CAP_ANY
)
camera_idx = f"/dev/video{self.camera_index}" if platform.system() == "Linux" else self.camera_index
# First create a temporary camera trying to access `camera_index`,
# and verify it is a valid camera by calling `isOpened`.
tmp_camera = cv2.VideoCapture(camera_idx, backend)
is_camera_open = tmp_camera.isOpened()
# Release camera to make it accessible for `find_camera_indices`
tmp_camera.release()
del tmp_camera
# If the camera doesn't work, display the camera indices corresponding to
# valid cameras.
if not is_camera_open:
# Verify that the provided `camera_index` is valid before printing the traceback # Verify that the provided `camera_index` is valid before printing the traceback
cameras_info = find_cameras() cameras_info = self.find_cameras()
available_cam_ids = [cam["index"] for cam in cameras_info] available_cam_ids = [cam["index"] for cam in cameras_info]
if self.camera_index not in available_cam_ids: if self.index_or_path not in available_cam_ids:
raise ValueError( raise ValueError(
f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. " f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, "
"To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/opencv.py`." f"but {self.index_or_path} is provided instead. To find the camera index you should use, "
"run `python lerobot/common/robot_devices/cameras/opencv.py`."
) )
raise OSError(f"Can't access OpenCVCamera({camera_idx}).") raise ConnectionError(f"Can't access {self}.")
# Secondly, create the camera that will be used downstream.
# Note: For some unknown reason, calling `isOpened` blocks the camera which then
# needs to be re-created.
self.camera = cv2.VideoCapture(camera_idx, backend)
if self.fps is not None: if self.fps is not None:
self.camera.set(cv2.CAP_PROP_FPS, self.fps) self._set_fps(self.fps)
if self.capture_width is not None: if self.capture_width is not None:
self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, self.capture_width) self._set_capture_width(self.capture_width)
if self.capture_height is not None: if self.capture_height is not None:
self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, self.capture_height) self._set_capture_height(self.capture_height)
def _set_fps(self, fps: int) -> None:
self.camera.set(cv2.CAP_PROP_FPS, fps)
actual_fps = self.camera.get(cv2.CAP_PROP_FPS) actual_fps = self.camera.get(cv2.CAP_PROP_FPS)
actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
# Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30) # Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3): if not math.isclose(fps, actual_fps, rel_tol=1e-3):
# Using `OSError` since it's a broad that encompasses issues related to device communication raise RuntimeError(f"Can't set {fps=} for {self}. Actual value is {actual_fps}.")
raise OSError(
f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}."
)
if self.capture_width is not None and not math.isclose(
self.capture_width, actual_width, rel_tol=1e-3
):
raise OSError(
f"Can't set {self.capture_width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}."
)
if self.capture_height is not None and not math.isclose(
self.capture_height, actual_height, rel_tol=1e-3
):
raise OSError(
f"Can't set {self.capture_height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}."
)
self.fps = round(actual_fps) def _set_capture_width(self, capture_width: int) -> None:
self.capture_width = round(actual_width) self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, capture_width)
self.capture_height = round(actual_height) actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
self.is_connected = True if not math.isclose(self.capture_width, actual_width, rel_tol=1e-3):
raise RuntimeError(f"Can't set {capture_width=} for {self}. Actual value is {actual_width}.")
def _set_capture_height(self, capture_height: int) -> None:
self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, capture_height)
actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
if not math.isclose(self.capture_height, actual_height, rel_tol=1e-3):
raise RuntimeError(f"Can't set {capture_height=} for {self}. Actual value is {actual_height}.")
@staticmethod
def find_cameras(max_index_search_range=MAX_OPENCV_INDEX) -> list[IndexOrPath]:
if platform.system() == "Linux":
print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
possible_idx_or_paths = [str(port) for port in Path("/dev").glob("video*")]
else:
print(
f"{platform.system()} system detected. Finding available camera indices through "
f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
)
possible_idx_or_paths = range(max_index_search_range)
found_idx_or_paths = []
for target in possible_idx_or_paths:
camera = cv2.VideoCapture(target)
is_open = camera.isOpened()
camera.release()
if is_open:
print(f"Camera found at {target}")
found_idx_or_paths.append(target)
return found_idx_or_paths
def read(self, temporary_color_mode: str | None = None) -> np.ndarray: def read(self, temporary_color_mode: str | None = None) -> np.ndarray:
"""Read a frame from the camera returned in the format (height, width, channels) """Read a frame from the camera returned in the format (height, width, channels)
@@ -376,17 +300,23 @@ class OpenCVCamera(Camera):
If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`. If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
""" """
if not self.is_connected: if not self.is_connected:
raise DeviceNotConnectedError( raise DeviceNotConnectedError(f"{self} is not connected.")
f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
start_time = time.perf_counter() start = time.perf_counter()
ret, color_image = self.camera.read() ret, color_image = self.camera.read()
if not ret: if not ret:
raise OSError(f"Can't capture color image from camera {self.camera_index}.") raise RuntimeError(f"Can't capture color image from {self}.")
self.color_image = self._postprocess_image(color_image, temporary_color_mode)
dt_ms = (time.perf_counter() - start) * 1e3
logger.debug(f"{self} read: {dt_ms:.1f}ms")
# log the utc time at which the image was received
self.logs["timestamp_utc"] = capture_timestamp_utc()
def _postprocess_image(self, image, temporary_color_mode: str | None = None):
requested_color_mode = self.color_mode if temporary_color_mode is None else temporary_color_mode requested_color_mode = self.color_mode if temporary_color_mode is None else temporary_color_mode
if requested_color_mode not in ["rgb", "bgr"]: if requested_color_mode not in ["rgb", "bgr"]:
@@ -398,33 +328,21 @@ class OpenCVCamera(Camera):
# However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks, # However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks,
# so we convert the image color from BGR to RGB. # so we convert the image color from BGR to RGB.
if requested_color_mode == "rgb": if requested_color_mode == "rgb":
if self.mock: color_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
import tests.cameras.mock_cv2 as cv2
else:
import cv2
color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
h, w, _ = color_image.shape h, w, _ = color_image.shape
if h != self.capture_height or w != self.capture_width: if h != self.capture_height or w != self.capture_width:
raise OSError( raise RuntimeError(
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead." f"Can't capture color image with expected height and width ({self.height} x {self.width}). "
f"({h} x {w}) returned instead."
) )
if self.rotation is not None: if self.rotation is not None:
color_image = cv2.rotate(color_image, self.rotation) color_image = cv2.rotate(color_image, self.rotation)
# log the number of seconds it took to read the image
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
# log the utc time at which the image was received
self.logs["timestamp_utc"] = capture_timestamp_utc()
self.color_image = color_image
return color_image return color_image
def read_loop(self): def _read_loop(self):
while not self.stop_event.is_set(): while not self.stop_event.is_set():
try: try:
self.color_image = self.read() self.color_image = self.read()
@@ -433,13 +351,11 @@ class OpenCVCamera(Camera):
def async_read(self): def async_read(self):
if not self.is_connected: if not self.is_connected:
raise DeviceNotConnectedError( raise DeviceNotConnectedError(f"{self} is not connected.")
f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
if self.thread is None: if self.thread is None:
self.stop_event = threading.Event() self.stop_event = Event()
self.thread = Thread(target=self.read_loop, args=()) self.thread = Thread(target=self._read_loop, args=())
self.thread.daemon = True self.thread.daemon = True
self.thread.start() self.thread.start()
@@ -455,9 +371,7 @@ class OpenCVCamera(Camera):
def disconnect(self): def disconnect(self):
if not self.is_connected: if not self.is_connected:
raise DeviceNotConnectedError( raise DeviceNotConnectedError(f"{self} is not connected.")
f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
if self.thread is not None: if self.thread is not None:
self.stop_event.set() self.stop_event.set()
@@ -467,11 +381,6 @@ class OpenCVCamera(Camera):
self.camera.release() self.camera.release()
self.camera = None self.camera = None
self.is_connected = False
def __del__(self):
if getattr(self, "is_connected", False):
self.disconnect()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,3 +1,5 @@
import platform
from .camera import Camera from .camera import Camera
from .configs import CameraConfig from .configs import CameraConfig
@@ -19,3 +21,23 @@ def make_cameras_from_configs(camera_configs: dict[str, CameraConfig]) -> dict[s
raise ValueError(f"The motor type '{cfg.type}' is not valid.") raise ValueError(f"The motor type '{cfg.type}' is not valid.")
return cameras return cameras
def get_cv2_rotation(rotation: int) -> int:
import cv2
return {
-90: cv2.ROTATE_90_COUNTERCLOCKWISE,
90: cv2.ROTATE_90_CLOCKWISE,
180: cv2.ROTATE_180,
}.get(rotation)
def get_cv2_backend() -> int:
import cv2
return {
"Linux": cv2.CAP_DSHOW,
"Windows": cv2.CAP_AVFOUNDATION,
"Darwin": cv2.CAP_ANY,
}.get(platform.system(), cv2.CAP_V4L2)