forked from tangger/lerobot
529 lines
22 KiB
Python
529 lines
22 KiB
Python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Provides the OpenCVCamera class for capturing frames from cameras using OpenCV.
|
|
"""
|
|
|
|
import contextlib
|
|
import logging
|
|
import math
|
|
import platform
|
|
import queue
|
|
import time
|
|
from pathlib import Path
|
|
from threading import Event, Thread
|
|
from typing import Any, Dict, List
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
|
|
|
|
from ..camera import Camera
|
|
from ..utils import get_cv2_backend, get_cv2_rotation
|
|
from .configuration_opencv import ColorMode, OpenCVCameraConfig
|
|
|
|
# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance,
|
|
# if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
|
|
# on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23.
|
|
# When you change the USB port or reboot the computer, the operating system might
|
|
# treat the same cameras as new devices. Thus we select a higher bound to search indices.
|
|
MAX_OPENCV_INDEX = 60
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class OpenCVCamera(Camera):
|
|
"""
|
|
Manages camera interactions using OpenCV for efficient frame recording.
|
|
|
|
This class provides a high-level interface to connect to, configure, and read
|
|
frames from cameras compatible with OpenCV's VideoCapture. It supports both
|
|
synchronous and asynchronous frame reading.
|
|
|
|
An OpenCVCamera instance requires a camera index (e.g., 0) or a device path
|
|
(e.g., '/dev/video0' on Linux). Camera indices can be unstable across reboots
|
|
or port changes, especially on Linux. Use the provided utility script to find
|
|
available camera indices or paths:
|
|
```bash
|
|
python -m lerobot.find_cameras
|
|
```
|
|
|
|
The camera's default settings (FPS, resolution, color mode) are used unless
|
|
overridden in the configuration.
|
|
|
|
Example:
|
|
```python
|
|
from lerobot.common.cameras.opencv import OpenCVCamera
|
|
from lerobot.common.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode
|
|
|
|
# Basic usage with camera index 0
|
|
config = OpenCVCameraConfig(index_or_path=0)
|
|
camera = OpenCVCamera(config)
|
|
try:
|
|
camera.connect()
|
|
print(f"Connected to {camera}")
|
|
color_image = camera.read() # Synchronous read
|
|
print(f"Read frame shape: {color_image.shape}")
|
|
async_image = camera.async_read() # Asynchronous read
|
|
print(f"Async read frame shape: {async_image.shape}")
|
|
except Exception as e:
|
|
print(f"An error occurred: {e}")
|
|
finally:
|
|
camera.disconnect()
|
|
print(f"Disconnected from {camera}")
|
|
|
|
# Example with custom settings
|
|
custom_config = OpenCVCameraConfig(
|
|
index_or_path='/dev/video0', # Or use an index
|
|
fps=30,
|
|
width=1280,
|
|
height=720,
|
|
color_mode=ColorMode.RGB,
|
|
rotation=90
|
|
)
|
|
custom_camera = OpenCVCamera(custom_config)
|
|
# ... connect, read, disconnect ...
|
|
```
|
|
"""
|
|
|
|
def __init__(self, config: OpenCVCameraConfig):
|
|
"""
|
|
Initializes the OpenCVCamera instance.
|
|
|
|
Args:
|
|
config: The configuration settings for the camera.
|
|
"""
|
|
super().__init__(config)
|
|
|
|
self.config = config
|
|
self.index_or_path = config.index_or_path
|
|
|
|
self.fps = config.fps
|
|
self.color_mode = config.color_mode
|
|
|
|
self.videocapture: cv2.VideoCapture | None = None
|
|
|
|
self.thread: Thread | None = None
|
|
self.stop_event: Event | None = None
|
|
self.frame_queue: queue.Queue = queue.Queue(maxsize=1)
|
|
|
|
self.rotation: int | None = get_cv2_rotation(config.rotation)
|
|
self.backend: int = get_cv2_backend() # NOTE(Steven): If we specify backend the opencv open fails
|
|
|
|
if self.height and self.width:
|
|
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
|
|
self.capture_width, self.capture_height = self.height, self.width
|
|
else:
|
|
self.capture_width, self.capture_height = self.width, self.height
|
|
|
|
def __str__(self) -> str:
|
|
return f"{self.__class__.__name__}({self.index_or_path})"
|
|
|
|
@property
|
|
def is_connected(self) -> bool:
|
|
"""Checks if the camera is currently connected and opened."""
|
|
return isinstance(self.videocapture, cv2.VideoCapture) and self.videocapture.isOpened()
|
|
|
|
def connect(self, warmup: bool = True):
|
|
"""
|
|
Connects to the OpenCV camera specified in the configuration.
|
|
|
|
Initializes the OpenCV VideoCapture object, sets desired camera properties
|
|
(FPS, width, height), and performs initial checks.
|
|
|
|
Raises:
|
|
DeviceAlreadyConnectedError: If the camera is already connected.
|
|
ValueError: If the specified camera index/path is not found or accessible.
|
|
ConnectionError: If the camera is found but fails to open.
|
|
RuntimeError: If the camera opens but fails to apply requested FPS/resolution settings.
|
|
"""
|
|
if self.is_connected:
|
|
raise DeviceAlreadyConnectedError(f"{self} is already connected.")
|
|
|
|
# Use 1 thread for OpenCV operations to avoid potential conflicts or
|
|
# blocking in multi-threaded applications, especially during data collection.
|
|
cv2.setNumThreads(1)
|
|
|
|
self.videocapture = cv2.VideoCapture(self.index_or_path)
|
|
|
|
if not self.videocapture.isOpened():
|
|
self.videocapture.release()
|
|
self.videocapture = None
|
|
raise ConnectionError(
|
|
f"Failed to open OpenCV camera {self.index_or_path}."
|
|
f"Run 'python -m find_cameras Run 'python -m find_cameras' for details about the available cameras in your system."
|
|
)
|
|
|
|
self._configure_capture_settings()
|
|
|
|
if warmup:
|
|
logger.debug(f"Reading a warm-up frame for {self.index_or_path}...")
|
|
self.read() # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs
|
|
|
|
logger.debug(f"Camera {self.index_or_path} connected and configured successfully.")
|
|
|
|
def _configure_capture_settings(self) -> None:
|
|
"""
|
|
Applies the specified FPS, width, and height settings to the connected camera.
|
|
|
|
This method attempts to set the camera properties via OpenCV. It checks if
|
|
the camera successfully applied the settings and raises an error if not.
|
|
|
|
Args:
|
|
fps: The desired frames per second. If None, the setting is skipped.
|
|
width: The desired capture width. If None, the setting is skipped.
|
|
height: The desired capture height. If None, the setting is skipped.
|
|
|
|
Raises:
|
|
RuntimeError: If the camera fails to set any of the specified properties
|
|
to the requested value.
|
|
DeviceNotConnectedError: If the camera is not connected when attempting
|
|
to configure settings.
|
|
"""
|
|
if not self.is_connected:
|
|
raise DeviceNotConnectedError(f"Cannot configure settings for {self} as it is not connected.")
|
|
|
|
if self.fps is None:
|
|
self.fps = self.videocapture.get(cv2.CAP_PROP_FPS)
|
|
logger.info(f"FPS set to camera default: {self.fps}.")
|
|
else:
|
|
self._validate_fps()
|
|
|
|
default_width = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_WIDTH)))
|
|
default_height = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
|
|
|
if self.width is None or self.height is None:
|
|
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
|
|
self.width, self.height = default_height, default_width
|
|
self.capture_width, self.capture_height = default_width, default_height
|
|
else:
|
|
self.width, self.height = default_width, default_height
|
|
self.capture_width, self.capture_height = default_width, default_height
|
|
logger.info(f"Capture width set to camera default: {self.width}.")
|
|
logger.info(f"Capture height set to camera default: {self.height}.")
|
|
else:
|
|
self._validate_width_and_height()
|
|
|
|
def _validate_fps(self) -> None:
|
|
"""Validates and sets the camera's frames per second (FPS)."""
|
|
|
|
success = self.videocapture.set(cv2.CAP_PROP_FPS, float(self.fps))
|
|
actual_fps = self.videocapture.get(cv2.CAP_PROP_FPS)
|
|
# Use math.isclose for robust float comparison
|
|
if not success or not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
|
|
logger.warning(
|
|
f"Requested FPS {self.fps} for {self}, but camera reported {actual_fps} (set success: {success}). "
|
|
"This might be due to camera limitations."
|
|
)
|
|
raise RuntimeError(
|
|
f"Failed to set requested FPS {self.fps} for {self}. Actual value reported: {actual_fps}."
|
|
)
|
|
logger.debug(f"FPS set to {actual_fps} for {self}.")
|
|
|
|
def _validate_width_and_height(self) -> None:
|
|
"""Validates and sets the camera's frame capture width and height."""
|
|
|
|
success = self.videocapture.set(cv2.CAP_PROP_FRAME_WIDTH, float(self.capture_width))
|
|
actual_width = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_WIDTH)))
|
|
if not success or self.capture_width != actual_width:
|
|
logger.warning(
|
|
f"Requested capture width {self.capture_width} for {self}, but camera reported {actual_width} (set success: {success})."
|
|
)
|
|
raise RuntimeError(
|
|
f"Failed to set requested capture width {self.capture_width} for {self}. Actual value: {actual_width}."
|
|
)
|
|
logger.debug(f"Capture width set to {actual_width} for {self}.")
|
|
|
|
success = self.videocapture.set(cv2.CAP_PROP_FRAME_HEIGHT, float(self.capture_height))
|
|
actual_height = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
|
if not success or self.capture_height != actual_height:
|
|
logger.warning(
|
|
f"Requested capture height {self.capture_height} for {self}, but camera reported {actual_height} (set success: {success})."
|
|
)
|
|
raise RuntimeError(
|
|
f"Failed to set requested capture height {self.capture_height} for {self}. Actual value: {actual_height}."
|
|
)
|
|
logger.debug(f"Capture height set to {actual_height} for {self}.")
|
|
|
|
@staticmethod
|
|
def find_cameras(max_index_search_range=MAX_OPENCV_INDEX) -> List[Dict[str, Any]]:
|
|
"""
|
|
Detects available OpenCV cameras connected to the system.
|
|
|
|
On Linux, it scans '/dev/video*' paths. On other systems (like macOS, Windows),
|
|
it checks indices from 0 up to `max_index_search_range`.
|
|
|
|
Args:
|
|
max_index_search_range (int): The maximum index to check on non-Linux systems.
|
|
|
|
Returns:
|
|
List[Dict[str, Any]]: A list of dictionaries,
|
|
where each dictionary contains 'type', 'id' (port index or path),
|
|
and the default profile properties (width, height, fps, format).
|
|
"""
|
|
found_cameras_info = []
|
|
|
|
if platform.system() == "Linux":
|
|
logger.info("Linux detected. Scanning '/dev/video*' device paths...")
|
|
possible_paths = sorted(Path("/dev").glob("video*"), key=lambda p: p.name)
|
|
targets_to_scan = [str(p) for p in possible_paths]
|
|
logger.debug(f"Found potential paths: {targets_to_scan}")
|
|
else:
|
|
logger.info(
|
|
f"{platform.system()} system detected. Scanning indices from 0 to {max_index_search_range}..."
|
|
)
|
|
targets_to_scan = list(range(max_index_search_range))
|
|
|
|
for target in targets_to_scan:
|
|
camera = cv2.VideoCapture(target)
|
|
if camera.isOpened():
|
|
default_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
|
|
default_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
|
default_fps = camera.get(cv2.CAP_PROP_FPS)
|
|
default_format = camera.get(cv2.CAP_PROP_FORMAT)
|
|
camera_info = {
|
|
"name": f"OpenCV Camera @ {target}",
|
|
"type": "OpenCV",
|
|
"id": target,
|
|
"backend_api": camera.getBackendName(),
|
|
"default_stream_profile": {
|
|
"format": default_format,
|
|
"width": default_width,
|
|
"height": default_height,
|
|
"fps": default_fps,
|
|
},
|
|
}
|
|
|
|
found_cameras_info.append(camera_info)
|
|
logger.debug(f"Found OpenCV camera:: {camera_info}")
|
|
camera.release()
|
|
|
|
if not found_cameras_info:
|
|
logger.warning("No OpenCV devices detected.")
|
|
|
|
logger.info(f"Detected OpenCV cameras: {[cam['id'] for cam in found_cameras_info]}")
|
|
return found_cameras_info
|
|
|
|
def read(self, color_mode: ColorMode | None = None) -> np.ndarray:
|
|
"""
|
|
Reads a single frame synchronously from the camera.
|
|
|
|
This is a blocking call. It waits for the next available frame from the
|
|
camera hardware via OpenCV.
|
|
|
|
Args:
|
|
color_mode (Optional[ColorMode]): If specified, overrides the default
|
|
color mode (`self.color_mode`) for this read operation (e.g.,
|
|
request RGB even if default is BGR).
|
|
|
|
Returns:
|
|
np.ndarray: The captured frame as a NumPy array in the format
|
|
(height, width, channels), using the specified or default
|
|
color mode and applying any configured rotation.
|
|
|
|
Raises:
|
|
DeviceNotConnectedError: If the camera is not connected.
|
|
RuntimeError: If reading the frame from the camera fails or if the
|
|
received frame dimensions don't match expectations before rotation.
|
|
ValueError: If an invalid `color_mode` is requested.
|
|
"""
|
|
if not self.is_connected:
|
|
raise DeviceNotConnectedError(f"{self} is not connected.")
|
|
|
|
start_time = time.perf_counter()
|
|
|
|
# NOTE(Steven): Are we okay with this blocking an undefined amount of time?
|
|
ret, frame = self.videocapture.read()
|
|
|
|
if not ret or frame is None:
|
|
raise RuntimeError(
|
|
f"Failed to capture frame from {self}. '.read()' returned status={ret} and frame is None."
|
|
)
|
|
|
|
# Post-process the frame (color conversion, dimension check, rotation)
|
|
processed_frame = self._postprocess_image(frame, color_mode)
|
|
|
|
read_duration_ms = (time.perf_counter() - start_time) * 1e3
|
|
logger.debug(f"{self} synchronous read took: {read_duration_ms:.1f}ms")
|
|
|
|
return processed_frame
|
|
|
|
def _postprocess_image(self, image: np.ndarray, color_mode: ColorMode | None = None) -> np.ndarray:
|
|
"""
|
|
Applies color conversion, dimension validation, and rotation to a raw frame.
|
|
|
|
Args:
|
|
image (np.ndarray): The raw image frame (expected BGR format from OpenCV).
|
|
color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None,
|
|
uses the instance's default `self.color_mode`.
|
|
|
|
Returns:
|
|
np.ndarray: The processed image frame.
|
|
|
|
Raises:
|
|
ValueError: If the requested `color_mode` is invalid.
|
|
RuntimeError: If the raw frame dimensions do not match the configured
|
|
`width` and `height`.
|
|
"""
|
|
requested_color_mode = self.color_mode if color_mode is None else color_mode
|
|
|
|
if requested_color_mode not in (ColorMode.RGB, ColorMode.BGR):
|
|
raise ValueError(
|
|
f"Invalid requested color mode '{requested_color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
|
|
)
|
|
|
|
h, w, c = image.shape
|
|
|
|
if h != self.capture_height or w != self.capture_width:
|
|
raise RuntimeError(
|
|
f"Captured frame dimensions ({h}x{w}) do not match configured capture dimensions ({self.capture_height}x{self.capture_width}) for {self}."
|
|
)
|
|
|
|
processed_image = image
|
|
if requested_color_mode == ColorMode.RGB:
|
|
processed_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
logger.debug(f"Converted frame from BGR to RGB for {self}.")
|
|
|
|
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
|
|
processed_image = cv2.rotate(processed_image, self.rotation)
|
|
logger.debug(f"Rotated frame by {self.config.rotation} degrees for {self}.")
|
|
|
|
return processed_image
|
|
|
|
def _read_loop(self):
|
|
"""
|
|
Internal loop run by the background thread for asynchronous reading.
|
|
|
|
Continuously reads frames from the camera using the synchronous `read()`
|
|
method and places the latest frame into the `frame_queue`. It overwrites
|
|
any previous frame in the queue.
|
|
"""
|
|
logger.debug(f"Starting read loop thread for {self}.")
|
|
while not self.stop_event.is_set():
|
|
try:
|
|
color_image = self.read()
|
|
|
|
with contextlib.suppress(queue.Empty):
|
|
_ = self.frame_queue.get_nowait()
|
|
self.frame_queue.put(color_image)
|
|
logger.debug(f"Frame placed in queue for {self}.")
|
|
|
|
except DeviceNotConnectedError:
|
|
logger.error(f"Read loop for {self} stopped: Camera disconnected.")
|
|
break
|
|
except Exception as e:
|
|
logger.warning(f"Error reading frame in background thread for {self}: {e}")
|
|
|
|
logger.debug(f"Stopping read loop thread for {self}.")
|
|
|
|
def _start_read_thread(self) -> None:
|
|
"""Starts or restarts the background read thread if it's not running."""
|
|
if self.thread is not None and self.thread.is_alive():
|
|
self.thread.join(timeout=0.1)
|
|
if self.stop_event is not None:
|
|
self.stop_event.set()
|
|
|
|
self.stop_event = Event()
|
|
self.thread = Thread(
|
|
target=self._read_loop, args=(), name=f"OpenCVCameraReadLoop-{self}-{self.index_or_path}"
|
|
)
|
|
self.thread.daemon = True
|
|
self.thread.start()
|
|
logger.debug(f"Read thread started for {self}.")
|
|
|
|
def _stop_read_thread(self) -> None:
|
|
"""Signals the background read thread to stop and waits for it to join."""
|
|
if self.stop_event is not None:
|
|
logger.debug(f"Signaling stop event for read thread of {self}.")
|
|
self.stop_event.set()
|
|
|
|
if self.thread is not None and self.thread.is_alive():
|
|
logger.debug(f"Waiting for read thread of {self} to join...")
|
|
self.thread.join(timeout=2.0)
|
|
if self.thread.is_alive():
|
|
logger.warning(f"Read thread for {self} did not terminate gracefully after 2 seconds.")
|
|
else:
|
|
logger.debug(f"Read thread for {self} joined successfully.")
|
|
|
|
self.thread = None
|
|
self.stop_event = None
|
|
|
|
def async_read(self, timeout_ms: float = 2000) -> np.ndarray:
|
|
"""
|
|
Reads the latest available frame asynchronously.
|
|
|
|
This method retrieves the most recent frame captured by the background
|
|
read thread. It does not block waiting for the camera hardware directly,
|
|
only waits for a frame to appear in the internal queue up to the specified
|
|
timeout.
|
|
|
|
Args:
|
|
timeout_ms (float): Maximum time in milliseconds to wait for a frame
|
|
to become available in the queue. Defaults to 2000ms (2 seconds).
|
|
|
|
Returns:
|
|
np.ndarray: The latest captured frame as a NumPy array in the format
|
|
(height, width, channels), processed according to configuration.
|
|
|
|
Raises:
|
|
DeviceNotConnectedError: If the camera is not connected.
|
|
TimeoutError: If no frame becomes available within the specified timeout.
|
|
RuntimeError: If an unexpected error occurs while retrieving from the queue.
|
|
"""
|
|
if not self.is_connected:
|
|
raise DeviceNotConnectedError(f"{self} is not connected.")
|
|
|
|
if self.thread is None or not self.thread.is_alive():
|
|
self._start_read_thread()
|
|
|
|
try:
|
|
return self.frame_queue.get(timeout=timeout_ms / 1000.0)
|
|
except queue.Empty as e:
|
|
thread_alive = self.thread is not None and self.thread.is_alive()
|
|
logger.error(
|
|
f"Timeout waiting for frame from {self} queue after {timeout_ms}ms. "
|
|
f"(Read thread alive: {thread_alive})"
|
|
)
|
|
raise TimeoutError(
|
|
f"Timed out waiting for frame from camera {self.index_or_path} after {timeout_ms} ms. "
|
|
f"Read thread alive: {thread_alive}."
|
|
) from e
|
|
except Exception as e:
|
|
logger.exception(f"Unexpected error getting frame from queue for {self}: {e}")
|
|
raise RuntimeError(f"Error getting frame from queue for camera {self.index_or_path}: {e}") from e
|
|
|
|
def disconnect(self):
|
|
"""
|
|
Disconnects from the camera and cleans up resources.
|
|
|
|
Stops the background read thread (if running) and releases the OpenCV
|
|
VideoCapture object.
|
|
|
|
Raises:
|
|
DeviceNotConnectedError: If the camera is already disconnected.
|
|
"""
|
|
if not self.is_connected and self.thread is None:
|
|
raise DeviceNotConnectedError(f"{self} not connected.")
|
|
|
|
if self.thread is not None:
|
|
self._stop_read_thread()
|
|
|
|
if self.videocapture is not None:
|
|
self.videocapture.release()
|
|
self.videocapture = None
|
|
|
|
logger.info(f"{self} disconnected.")
|