lerobot/lerobot/common/robot_devices/cameras/opencv.py

import argparse
import math
import time
from dataclasses import dataclass, replace
from pathlib import Path
from threading import Thread

import cv2

from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
# Using 1 thread to avoid blocking the main thread.
# Especially useful during data collection when other threads are used
# to save the images.
cv2.setNumThreads(1)
import numpy as np

from lerobot.common.robot_devices.cameras.utils import save_color_image
from lerobot.common.utils.utils import capture_timestamp_utc


def find_camera_indices(raise_when_empty=False, max_index_search_range=60):
    camera_ids = []
    for camera_idx in range(max_index_search_range):
        camera = cv2.VideoCapture(camera_idx)
        is_open = camera.isOpened()
        camera.release()

        if is_open:
            print(f"Camera found at index {camera_idx}")
            camera_ids.append(camera_idx)

    if raise_when_empty and len(camera_ids) == 0:
        raise OSError(
            "Not a single camera was detected. Try re-plugging, or re-installing `opencv2`, or your camera driver, or make sure your camera is compatible with opencv2."
        )

    return camera_ids


def benchmark_cameras(cameras, out_dir=None, save_images=False, num_warmup_frames=4):
    if out_dir:
        out_dir = Path(out_dir)
        out_dir.mkdir(parents=True, exist_ok=True)

    for _ in range(num_warmup_frames):
        for camera in cameras:
            try:
                camera.capture_image()
                time.sleep(0.01)
            except OSError as e:
                print(e)

    while True:
        now = time.time()
        for camera in cameras:
            color_image = camera.capture_image("bgr" if save_images else "rgb")

            if save_images:
                image_path = out_dir / f"camera_{camera.camera_index:02}.png"
                print(f"Write to {image_path}")
                save_color_image(color_image, image_path, write_shape=True)

        dt_s = time.time() - now
        dt_ms = dt_s * 1000
        freq = 1 / dt_s
        print(f"Latency (ms): {dt_ms:.2f}\tFrequency: {freq:.2f}")

        if save_images:
            break
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break


@dataclass
class OpenCVCameraConfig:
    """
    Example of tested options for Intel Real Sense D405:

    ```python
    OpenCVCameraConfig(30, 640, 480)
    OpenCVCameraConfig(60, 640, 480)
    OpenCVCameraConfig(90, 640, 480)
    OpenCVCameraConfig(30, 1280, 720)
    ```
    """

    fps: int | None = None
    width: int | None = None
    height: int | None = None
    color: str = "rgb"


class OpenCVCamera:
    # TODO(rcadene): improve dosctring
    """
    https://docs.opencv.org/4.x/d0/da7/videoio_overview.html
    https://docs.opencv.org/4.x/d4/d15/group__videoio__flags__base.html#ga023786be1ee68a9105bf2e48c700294d

    Example of uage:

    ```python
    camera = OpenCVCamera(camera_index=0)
    camera.connect()
    color_image = camera.read()
    # when done using the camera, consider disconnecting
    camera.disconnect()
    ```
    """

    def __init__(self, camera_index: int, config: OpenCVCameraConfig | None = None, **kwargs):
        if config is None:
            config = OpenCVCameraConfig()
        # Overwrite config arguments using kwargs
        config = replace(config, **kwargs)

        self.camera_index = camera_index
        self.fps = config.fps
        self.width = config.width
        self.height = config.height
        self.color = config.color

        if not isinstance(self.camera_index, int):
            raise ValueError(f"Camera index must be provided as an int, but {self.camera_index} was given instead.")

        if self.color not in ["rgb", "bgr"]:
            raise ValueError(f"Expected color values are 'rgb' or 'bgr', but {self.color} is provided.")

        self.camera = None
        self.is_connected = False
        self.thread = None
        self.color_image = None
        self.logs = {}

    def connect(self):
        if self.is_connected:
            raise RobotDeviceAlreadyConnectedError(f"Camera {self.camera_index} is already connected.")

        # First create a temporary camera trying to access `camera_index`,
        # and verify it is a valid camera by calling `isOpened`.
        tmp_camera = cv2.VideoCapture(self.camera_index)
        is_camera_open = tmp_camera.isOpened()
        # Release camera to make it accessible for `find_camera_indices`
        del tmp_camera

        # If the camera doesn't work, display the camera indices corresponding to
        # valid cameras.
        if not is_camera_open:
            # Verify that the provided `camera_index` is valid before printing the traceback
            available_cam_ids = find_camera_indices()
            if self.camera_index not in available_cam_ids:
                raise ValueError(
                    f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead."
                )

            raise OSError(f"Can't access camera {self.camera_index}.")

        # Secondly, create the camera that will be used downstream.
        # Note: For some unknown reason, calling `isOpened` blocks the camera which then
        # needs to be re-created.
        self.camera = cv2.VideoCapture(self.camera_index)

        if self.fps:
            self.camera.set(cv2.CAP_PROP_FPS, self.fps)
        if self.width:
            self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
        if self.height:
            self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, self.height)

        actual_fps = self.camera.get(cv2.CAP_PROP_FPS)
        actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
        actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)

        if self.fps and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
            raise OSError(
                f"Can't set {self.fps=} for camera {self.camera_index}. Actual value is {actual_fps}."
            )
        if self.width and self.width != actual_width:
            raise OSError(
                f"Can't set {self.width=} for camera {self.camera_index}. Actual value is {actual_width}."
            )
        if self.height and self.height != actual_height:
            raise OSError(
                f"Can't set {self.height=} for camera {self.camera_index}. Actual value is {actual_height}."
            )

        self.fps = actual_fps
        self.width = actual_width
        self.height = actual_height

        self.is_connected = True

    def read(self, temporary_color: str | None = None) -> np.ndarray:
        """Read a frame from the camera returned in the format (height, width, channels)
           (e.g. (640, 480, 3)), contrarily to the pytorch format which is channel first.

           Note: Reading a frame is done every `camera.fps` times per second, and it is blocking.
           If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
        """
        if not self.is_connected:
            raise RobotDeviceNotConnectedError(f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first.")

        start_time = time.perf_counter()

        ret, color_image = self.camera.read()
        if not ret:
            raise OSError(f"Can't capture color image from camera {self.camera_index}.")

        requested_color = self.color if temporary_color is None else temporary_color

        if requested_color not in ["rgb", "bgr"]:
            raise ValueError(f"Expected color values are 'rgb' or 'bgr', but {requested_color} is provided.")

        # OpenCV uses BGR format as default (blue, green red) for all operations, including displaying images.
        # However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks,
        # so we convert the image color from BGR to RGB.
        if requested_color == "rgb":
            color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)

        # log the number of seconds it took to read the image
        self.logs["delta_timestamp_s"] = time.perf_counter() - start_time

        # log the utc time at which the image was received
        self.logs["timestamp_utc"] = capture_timestamp_utc()

        return color_image

    def read_loop(self):
        while True:
            self.color_image = self.read()

    def async_read(self):
        if not self.is_connected:
            raise RobotDeviceNotConnectedError(f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first.")

        if self.thread is None:
            self.thread = Thread(target=self.read_loop, args=())
            self.thread.daemon = True
            self.thread.start()

        num_tries = 0
        while self.color_image is None:
            num_tries += 1
            time.sleep(1/self.fps)
            if num_tries > self.fps:
                if self.thread.ident is None and not self.thread.is_alive():
                    raise Exception("The thread responsible for `self.async_read()` took too much time to start. There might be an issue. Verify that `self.thread.start()` has been called.")

        return self.color_image

    def disconnect(self):
        if not self.is_connected:
            raise RobotDeviceNotConnectedError(f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first.")

        self.camera.release()
        self.camera = None

        if self.thread is not None and self.thread.is_alive():
            # wait for the thread to finish
            self.thread.join()
            self.thread = None

        self.is_connected = False

    def __del__(self):
        if self.is_connected:
            self.disconnect()


def save_images_config(config: OpenCVCameraConfig, out_dir: Path):
    cameras = []

    available_cam_ids = find_camera_indices()
    print(f"Available camera indices: {available_cam_ids}")
    for camera_idx in available_cam_ids:
        camera = OpenCVCamera(camera_idx, config)
        cameras.append(camera)

    out_dir = out_dir.parent / f"{out_dir.name}_{config.width}x{config.height}_{config.fps}"
    benchmark_cameras(cameras, out_dir, save_images=True)


def benchmark_config(config: OpenCVCameraConfig, camera_ids: list[int]):
    cameras = [OpenCVCamera(idx, config) for idx in camera_ids]
    benchmark_cameras(cameras)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str, choices=["save_images", "benchmark"], default="save_images")
    parser.add_argument("--camera-ids", type=int, nargs="*", default=[16, 4, 22, 10])
    parser.add_argument("--fps", type=int, default=30)
    parser.add_argument("--width", type=str, default=640)
    parser.add_argument("--height", type=str, default=480)
    parser.add_argument("--out-dir", type=Path, default="outputs/benchmark_cameras/opencv/2024_06_22_1727")
    args = parser.parse_args()

    config = OpenCVCameraConfig(args.fps, args.width, args.height)
    # config = OpenCVCameraConfig()
    # config = OpenCVCameraConfig(60, 640, 480)
    # config = OpenCVCameraConfig(90, 640, 480)
    # config = OpenCVCameraConfig(30, 1280, 720)

    if args.mode == "save_images":
        save_images_config(config, args.out_dir)
    elif args.mode == "benchmark":
        benchmark_config(config, args.camera_ids)
    else:
        raise ValueError(args.mode)