forked from tangger/lerobot
Reorganize files
This commit is contained in:
100
lerobot/common/cameras/configs.py
Normal file
100
lerobot/common/cameras/configs.py
Normal file
@@ -0,0 +1,100 @@
|
||||
import abc
|
||||
from dataclasses import dataclass
|
||||
|
||||
import draccus
|
||||
|
||||
|
||||
@dataclass
|
||||
class CameraConfig(draccus.ChoiceRegistry, abc.ABC):
|
||||
@property
|
||||
def type(self) -> str:
|
||||
return self.get_choice_name(self.__class__)
|
||||
|
||||
|
||||
@CameraConfig.register_subclass("opencv")
|
||||
@dataclass
|
||||
class OpenCVCameraConfig(CameraConfig):
|
||||
"""
|
||||
Example of tested options for Intel Real Sense D405:
|
||||
|
||||
```python
|
||||
OpenCVCameraConfig(0, 30, 640, 480)
|
||||
OpenCVCameraConfig(0, 60, 640, 480)
|
||||
OpenCVCameraConfig(0, 90, 640, 480)
|
||||
OpenCVCameraConfig(0, 30, 1280, 720)
|
||||
```
|
||||
"""
|
||||
|
||||
camera_index: int
|
||||
fps: int | None = None
|
||||
width: int | None = None
|
||||
height: int | None = None
|
||||
color_mode: str = "rgb"
|
||||
channels: int | None = None
|
||||
rotation: int | None = None
|
||||
mock: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
if self.color_mode not in ["rgb", "bgr"]:
|
||||
raise ValueError(
|
||||
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
|
||||
)
|
||||
|
||||
self.channels = 3
|
||||
|
||||
if self.rotation not in [-90, None, 90, 180]:
|
||||
raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
|
||||
|
||||
|
||||
@CameraConfig.register_subclass("intelrealsense")
|
||||
@dataclass
|
||||
class IntelRealSenseCameraConfig(CameraConfig):
|
||||
"""
|
||||
Example of tested options for Intel Real Sense D405:
|
||||
|
||||
```python
|
||||
IntelRealSenseCameraConfig(128422271347, 30, 640, 480)
|
||||
IntelRealSenseCameraConfig(128422271347, 60, 640, 480)
|
||||
IntelRealSenseCameraConfig(128422271347, 90, 640, 480)
|
||||
IntelRealSenseCameraConfig(128422271347, 30, 1280, 720)
|
||||
IntelRealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True)
|
||||
IntelRealSenseCameraConfig(128422271347, 30, 640, 480, rotation=90)
|
||||
```
|
||||
"""
|
||||
|
||||
name: str | None = None
|
||||
serial_number: int | None = None
|
||||
fps: int | None = None
|
||||
width: int | None = None
|
||||
height: int | None = None
|
||||
color_mode: str = "rgb"
|
||||
channels: int | None = None
|
||||
use_depth: bool = False
|
||||
force_hardware_reset: bool = True
|
||||
rotation: int | None = None
|
||||
mock: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
# bool is stronger than is None, since it works with empty strings
|
||||
if bool(self.name) and bool(self.serial_number):
|
||||
raise ValueError(
|
||||
f"One of them must be set: name or serial_number, but {self.name=} and {self.serial_number=} provided."
|
||||
)
|
||||
|
||||
if self.color_mode not in ["rgb", "bgr"]:
|
||||
raise ValueError(
|
||||
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
|
||||
)
|
||||
|
||||
self.channels = 3
|
||||
|
||||
at_least_one_is_not_none = self.fps is not None or self.width is not None or self.height is not None
|
||||
at_least_one_is_none = self.fps is None or self.width is None or self.height is None
|
||||
if at_least_one_is_not_none and at_least_one_is_none:
|
||||
raise ValueError(
|
||||
"For `fps`, `width` and `height`, either all of them need to be set, or none of them, "
|
||||
f"but {self.fps=}, {self.width=}, {self.height=} were provided."
|
||||
)
|
||||
|
||||
if self.rotation not in [-90, None, 90, 180]:
|
||||
raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
|
||||
510
lerobot/common/cameras/intelrealsense.py
Normal file
510
lerobot/common/cameras/intelrealsense.py
Normal file
@@ -0,0 +1,510 @@
|
||||
"""
|
||||
This file contains utilities for recording frames from Intel Realsense cameras.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import logging
|
||||
import math
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from lerobot.common.cameras.configs import IntelRealSenseCameraConfig
|
||||
from lerobot.common.utils.robot_utils import (
|
||||
RobotDeviceAlreadyConnectedError,
|
||||
RobotDeviceNotConnectedError,
|
||||
busy_wait,
|
||||
)
|
||||
from lerobot.common.utils.utils import capture_timestamp_utc
|
||||
|
||||
SERIAL_NUMBER_INDEX = 1
|
||||
|
||||
|
||||
def find_cameras(raise_when_empty=True, mock=False) -> list[dict]:
|
||||
"""
|
||||
Find the names and the serial numbers of the Intel RealSense cameras
|
||||
connected to the computer.
|
||||
"""
|
||||
if mock:
|
||||
import tests.mock_pyrealsense2 as rs
|
||||
else:
|
||||
import pyrealsense2 as rs
|
||||
|
||||
cameras = []
|
||||
for device in rs.context().query_devices():
|
||||
serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX)))
|
||||
name = device.get_info(rs.camera_info.name)
|
||||
cameras.append(
|
||||
{
|
||||
"serial_number": serial_number,
|
||||
"name": name,
|
||||
}
|
||||
)
|
||||
|
||||
if raise_when_empty and len(cameras) == 0:
|
||||
raise OSError(
|
||||
"Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware."
|
||||
)
|
||||
|
||||
return cameras
|
||||
|
||||
|
||||
def save_image(img_array, serial_number, frame_index, images_dir):
|
||||
try:
|
||||
img = Image.fromarray(img_array)
|
||||
path = images_dir / f"camera_{serial_number}_frame_{frame_index:06d}.png"
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
img.save(str(path), quality=100)
|
||||
logging.info(f"Saved image: {path}")
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to save image for camera {serial_number} frame {frame_index}: {e}")
|
||||
|
||||
|
||||
def save_images_from_cameras(
|
||||
images_dir: Path,
|
||||
serial_numbers: list[int] | None = None,
|
||||
fps=None,
|
||||
width=None,
|
||||
height=None,
|
||||
record_time_s=2,
|
||||
mock=False,
|
||||
):
|
||||
"""
|
||||
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
|
||||
associated to a given serial number.
|
||||
"""
|
||||
if serial_numbers is None or len(serial_numbers) == 0:
|
||||
camera_infos = find_cameras(mock=mock)
|
||||
serial_numbers = [cam["serial_number"] for cam in camera_infos]
|
||||
|
||||
if mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
print("Connecting cameras")
|
||||
cameras = []
|
||||
for cam_sn in serial_numbers:
|
||||
print(f"{cam_sn=}")
|
||||
config = IntelRealSenseCameraConfig(
|
||||
serial_number=cam_sn, fps=fps, width=width, height=height, mock=mock
|
||||
)
|
||||
camera = IntelRealSenseCamera(config)
|
||||
camera.connect()
|
||||
print(
|
||||
f"IntelRealSenseCamera({camera.serial_number}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
|
||||
)
|
||||
cameras.append(camera)
|
||||
|
||||
images_dir = Path(images_dir)
|
||||
if images_dir.exists():
|
||||
shutil.rmtree(
|
||||
images_dir,
|
||||
)
|
||||
images_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print(f"Saving images to {images_dir}")
|
||||
frame_index = 0
|
||||
start_time = time.perf_counter()
|
||||
try:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
||||
while True:
|
||||
now = time.perf_counter()
|
||||
|
||||
for camera in cameras:
|
||||
# If we use async_read when fps is None, the loop will go full speed, and we will end up
|
||||
# saving the same images from the cameras multiple times until the RAM/disk is full.
|
||||
image = camera.read() if fps is None else camera.async_read()
|
||||
if image is None:
|
||||
print("No Frame")
|
||||
|
||||
bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
||||
|
||||
executor.submit(
|
||||
save_image,
|
||||
bgr_converted_image,
|
||||
camera.serial_number,
|
||||
frame_index,
|
||||
images_dir,
|
||||
)
|
||||
|
||||
if fps is not None:
|
||||
dt_s = time.perf_counter() - now
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
if time.perf_counter() - start_time > record_time_s:
|
||||
break
|
||||
|
||||
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
|
||||
|
||||
frame_index += 1
|
||||
finally:
|
||||
print(f"Images have been saved to {images_dir}")
|
||||
for camera in cameras:
|
||||
camera.disconnect()
|
||||
|
||||
|
||||
class IntelRealSenseCamera:
|
||||
"""
|
||||
The IntelRealSenseCamera class is similar to OpenCVCamera class but adds additional features for Intel Real Sense cameras:
|
||||
- is instantiated with the serial number of the camera - won't randomly change as it can be the case of OpenCVCamera for Linux,
|
||||
- can also be instantiated with the camera's name — if it's unique — using IntelRealSenseCamera.init_from_name(),
|
||||
- depth map can be returned.
|
||||
|
||||
To find the camera indices of your cameras, you can run our utility script that will save a few frames for each camera:
|
||||
```bash
|
||||
python lerobot/common/robot_devices/cameras/intelrealsense.py --images-dir outputs/images_from_intelrealsense_cameras
|
||||
```
|
||||
|
||||
When an IntelRealSenseCamera is instantiated, if no specific config is provided, the default fps, width, height and color_mode
|
||||
of the given camera will be used.
|
||||
|
||||
Example of instantiating with a serial number:
|
||||
```python
|
||||
from lerobot.common.robot_devices.cameras.configs import IntelRealSenseCameraConfig
|
||||
|
||||
config = IntelRealSenseCameraConfig(serial_number=128422271347)
|
||||
camera = IntelRealSenseCamera(config)
|
||||
camera.connect()
|
||||
color_image = camera.read()
|
||||
# when done using the camera, consider disconnecting
|
||||
camera.disconnect()
|
||||
```
|
||||
|
||||
Example of instantiating with a name if it's unique:
|
||||
```
|
||||
config = IntelRealSenseCameraConfig(name="Intel RealSense D405")
|
||||
```
|
||||
|
||||
Example of changing default fps, width, height and color_mode:
|
||||
```python
|
||||
config = IntelRealSenseCameraConfig(serial_number=128422271347, fps=30, width=1280, height=720)
|
||||
config = IntelRealSenseCameraConfig(serial_number=128422271347, fps=90, width=640, height=480)
|
||||
config = IntelRealSenseCameraConfig(serial_number=128422271347, fps=90, width=640, height=480, color_mode="bgr")
|
||||
# Note: might error out upon `camera.connect()` if these settings are not compatible with the camera
|
||||
```
|
||||
|
||||
Example of returning depth:
|
||||
```python
|
||||
config = IntelRealSenseCameraConfig(serial_number=128422271347, use_depth=True)
|
||||
camera = IntelRealSenseCamera(config)
|
||||
camera.connect()
|
||||
color_image, depth_map = camera.read()
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: IntelRealSenseCameraConfig,
|
||||
):
|
||||
self.config = config
|
||||
if config.name is not None:
|
||||
self.serial_number = self.find_serial_number_from_name(config.name)
|
||||
else:
|
||||
self.serial_number = config.serial_number
|
||||
self.fps = config.fps
|
||||
self.width = config.width
|
||||
self.height = config.height
|
||||
self.channels = config.channels
|
||||
self.color_mode = config.color_mode
|
||||
self.use_depth = config.use_depth
|
||||
self.force_hardware_reset = config.force_hardware_reset
|
||||
self.mock = config.mock
|
||||
|
||||
self.camera = None
|
||||
self.is_connected = False
|
||||
self.thread = None
|
||||
self.stop_event = None
|
||||
self.color_image = None
|
||||
self.depth_map = None
|
||||
self.logs = {}
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
# TODO(alibets): Do we keep original width/height or do we define them after rotation?
|
||||
self.rotation = None
|
||||
if config.rotation == -90:
|
||||
self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
|
||||
elif config.rotation == 90:
|
||||
self.rotation = cv2.ROTATE_90_CLOCKWISE
|
||||
elif config.rotation == 180:
|
||||
self.rotation = cv2.ROTATE_180
|
||||
|
||||
def find_serial_number_from_name(self, name):
|
||||
camera_infos = find_cameras()
|
||||
camera_names = [cam["name"] for cam in camera_infos]
|
||||
this_name_count = Counter(camera_names)[name]
|
||||
if this_name_count > 1:
|
||||
# TODO(aliberts): Test this with multiple identical cameras (Aloha)
|
||||
raise ValueError(
|
||||
f"Multiple {name} cameras have been detected. Please use their serial number to instantiate them."
|
||||
)
|
||||
|
||||
name_to_serial_dict = {cam["name"]: cam["serial_number"] for cam in camera_infos}
|
||||
cam_sn = name_to_serial_dict[name]
|
||||
|
||||
return cam_sn
|
||||
|
||||
def connect(self):
|
||||
if self.is_connected:
|
||||
raise RobotDeviceAlreadyConnectedError(
|
||||
f"IntelRealSenseCamera({self.serial_number}) is already connected."
|
||||
)
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_pyrealsense2 as rs
|
||||
else:
|
||||
import pyrealsense2 as rs
|
||||
|
||||
config = rs.config()
|
||||
config.enable_device(str(self.serial_number))
|
||||
|
||||
if self.fps and self.width and self.height:
|
||||
# TODO(rcadene): can we set rgb8 directly?
|
||||
config.enable_stream(rs.stream.color, self.width, self.height, rs.format.rgb8, self.fps)
|
||||
else:
|
||||
config.enable_stream(rs.stream.color)
|
||||
|
||||
if self.use_depth:
|
||||
if self.fps and self.width and self.height:
|
||||
config.enable_stream(rs.stream.depth, self.width, self.height, rs.format.z16, self.fps)
|
||||
else:
|
||||
config.enable_stream(rs.stream.depth)
|
||||
|
||||
self.camera = rs.pipeline()
|
||||
try:
|
||||
profile = self.camera.start(config)
|
||||
is_camera_open = True
|
||||
except RuntimeError:
|
||||
is_camera_open = False
|
||||
traceback.print_exc()
|
||||
|
||||
# If the camera doesn't work, display the camera indices corresponding to
|
||||
# valid cameras.
|
||||
if not is_camera_open:
|
||||
# Verify that the provided `serial_number` is valid before printing the traceback
|
||||
camera_infos = find_cameras()
|
||||
serial_numbers = [cam["serial_number"] for cam in camera_infos]
|
||||
if self.serial_number not in serial_numbers:
|
||||
raise ValueError(
|
||||
f"`serial_number` is expected to be one of these available cameras {serial_numbers}, but {self.serial_number} is provided instead. "
|
||||
"To find the serial number you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`."
|
||||
)
|
||||
|
||||
raise OSError(f"Can't access IntelRealSenseCamera({self.serial_number}).")
|
||||
|
||||
color_stream = profile.get_stream(rs.stream.color)
|
||||
color_profile = color_stream.as_video_stream_profile()
|
||||
actual_fps = color_profile.fps()
|
||||
actual_width = color_profile.width()
|
||||
actual_height = color_profile.height()
|
||||
|
||||
# Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
|
||||
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
|
||||
# Using `OSError` since it's a broad that encompasses issues related to device communication
|
||||
raise OSError(
|
||||
f"Can't set {self.fps=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_fps}."
|
||||
)
|
||||
if self.width is not None and self.width != actual_width:
|
||||
raise OSError(
|
||||
f"Can't set {self.width=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_width}."
|
||||
)
|
||||
if self.height is not None and self.height != actual_height:
|
||||
raise OSError(
|
||||
f"Can't set {self.height=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_height}."
|
||||
)
|
||||
|
||||
self.fps = round(actual_fps)
|
||||
self.width = round(actual_width)
|
||||
self.height = round(actual_height)
|
||||
|
||||
self.is_connected = True
|
||||
|
||||
def read(self, temporary_color: str | None = None) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
|
||||
"""Read a frame from the camera returned in the format height x width x channels (e.g. 480 x 640 x 3)
|
||||
of type `np.uint8`, contrarily to the pytorch format which is float channel first.
|
||||
|
||||
When `use_depth=True`, returns a tuple `(color_image, depth_map)` with a depth map in the format
|
||||
height x width (e.g. 480 x 640) of type np.uint16.
|
||||
|
||||
Note: Reading a frame is done every `camera.fps` times per second, and it is blocking.
|
||||
If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
|
||||
"""
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
start_time = time.perf_counter()
|
||||
|
||||
frame = self.camera.wait_for_frames(timeout_ms=5000)
|
||||
|
||||
color_frame = frame.get_color_frame()
|
||||
|
||||
if not color_frame:
|
||||
raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.serial_number}).")
|
||||
|
||||
color_image = np.asanyarray(color_frame.get_data())
|
||||
|
||||
requested_color_mode = self.color_mode if temporary_color is None else temporary_color
|
||||
if requested_color_mode not in ["rgb", "bgr"]:
|
||||
raise ValueError(
|
||||
f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided."
|
||||
)
|
||||
|
||||
# IntelRealSense uses RGB format as default (red, green, blue).
|
||||
if requested_color_mode == "bgr":
|
||||
color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)
|
||||
|
||||
h, w, _ = color_image.shape
|
||||
if h != self.height or w != self.width:
|
||||
raise OSError(
|
||||
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
|
||||
)
|
||||
|
||||
if self.rotation is not None:
|
||||
color_image = cv2.rotate(color_image, self.rotation)
|
||||
|
||||
# log the number of seconds it took to read the image
|
||||
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
|
||||
|
||||
# log the utc time at which the image was received
|
||||
self.logs["timestamp_utc"] = capture_timestamp_utc()
|
||||
|
||||
if self.use_depth:
|
||||
depth_frame = frame.get_depth_frame()
|
||||
if not depth_frame:
|
||||
raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.serial_number}).")
|
||||
|
||||
depth_map = np.asanyarray(depth_frame.get_data())
|
||||
|
||||
h, w = depth_map.shape
|
||||
if h != self.height or w != self.width:
|
||||
raise OSError(
|
||||
f"Can't capture depth map with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
|
||||
)
|
||||
|
||||
if self.rotation is not None:
|
||||
depth_map = cv2.rotate(depth_map, self.rotation)
|
||||
|
||||
return color_image, depth_map
|
||||
else:
|
||||
return color_image
|
||||
|
||||
def read_loop(self):
|
||||
while not self.stop_event.is_set():
|
||||
if self.use_depth:
|
||||
self.color_image, self.depth_map = self.read()
|
||||
else:
|
||||
self.color_image = self.read()
|
||||
|
||||
def async_read(self):
|
||||
"""Access the latest color image"""
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
if self.thread is None:
|
||||
self.stop_event = threading.Event()
|
||||
self.thread = Thread(target=self.read_loop, args=())
|
||||
self.thread.daemon = True
|
||||
self.thread.start()
|
||||
|
||||
num_tries = 0
|
||||
while self.color_image is None:
|
||||
# TODO(rcadene, aliberts): intelrealsense has diverged compared to opencv over here
|
||||
num_tries += 1
|
||||
time.sleep(1 / self.fps)
|
||||
if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()):
|
||||
raise Exception(
|
||||
"The thread responsible for `self.async_read()` took too much time to start. There might be an issue. Verify that `self.thread.start()` has been called."
|
||||
)
|
||||
|
||||
if self.use_depth:
|
||||
return self.color_image, self.depth_map
|
||||
else:
|
||||
return self.color_image
|
||||
|
||||
def disconnect(self):
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
if self.thread is not None and self.thread.is_alive():
|
||||
# wait for the thread to finish
|
||||
self.stop_event.set()
|
||||
self.thread.join()
|
||||
self.thread = None
|
||||
self.stop_event = None
|
||||
|
||||
self.camera.stop()
|
||||
self.camera = None
|
||||
|
||||
self.is_connected = False
|
||||
|
||||
def __del__(self):
|
||||
if getattr(self, "is_connected", False):
|
||||
self.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Save a few frames using `IntelRealSenseCamera` for all cameras connected to the computer, or a selected subset."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--serial-numbers",
|
||||
type=int,
|
||||
nargs="*",
|
||||
default=None,
|
||||
help="List of serial numbers used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fps",
|
||||
type=int,
|
||||
default=30,
|
||||
help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--width",
|
||||
type=str,
|
||||
default=640,
|
||||
help="Set the width for all cameras. If not provided, use the default width of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--height",
|
||||
type=str,
|
||||
default=480,
|
||||
help="Set the height for all cameras. If not provided, use the default height of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--images-dir",
|
||||
type=Path,
|
||||
default="outputs/images_from_intelrealsense_cameras",
|
||||
help="Set directory to save a few frames for each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--record-time-s",
|
||||
type=float,
|
||||
default=2.0,
|
||||
help="Set the number of seconds used to record the frames. By default, 2 seconds.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
save_images_from_cameras(**vars(args))
|
||||
482
lerobot/common/cameras/opencv.py
Normal file
482
lerobot/common/cameras/opencv.py
Normal file
@@ -0,0 +1,482 @@
|
||||
"""
|
||||
This file contains utilities for recording frames from cameras. For more info look at `OpenCVCamera` docstring.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import math
|
||||
import platform
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from lerobot.common.cameras.configs import OpenCVCameraConfig
|
||||
from lerobot.common.utils.robot_utils import (
|
||||
RobotDeviceAlreadyConnectedError,
|
||||
RobotDeviceNotConnectedError,
|
||||
busy_wait,
|
||||
)
|
||||
from lerobot.common.utils.utils import capture_timestamp_utc
|
||||
|
||||
# The maximum opencv device index depends on your operating system. For instance,
|
||||
# if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
|
||||
# on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23.
|
||||
# When you change the USB port or reboot the computer, the operating system might
|
||||
# treat the same cameras as new devices. Thus we select a higher bound to search indices.
|
||||
MAX_OPENCV_INDEX = 60
|
||||
|
||||
|
||||
def find_cameras(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False) -> list[dict]:
|
||||
cameras = []
|
||||
if platform.system() == "Linux":
|
||||
print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
|
||||
possible_ports = [str(port) for port in Path("/dev").glob("video*")]
|
||||
ports = _find_cameras(possible_ports, mock=mock)
|
||||
for port in ports:
|
||||
cameras.append(
|
||||
{
|
||||
"port": port,
|
||||
"index": int(port.removeprefix("/dev/video")),
|
||||
}
|
||||
)
|
||||
else:
|
||||
print(
|
||||
"Mac or Windows detected. Finding available camera indices through "
|
||||
f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
|
||||
)
|
||||
possible_indices = range(max_index_search_range)
|
||||
indices = _find_cameras(possible_indices, mock=mock)
|
||||
for index in indices:
|
||||
cameras.append(
|
||||
{
|
||||
"port": None,
|
||||
"index": index,
|
||||
}
|
||||
)
|
||||
|
||||
return cameras
|
||||
|
||||
|
||||
def _find_cameras(
|
||||
possible_camera_ids: list[int | str], raise_when_empty=False, mock=False
|
||||
) -> list[int | str]:
|
||||
if mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
camera_ids = []
|
||||
for camera_idx in possible_camera_ids:
|
||||
camera = cv2.VideoCapture(camera_idx)
|
||||
is_open = camera.isOpened()
|
||||
camera.release()
|
||||
|
||||
if is_open:
|
||||
print(f"Camera found at index {camera_idx}")
|
||||
camera_ids.append(camera_idx)
|
||||
|
||||
if raise_when_empty and len(camera_ids) == 0:
|
||||
raise OSError(
|
||||
"Not a single camera was detected. Try re-plugging, or re-installing `opencv2`, "
|
||||
"or your camera driver, or make sure your camera is compatible with opencv2."
|
||||
)
|
||||
|
||||
return camera_ids
|
||||
|
||||
|
||||
def is_valid_unix_path(path: str) -> bool:
|
||||
"""Note: if 'path' points to a symlink, this will return True only if the target exists"""
|
||||
p = Path(path)
|
||||
return p.is_absolute() and p.exists()
|
||||
|
||||
|
||||
def get_camera_index_from_unix_port(port: Path) -> int:
|
||||
return int(str(port.resolve()).removeprefix("/dev/video"))
|
||||
|
||||
|
||||
def save_image(img_array, camera_index, frame_index, images_dir):
|
||||
img = Image.fromarray(img_array)
|
||||
path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
img.save(str(path), quality=100)
|
||||
|
||||
|
||||
def save_images_from_cameras(
|
||||
images_dir: Path,
|
||||
camera_ids: list | None = None,
|
||||
fps=None,
|
||||
width=None,
|
||||
height=None,
|
||||
record_time_s=2,
|
||||
mock=False,
|
||||
):
|
||||
"""
|
||||
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
|
||||
associated to a given camera index.
|
||||
"""
|
||||
if camera_ids is None or len(camera_ids) == 0:
|
||||
camera_infos = find_cameras(mock=mock)
|
||||
camera_ids = [cam["index"] for cam in camera_infos]
|
||||
|
||||
print("Connecting cameras")
|
||||
cameras = []
|
||||
for cam_idx in camera_ids:
|
||||
config = OpenCVCameraConfig(camera_index=cam_idx, fps=fps, width=width, height=height, mock=mock)
|
||||
camera = OpenCVCamera(config)
|
||||
camera.connect()
|
||||
print(
|
||||
f"OpenCVCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, "
|
||||
f"height={camera.height}, color_mode={camera.color_mode})"
|
||||
)
|
||||
cameras.append(camera)
|
||||
|
||||
images_dir = Path(images_dir)
|
||||
if images_dir.exists():
|
||||
shutil.rmtree(
|
||||
images_dir,
|
||||
)
|
||||
images_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print(f"Saving images to {images_dir}")
|
||||
frame_index = 0
|
||||
start_time = time.perf_counter()
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
||||
while True:
|
||||
now = time.perf_counter()
|
||||
|
||||
for camera in cameras:
|
||||
# If we use async_read when fps is None, the loop will go full speed, and we will endup
|
||||
# saving the same images from the cameras multiple times until the RAM/disk is full.
|
||||
image = camera.read() if fps is None else camera.async_read()
|
||||
|
||||
executor.submit(
|
||||
save_image,
|
||||
image,
|
||||
camera.camera_index,
|
||||
frame_index,
|
||||
images_dir,
|
||||
)
|
||||
|
||||
if fps is not None:
|
||||
dt_s = time.perf_counter() - now
|
||||
busy_wait(1 / fps - dt_s)
|
||||
|
||||
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
|
||||
|
||||
if time.perf_counter() - start_time > record_time_s:
|
||||
break
|
||||
|
||||
frame_index += 1
|
||||
|
||||
print(f"Images have been saved to {images_dir}")
|
||||
|
||||
|
||||
class OpenCVCamera:
|
||||
"""
|
||||
The OpenCVCamera class allows to efficiently record images from cameras. It relies on opencv2 to communicate
|
||||
with the cameras. Most cameras are compatible. For more info, see the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
|
||||
|
||||
An OpenCVCamera instance requires a camera index (e.g. `OpenCVCamera(camera_index=0)`). When you only have one camera
|
||||
like a webcam of a laptop, the camera index is expected to be 0, but it might also be very different, and the camera index
|
||||
might change if you reboot your computer or re-plug your camera. This behavior depends on your operation system.
|
||||
|
||||
To find the camera indices of your cameras, you can run our utility script that will be save a few frames for each camera:
|
||||
```bash
|
||||
python lerobot/common/robot_devices/cameras/opencv.py --images-dir outputs/images_from_opencv_cameras
|
||||
```
|
||||
|
||||
When an OpenCVCamera is instantiated, if no specific config is provided, the default fps, width, height and color_mode
|
||||
of the given camera will be used.
|
||||
|
||||
Example of usage:
|
||||
```python
|
||||
from lerobot.common.robot_devices.cameras.configs import OpenCVCameraConfig
|
||||
|
||||
config = OpenCVCameraConfig(camera_index=0)
|
||||
camera = OpenCVCamera(config)
|
||||
camera.connect()
|
||||
color_image = camera.read()
|
||||
# when done using the camera, consider disconnecting
|
||||
camera.disconnect()
|
||||
```
|
||||
|
||||
Example of changing default fps, width, height and color_mode:
|
||||
```python
|
||||
config = OpenCVCameraConfig(camera_index=0, fps=30, width=1280, height=720)
|
||||
config = OpenCVCameraConfig(camera_index=0, fps=90, width=640, height=480)
|
||||
config = OpenCVCameraConfig(camera_index=0, fps=90, width=640, height=480, color_mode="bgr")
|
||||
# Note: might error out open `camera.connect()` if these settings are not compatible with the camera
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, config: OpenCVCameraConfig):
|
||||
self.config = config
|
||||
self.camera_index = config.camera_index
|
||||
self.port = None
|
||||
|
||||
# Linux uses ports for connecting to cameras
|
||||
if platform.system() == "Linux":
|
||||
if isinstance(self.camera_index, int):
|
||||
self.port = Path(f"/dev/video{self.camera_index}")
|
||||
elif isinstance(self.camera_index, str) and is_valid_unix_path(self.camera_index):
|
||||
self.port = Path(self.camera_index)
|
||||
# Retrieve the camera index from a potentially symlinked path
|
||||
self.camera_index = get_camera_index_from_unix_port(self.port)
|
||||
else:
|
||||
raise ValueError(f"Please check the provided camera_index: {self.camera_index}")
|
||||
|
||||
self.fps = config.fps
|
||||
self.width = config.width
|
||||
self.height = config.height
|
||||
self.channels = config.channels
|
||||
self.color_mode = config.color_mode
|
||||
self.mock = config.mock
|
||||
|
||||
self.camera = None
|
||||
self.is_connected = False
|
||||
self.thread = None
|
||||
self.stop_event = None
|
||||
self.color_image = None
|
||||
self.logs = {}
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
# TODO(aliberts): Do we keep original width/height or do we define them after rotation?
|
||||
self.rotation = None
|
||||
if config.rotation == -90:
|
||||
self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
|
||||
elif config.rotation == 90:
|
||||
self.rotation = cv2.ROTATE_90_CLOCKWISE
|
||||
elif config.rotation == 180:
|
||||
self.rotation = cv2.ROTATE_180
|
||||
|
||||
def connect(self):
|
||||
if self.is_connected:
|
||||
raise RobotDeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.")
|
||||
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
# Use 1 thread to avoid blocking the main thread. Especially useful during data collection
|
||||
# when other threads are used to save the images.
|
||||
cv2.setNumThreads(1)
|
||||
|
||||
camera_idx = f"/dev/video{self.camera_index}" if platform.system() == "Linux" else self.camera_index
|
||||
# First create a temporary camera trying to access `camera_index`,
|
||||
# and verify it is a valid camera by calling `isOpened`.
|
||||
tmp_camera = cv2.VideoCapture(camera_idx)
|
||||
is_camera_open = tmp_camera.isOpened()
|
||||
# Release camera to make it accessible for `find_camera_indices`
|
||||
tmp_camera.release()
|
||||
del tmp_camera
|
||||
|
||||
# If the camera doesn't work, display the camera indices corresponding to
|
||||
# valid cameras.
|
||||
if not is_camera_open:
|
||||
# Verify that the provided `camera_index` is valid before printing the traceback
|
||||
cameras_info = find_cameras()
|
||||
available_cam_ids = [cam["index"] for cam in cameras_info]
|
||||
if self.camera_index not in available_cam_ids:
|
||||
raise ValueError(
|
||||
f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
|
||||
"To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/opencv.py`."
|
||||
)
|
||||
|
||||
raise OSError(f"Can't access OpenCVCamera({camera_idx}).")
|
||||
|
||||
# Secondly, create the camera that will be used downstream.
|
||||
# Note: For some unknown reason, calling `isOpened` blocks the camera which then
|
||||
# needs to be re-created.
|
||||
self.camera = cv2.VideoCapture(camera_idx)
|
||||
|
||||
if self.fps is not None:
|
||||
self.camera.set(cv2.CAP_PROP_FPS, self.fps)
|
||||
if self.width is not None:
|
||||
self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
|
||||
if self.height is not None:
|
||||
self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, self.height)
|
||||
|
||||
actual_fps = self.camera.get(cv2.CAP_PROP_FPS)
|
||||
actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
|
||||
actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
|
||||
|
||||
# Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
|
||||
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
|
||||
# Using `OSError` since it's a broad that encompasses issues related to device communication
|
||||
raise OSError(
|
||||
f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}."
|
||||
)
|
||||
if self.width is not None and not math.isclose(self.width, actual_width, rel_tol=1e-3):
|
||||
raise OSError(
|
||||
f"Can't set {self.width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}."
|
||||
)
|
||||
if self.height is not None and not math.isclose(self.height, actual_height, rel_tol=1e-3):
|
||||
raise OSError(
|
||||
f"Can't set {self.height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}."
|
||||
)
|
||||
|
||||
self.fps = round(actual_fps)
|
||||
self.width = round(actual_width)
|
||||
self.height = round(actual_height)
|
||||
|
||||
self.is_connected = True
|
||||
|
||||
def read(self, temporary_color_mode: str | None = None) -> np.ndarray:
|
||||
"""Read a frame from the camera returned in the format (height, width, channels)
|
||||
(e.g. 480 x 640 x 3), contrarily to the pytorch format which is channel first.
|
||||
|
||||
Note: Reading a frame is done every `camera.fps` times per second, and it is blocking.
|
||||
If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
|
||||
"""
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
start_time = time.perf_counter()
|
||||
|
||||
ret, color_image = self.camera.read()
|
||||
|
||||
if not ret:
|
||||
raise OSError(f"Can't capture color image from camera {self.camera_index}.")
|
||||
|
||||
requested_color_mode = self.color_mode if temporary_color_mode is None else temporary_color_mode
|
||||
|
||||
if requested_color_mode not in ["rgb", "bgr"]:
|
||||
raise ValueError(
|
||||
f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided."
|
||||
)
|
||||
|
||||
# OpenCV uses BGR format as default (blue, green, red) for all operations, including displaying images.
|
||||
# However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks,
|
||||
# so we convert the image color from BGR to RGB.
|
||||
if requested_color_mode == "rgb":
|
||||
if self.mock:
|
||||
import tests.mock_cv2 as cv2
|
||||
else:
|
||||
import cv2
|
||||
|
||||
color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
h, w, _ = color_image.shape
|
||||
if h != self.height or w != self.width:
|
||||
raise OSError(
|
||||
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
|
||||
)
|
||||
|
||||
if self.rotation is not None:
|
||||
color_image = cv2.rotate(color_image, self.rotation)
|
||||
|
||||
# log the number of seconds it took to read the image
|
||||
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
|
||||
|
||||
# log the utc time at which the image was received
|
||||
self.logs["timestamp_utc"] = capture_timestamp_utc()
|
||||
|
||||
self.color_image = color_image
|
||||
|
||||
return color_image
|
||||
|
||||
def read_loop(self):
|
||||
while not self.stop_event.is_set():
|
||||
try:
|
||||
self.color_image = self.read()
|
||||
except Exception as e:
|
||||
print(f"Error reading in thread: {e}")
|
||||
|
||||
def async_read(self):
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
if self.thread is None:
|
||||
self.stop_event = threading.Event()
|
||||
self.thread = Thread(target=self.read_loop, args=())
|
||||
self.thread.daemon = True
|
||||
self.thread.start()
|
||||
|
||||
num_tries = 0
|
||||
while True:
|
||||
if self.color_image is not None:
|
||||
return self.color_image
|
||||
|
||||
time.sleep(1 / self.fps)
|
||||
num_tries += 1
|
||||
if num_tries > self.fps * 2:
|
||||
raise TimeoutError("Timed out waiting for async_read() to start.")
|
||||
|
||||
def disconnect(self):
|
||||
if not self.is_connected:
|
||||
raise RobotDeviceNotConnectedError(
|
||||
f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
||||
)
|
||||
|
||||
if self.thread is not None:
|
||||
self.stop_event.set()
|
||||
self.thread.join() # wait for the thread to finish
|
||||
self.thread = None
|
||||
self.stop_event = None
|
||||
|
||||
self.camera.release()
|
||||
self.camera = None
|
||||
self.is_connected = False
|
||||
|
||||
def __del__(self):
|
||||
if getattr(self, "is_connected", False):
|
||||
self.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Save a few frames using `OpenCVCamera` for all cameras connected to the computer, or a selected subset."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--camera-ids",
|
||||
type=int,
|
||||
nargs="*",
|
||||
default=None,
|
||||
help="List of camera indices used to instantiate the `OpenCVCamera`. If not provided, find and use all available camera indices.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fps",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--width",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Set the width for all cameras. If not provided, use the default width of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--height",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Set the height for all cameras. If not provided, use the default height of each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--images-dir",
|
||||
type=Path,
|
||||
default="outputs/images_from_opencv_cameras",
|
||||
help="Set directory to save a few frames for each camera.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--record-time-s",
|
||||
type=float,
|
||||
default=4.0,
|
||||
help="Set the number of seconds used to record the frames. By default, 2 seconds.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
save_images_from_cameras(**vars(args))
|
||||
53
lerobot/common/cameras/utils.py
Normal file
53
lerobot/common/cameras/utils.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from typing import Protocol
|
||||
|
||||
import numpy as np
|
||||
|
||||
from lerobot.common.cameras.configs import (
|
||||
CameraConfig,
|
||||
IntelRealSenseCameraConfig,
|
||||
OpenCVCameraConfig,
|
||||
)
|
||||
|
||||
|
||||
# Defines a camera type
|
||||
class Camera(Protocol):
|
||||
def connect(self): ...
|
||||
def read(self, temporary_color: str | None = None) -> np.ndarray: ...
|
||||
def async_read(self) -> np.ndarray: ...
|
||||
def disconnect(self): ...
|
||||
|
||||
|
||||
def make_cameras_from_configs(camera_configs: dict[str, CameraConfig]) -> list[Camera]:
|
||||
cameras = {}
|
||||
|
||||
for key, cfg in camera_configs.items():
|
||||
if cfg.type == "opencv":
|
||||
from lerobot.common.cameras.opencv import OpenCVCamera
|
||||
|
||||
cameras[key] = OpenCVCamera(cfg)
|
||||
|
||||
elif cfg.type == "intelrealsense":
|
||||
from lerobot.common.cameras.intelrealsense import IntelRealSenseCamera
|
||||
|
||||
cameras[key] = IntelRealSenseCamera(cfg)
|
||||
else:
|
||||
raise ValueError(f"The motor type '{cfg.type}' is not valid.")
|
||||
|
||||
return cameras
|
||||
|
||||
|
||||
def make_camera(camera_type, **kwargs) -> Camera:
|
||||
if camera_type == "opencv":
|
||||
from lerobot.common.cameras.opencv import OpenCVCamera
|
||||
|
||||
config = OpenCVCameraConfig(**kwargs)
|
||||
return OpenCVCamera(config)
|
||||
|
||||
elif camera_type == "intelrealsense":
|
||||
from lerobot.common.cameras.intelrealsense import IntelRealSenseCamera
|
||||
|
||||
config = IntelRealSenseCameraConfig(**kwargs)
|
||||
return IntelRealSenseCamera(config)
|
||||
|
||||
else:
|
||||
raise ValueError(f"The camera type '{camera_type}' is not valid.")
|
||||
Reference in New Issue
Block a user