512 lines
18 KiB
Python
512 lines
18 KiB
Python
"""
|
|
This file contains utilities for recording frames from Intel Realsense cameras.
|
|
"""
|
|
|
|
import argparse
|
|
import concurrent.futures
|
|
import logging
|
|
import math
|
|
import shutil
|
|
import threading
|
|
import time
|
|
import traceback
|
|
from dataclasses import dataclass, replace
|
|
from pathlib import Path
|
|
from threading import Thread
|
|
|
|
import numpy as np
|
|
from PIL import Image
|
|
|
|
from lerobot.common.robot_devices.utils import (
|
|
RobotDeviceAlreadyConnectedError,
|
|
RobotDeviceNotConnectedError,
|
|
)
|
|
from lerobot.common.utils.utils import capture_timestamp_utc
|
|
from lerobot.scripts.control_robot import busy_wait
|
|
|
|
SERIAL_NUMBER_INDEX = 1
|
|
|
|
|
|
def find_camera_indices(raise_when_empty=True, mock=False) -> list[int]:
|
|
"""
|
|
Find the serial numbers of the Intel RealSense cameras
|
|
connected to the computer.
|
|
"""
|
|
if mock:
|
|
from tests.mock_pyrealsense2 import (
|
|
RSCameraInfo,
|
|
RSContext,
|
|
)
|
|
else:
|
|
from pyrealsense2 import camera_info as RSCameraInfo # noqa: N812
|
|
from pyrealsense2 import context as RSContext # noqa: N812
|
|
|
|
camera_ids = []
|
|
for device in RSContext().query_devices():
|
|
serial_number = int(device.get_info(RSCameraInfo(SERIAL_NUMBER_INDEX)))
|
|
camera_ids.append(serial_number)
|
|
|
|
if raise_when_empty and len(camera_ids) == 0:
|
|
raise OSError(
|
|
"Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware."
|
|
)
|
|
|
|
return camera_ids
|
|
|
|
|
|
def save_image(img_array, camera_idx, frame_index, images_dir):
|
|
try:
|
|
img = Image.fromarray(img_array)
|
|
path = images_dir / f"camera_{camera_idx}_frame_{frame_index:06d}.png"
|
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
img.save(str(path), quality=100)
|
|
logging.info(f"Saved image: {path}")
|
|
except Exception as e:
|
|
logging.error(f"Failed to save image for camera {camera_idx} frame {frame_index}: {e}")
|
|
|
|
|
|
def save_images_from_cameras(
|
|
images_dir: Path,
|
|
camera_ids: list[int] | None = None,
|
|
fps=None,
|
|
width=None,
|
|
height=None,
|
|
record_time_s=2,
|
|
mock=False,
|
|
):
|
|
"""
|
|
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
|
|
associated to a given camera index.
|
|
"""
|
|
if camera_ids is None:
|
|
camera_ids = find_camera_indices(mock=mock)
|
|
|
|
if mock:
|
|
from tests.mock_cv2 import COLOR_RGB2BGR, cvtColor
|
|
else:
|
|
from cv2 import COLOR_RGB2BGR, cvtColor
|
|
|
|
print("Connecting cameras")
|
|
cameras = []
|
|
for cam_idx in camera_ids:
|
|
camera = IntelRealSenseCamera(cam_idx, fps=fps, width=width, height=height, mock=mock)
|
|
camera.connect()
|
|
print(
|
|
f"IntelRealSenseCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
|
|
)
|
|
cameras.append(camera)
|
|
|
|
images_dir = Path(images_dir)
|
|
if images_dir.exists():
|
|
shutil.rmtree(
|
|
images_dir,
|
|
)
|
|
images_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
print(f"Saving images to {images_dir}")
|
|
frame_index = 0
|
|
start_time = time.perf_counter()
|
|
try:
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
|
|
while True:
|
|
now = time.perf_counter()
|
|
|
|
for camera in cameras:
|
|
# If we use async_read when fps is None, the loop will go full speed, and we will end up
|
|
# saving the same images from the cameras multiple times until the RAM/disk is full.
|
|
image = camera.read() if fps is None else camera.async_read()
|
|
if image is None:
|
|
print("No Frame")
|
|
|
|
bgr_converted_image = cvtColor(image, COLOR_RGB2BGR)
|
|
|
|
executor.submit(
|
|
save_image,
|
|
bgr_converted_image,
|
|
camera.camera_index,
|
|
frame_index,
|
|
images_dir,
|
|
)
|
|
|
|
if fps is not None:
|
|
dt_s = time.perf_counter() - now
|
|
busy_wait(1 / fps - dt_s)
|
|
|
|
if time.perf_counter() - start_time > record_time_s:
|
|
break
|
|
|
|
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
|
|
|
|
frame_index += 1
|
|
finally:
|
|
print(f"Images have been saved to {images_dir}")
|
|
for camera in cameras:
|
|
camera.disconnect()
|
|
|
|
|
|
@dataclass
|
|
class IntelRealSenseCameraConfig:
|
|
"""
|
|
Example of tested options for Intel Real Sense D405:
|
|
|
|
```python
|
|
IntelRealSenseCameraConfig(30, 640, 480)
|
|
IntelRealSenseCameraConfig(60, 640, 480)
|
|
IntelRealSenseCameraConfig(90, 640, 480)
|
|
IntelRealSenseCameraConfig(30, 1280, 720)
|
|
IntelRealSenseCameraConfig(30, 640, 480, use_depth=True)
|
|
```
|
|
"""
|
|
|
|
fps: int | None = None
|
|
width: int | None = None
|
|
height: int | None = None
|
|
color_mode: str = "rgb"
|
|
use_depth: bool = False
|
|
force_hardware_reset: bool = True
|
|
mock: bool = False
|
|
|
|
def __post_init__(self):
|
|
if self.color_mode not in ["rgb", "bgr"]:
|
|
raise ValueError(
|
|
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
|
|
)
|
|
|
|
at_least_one_is_not_none = self.fps is not None or self.width is not None or self.height is not None
|
|
at_least_one_is_none = self.fps is None or self.width is None or self.height is None
|
|
if at_least_one_is_not_none and at_least_one_is_none:
|
|
raise ValueError(
|
|
"For `fps`, `width` and `height`, either all of them need to be set, or none of them, "
|
|
f"but {self.fps=}, {self.width=}, {self.height=} were provided."
|
|
)
|
|
|
|
|
|
class IntelRealSenseCamera:
|
|
"""
|
|
The IntelRealSenseCamera class is similar to OpenCVCamera class but adds additional features for Intel Real Sense cameras:
|
|
- camera_index corresponds to the serial number of the camera,
|
|
- camera_index won't randomly change as it can be the case of OpenCVCamera for Linux,
|
|
- read is more reliable than OpenCVCamera,
|
|
- depth map can be returned.
|
|
|
|
To find the camera indices of your cameras, you can run our utility script that will save a few frames for each camera:
|
|
```bash
|
|
python lerobot/common/robot_devices/cameras/intelrealsense.py --images-dir outputs/images_from_intelrealsense_cameras
|
|
```
|
|
|
|
When an IntelRealSenseCamera is instantiated, if no specific config is provided, the default fps, width, height and color_mode
|
|
of the given camera will be used.
|
|
|
|
Example of usage:
|
|
```python
|
|
camera_index = 128422271347
|
|
camera = IntelRealSenseCamera(camera_index)
|
|
camera.connect()
|
|
color_image = camera.read()
|
|
# when done using the camera, consider disconnecting
|
|
camera.disconnect()
|
|
```
|
|
|
|
Example of changing default fps, width, height and color_mode:
|
|
```python
|
|
camera = IntelRealSenseCamera(camera_index, fps=30, width=1280, height=720)
|
|
camera = connect() # applies the settings, might error out if these settings are not compatible with the camera
|
|
|
|
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480)
|
|
camera = connect()
|
|
|
|
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480, color_mode="bgr")
|
|
camera = connect()
|
|
```
|
|
|
|
Example of returning depth:
|
|
```python
|
|
camera = IntelRealSenseCamera(camera_index, use_depth=True)
|
|
camera.connect()
|
|
color_image, depth_map = camera.read()
|
|
```
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
camera_index: int,
|
|
config: IntelRealSenseCameraConfig | None = None,
|
|
**kwargs,
|
|
):
|
|
if config is None:
|
|
config = IntelRealSenseCameraConfig()
|
|
|
|
# Overwrite the config arguments using kwargs
|
|
config = replace(config, **kwargs)
|
|
|
|
self.camera_index = camera_index
|
|
self.fps = config.fps
|
|
self.width = config.width
|
|
self.height = config.height
|
|
self.color_mode = config.color_mode
|
|
self.use_depth = config.use_depth
|
|
self.force_hardware_reset = config.force_hardware_reset
|
|
self.mock = config.mock
|
|
|
|
self.camera = None
|
|
self.is_connected = False
|
|
self.thread = None
|
|
self.stop_event = None
|
|
self.color_image = None
|
|
self.depth_map = None
|
|
self.logs = {}
|
|
|
|
def connect(self):
|
|
if self.is_connected:
|
|
raise RobotDeviceAlreadyConnectedError(
|
|
f"IntelRealSenseCamera({self.camera_index}) is already connected."
|
|
)
|
|
|
|
if self.mock:
|
|
from tests.mock_pyrealsense2 import (
|
|
RSConfig,
|
|
RSFormat,
|
|
RSPipeline,
|
|
RSStream,
|
|
)
|
|
else:
|
|
from pyrealsense2 import config as RSConfig # noqa: N812
|
|
from pyrealsense2 import format as RSFormat # noqa: N812
|
|
from pyrealsense2 import pipeline as RSPipeline # noqa: N812
|
|
from pyrealsense2 import stream as RSStream # noqa: N812
|
|
|
|
config = RSConfig()
|
|
config.enable_device(str(self.camera_index))
|
|
|
|
if self.fps and self.width and self.height:
|
|
# TODO(rcadene): can we set rgb8 directly?
|
|
config.enable_stream(RSStream.color, self.width, self.height, RSFormat.rgb8, self.fps)
|
|
else:
|
|
config.enable_stream(RSStream.color)
|
|
|
|
if self.use_depth:
|
|
if self.fps and self.width and self.height:
|
|
config.enable_stream(RSStream.depth, self.width, self.height, RSFormat.z16, self.fps)
|
|
else:
|
|
config.enable_stream(RSStream.depth)
|
|
|
|
self.camera = RSPipeline()
|
|
try:
|
|
profile = self.camera.start(config)
|
|
is_camera_open = True
|
|
except RuntimeError:
|
|
is_camera_open = False
|
|
traceback.print_exc()
|
|
|
|
# If the camera doesn't work, display the camera indices corresponding to
|
|
# valid cameras.
|
|
if not is_camera_open:
|
|
# Verify that the provided `camera_index` is valid before printing the traceback
|
|
available_cam_ids = find_camera_indices()
|
|
if self.camera_index not in available_cam_ids:
|
|
raise ValueError(
|
|
f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
|
|
"To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`."
|
|
)
|
|
|
|
raise OSError(f"Can't access IntelRealSenseCamera({self.camera_index}).")
|
|
|
|
color_stream = profile.get_stream(RSStream.color)
|
|
color_profile = color_stream.as_video_stream_profile()
|
|
actual_fps = color_profile.fps()
|
|
actual_width = color_profile.width()
|
|
actual_height = color_profile.height()
|
|
|
|
# Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
|
|
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
|
|
# Using `OSError` since it's a broad that encompasses issues related to device communication
|
|
raise OSError(
|
|
f"Can't set {self.fps=} for IntelRealSenseCamera({self.camera_index}). Actual value is {actual_fps}."
|
|
)
|
|
if self.width is not None and self.width != actual_width:
|
|
raise OSError(
|
|
f"Can't set {self.width=} for IntelRealSenseCamera({self.camera_index}). Actual value is {actual_width}."
|
|
)
|
|
if self.height is not None and self.height != actual_height:
|
|
raise OSError(
|
|
f"Can't set {self.height=} for IntelRealSenseCamera({self.camera_index}). Actual value is {actual_height}."
|
|
)
|
|
|
|
self.fps = round(actual_fps)
|
|
self.width = round(actual_width)
|
|
self.height = round(actual_height)
|
|
|
|
self.is_connected = True
|
|
|
|
def read(self, temporary_color: str | None = None) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
|
|
"""Read a frame from the camera returned in the format height x width x channels (e.g. 480 x 640 x 3)
|
|
of type `np.uint8`, contrarily to the pytorch format which is float channel first.
|
|
|
|
When `use_depth=True`, returns a tuple `(color_image, depth_map)` with a depth map in the format
|
|
height x width (e.g. 480 x 640) of type np.uint16.
|
|
|
|
Note: Reading a frame is done every `camera.fps` times per second, and it is blocking.
|
|
If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
|
|
"""
|
|
if not self.is_connected:
|
|
raise RobotDeviceNotConnectedError(
|
|
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
|
)
|
|
|
|
start_time = time.perf_counter()
|
|
|
|
frame = self.camera.wait_for_frames(timeout_ms=5000)
|
|
|
|
color_frame = frame.get_color_frame()
|
|
|
|
if not color_frame:
|
|
raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.camera_index}).")
|
|
|
|
color_image = np.asanyarray(color_frame.get_data())
|
|
|
|
requested_color_mode = self.color_mode if temporary_color is None else temporary_color
|
|
if requested_color_mode not in ["rgb", "bgr"]:
|
|
raise ValueError(
|
|
f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided."
|
|
)
|
|
|
|
# IntelRealSense uses RGB format as default (red, green, blue).
|
|
if requested_color_mode == "bgr":
|
|
if self.mock:
|
|
from tests.mock_cv2 import COLOR_RGB2BGR, cvtColor
|
|
else:
|
|
from cv2 import COLOR_RGB2BGR, cvtColor
|
|
|
|
color_image = cvtColor(color_image, COLOR_RGB2BGR)
|
|
|
|
h, w, _ = color_image.shape
|
|
if h != self.height or w != self.width:
|
|
raise OSError(
|
|
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
|
|
)
|
|
|
|
# log the number of seconds it took to read the image
|
|
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
|
|
|
|
# log the utc time at which the image was received
|
|
self.logs["timestamp_utc"] = capture_timestamp_utc()
|
|
|
|
if self.use_depth:
|
|
depth_frame = frame.get_depth_frame()
|
|
if not depth_frame:
|
|
raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.camera_index}).")
|
|
|
|
depth_map = np.asanyarray(depth_frame.get_data())
|
|
|
|
h, w = depth_map.shape
|
|
if h != self.height or w != self.width:
|
|
raise OSError(
|
|
f"Can't capture depth map with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
|
|
)
|
|
|
|
return color_image, depth_map
|
|
else:
|
|
return color_image
|
|
|
|
def read_loop(self):
|
|
while not self.stop_event.is_set():
|
|
if self.use_depth:
|
|
self.color_image, self.depth_map = self.read()
|
|
else:
|
|
self.color_image = self.read()
|
|
|
|
def async_read(self):
|
|
"""Access the latest color image"""
|
|
if not self.is_connected:
|
|
raise RobotDeviceNotConnectedError(
|
|
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
|
)
|
|
|
|
if self.thread is None:
|
|
self.stop_event = threading.Event()
|
|
self.thread = Thread(target=self.read_loop, args=())
|
|
self.thread.daemon = True
|
|
self.thread.start()
|
|
|
|
num_tries = 0
|
|
while self.color_image is None:
|
|
# TODO(rcadene, aliberts): intelrealsense has diverged compared to opencv over here
|
|
num_tries += 1
|
|
time.sleep(1 / self.fps)
|
|
if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()):
|
|
raise Exception(
|
|
"The thread responsible for `self.async_read()` took too much time to start. There might be an issue. Verify that `self.thread.start()` has been called."
|
|
)
|
|
|
|
if self.use_depth:
|
|
return self.color_image, self.depth_map
|
|
else:
|
|
return self.color_image
|
|
|
|
def disconnect(self):
|
|
if not self.is_connected:
|
|
raise RobotDeviceNotConnectedError(
|
|
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
|
|
)
|
|
|
|
if self.thread is not None and self.thread.is_alive():
|
|
# wait for the thread to finish
|
|
self.stop_event.set()
|
|
self.thread.join()
|
|
self.thread = None
|
|
self.stop_event = None
|
|
|
|
self.camera.stop()
|
|
self.camera = None
|
|
|
|
self.is_connected = False
|
|
|
|
def __del__(self):
|
|
if getattr(self, "is_connected", False):
|
|
self.disconnect()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser(
|
|
description="Save a few frames using `IntelRealSenseCamera` for all cameras connected to the computer, or a selected subset."
|
|
)
|
|
parser.add_argument(
|
|
"--camera-ids",
|
|
type=int,
|
|
nargs="*",
|
|
default=None,
|
|
help="List of camera indices used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
|
|
)
|
|
parser.add_argument(
|
|
"--fps",
|
|
type=int,
|
|
default=30,
|
|
help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
|
|
)
|
|
parser.add_argument(
|
|
"--width",
|
|
type=str,
|
|
default=640,
|
|
help="Set the width for all cameras. If not provided, use the default width of each camera.",
|
|
)
|
|
parser.add_argument(
|
|
"--height",
|
|
type=str,
|
|
default=480,
|
|
help="Set the height for all cameras. If not provided, use the default height of each camera.",
|
|
)
|
|
parser.add_argument(
|
|
"--images-dir",
|
|
type=Path,
|
|
default="outputs/images_from_intelrealsense_cameras",
|
|
help="Set directory to save a few frames for each camera.",
|
|
)
|
|
parser.add_argument(
|
|
"--record-time-s",
|
|
type=float,
|
|
default=2.0,
|
|
help="Set the number of seconds used to record the frames. By default, 2 seconds.",
|
|
)
|
|
args = parser.parse_args()
|
|
save_images_from_cameras(**vars(args))
|