Add IntelRealSenseCamera (#410)

Co-authored-by: Simon Alibert <simon.alibert@huggingface.co>
Co-authored-by: shantanuparab-tr <shantanu@trossenrobotics.com>
Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com>
Author: Remi
Date: 2024-09-05 23:59:41 +02:00 (committed by GitHub)
Parent: 9d0c6fe419
Commit: 9c9f5cac90
5 changed files with 496 additions and 18 deletions

lerobot/common/robot_devices/cameras/intelrealsense.py (new file)

@@ -0,0 +1,448 @@
"""
This file contains utilities for recording frames from Intel RealSense cameras.
"""
import argparse
import concurrent.futures
import logging
import shutil
import threading
import time
import traceback
from dataclasses import dataclass, replace
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import pyrealsense2 as rs
from PIL import Image
from lerobot.common.robot_devices.utils import (
RobotDeviceAlreadyConnectedError,
RobotDeviceNotConnectedError,
)
from lerobot.common.utils.utils import capture_timestamp_utc
from lerobot.scripts.control_robot import busy_wait
SERIAL_NUMBER_INDEX = 1
def find_camera_indices(raise_when_empty=True) -> list[int]:
"""
Find the serial numbers of the Intel RealSense cameras
connected to the computer.
"""
camera_ids = []
for device in rs.context().query_devices():
serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX)))
camera_ids.append(serial_number)
if raise_when_empty and len(camera_ids) == 0:
raise OSError(
"Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware."
)
return camera_ids
def save_image(img_array, camera_idx, frame_index, images_dir):
try:
img = Image.fromarray(img_array)
path = images_dir / f"camera_{camera_idx}_frame_{frame_index:06d}.png"
path.parent.mkdir(parents=True, exist_ok=True)
img.save(str(path), quality=100)
logging.info(f"Saved image: {path}")
except Exception as e:
logging.error(f"Failed to save image for camera {camera_idx} frame {frame_index}: {e}")
def save_images_from_cameras(
images_dir: Path,
camera_ids: list[int] | None = None,
fps=None,
width=None,
height=None,
record_time_s=2,
):
"""
Initializes all the cameras and saves images to the directory. Useful for visually identifying the camera
associated with a given camera index.
"""
if camera_ids is None:
camera_ids = find_camera_indices()
print("Connecting cameras")
cameras = []
for cam_idx in camera_ids:
camera = IntelRealSenseCamera(cam_idx, fps=fps, width=width, height=height)
camera.connect()
print(
f"IntelRealSenseCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
)
cameras.append(camera)
images_dir = Path(images_dir)
if images_dir.exists():
shutil.rmtree(
images_dir,
)
images_dir.mkdir(parents=True, exist_ok=True)
print(f"Saving images to {images_dir}")
frame_index = 0
start_time = time.perf_counter()
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
while True:
now = time.perf_counter()
for camera in cameras:
# If we use async_read when fps is None, the loop will go full speed, and we will end up
# saving the same images from the cameras multiple times until the RAM/disk is full.
image = camera.read() if fps is None else camera.async_read()
if image is None:
    print("No frame returned by the camera, skipping")
    continue
bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
executor.submit(
save_image,
bgr_converted_image,
camera.camera_index,
frame_index,
images_dir,
)
if fps is not None:
dt_s = time.perf_counter() - now
busy_wait(1 / fps - dt_s)
if time.perf_counter() - start_time > record_time_s:
break
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
frame_index += 1
finally:
print(f"Images have been saved to {images_dir}")
for camera in cameras:
camera.disconnect()
@dataclass
class IntelRealSenseCameraConfig:
"""
Example of tested options for Intel RealSense D405:
```python
IntelRealSenseCameraConfig(30, 640, 480)
IntelRealSenseCameraConfig(60, 640, 480)
IntelRealSenseCameraConfig(90, 640, 480)
IntelRealSenseCameraConfig(30, 1280, 720)
IntelRealSenseCameraConfig(30, 640, 480, use_depth=True)
```
"""
fps: int | None = None
width: int | None = None
height: int | None = None
color_mode: str = "rgb"
use_depth: bool = False
force_hardware_reset: bool = True
def __post_init__(self):
if self.color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)
if (self.fps or self.width or self.height) and not (self.fps and self.width and self.height):
raise ValueError(
"For `fps`, `width` and `height`, either all of them need to be set, or none of them, "
f"but {self.fps=}, {self.width=}, {self.height=} were provided."
)
class IntelRealSenseCamera:
"""
The IntelRealSenseCamera class is similar to the OpenCVCamera class but adds features specific to Intel RealSense cameras:
- camera_index corresponds to the serial number of the camera,
- camera_index does not change arbitrarily, as can happen with OpenCVCamera indices on Linux,
- reading frames is more reliable than with OpenCVCamera,
- a depth map can be returned.
To find the camera indices of your cameras, you can run our utility script that will save a few frames for each camera:
```bash
python lerobot/common/robot_devices/cameras/intelrealsense.py --images-dir outputs/images_from_intelrealsense_cameras
```
When an IntelRealSenseCamera is instantiated, if no specific config is provided, the default fps, width, height and color_mode
of the given camera will be used.
Example of usage:
```python
camera_index = 128422271347
camera = IntelRealSenseCamera(camera_index)
camera.connect()
color_image = camera.read()
# when done using the camera, consider disconnecting
camera.disconnect()
```
Example of changing default fps, width, height and color_mode:
```python
camera = IntelRealSenseCamera(camera_index, fps=30, width=1280, height=720)
camera.connect() # applies the settings, might error out if these settings are not compatible with the camera
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480)
camera.connect()
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480, color_mode="bgr")
camera.connect()
```
Example of returning depth:
```python
camera = IntelRealSenseCamera(camera_index, use_depth=True)
camera.connect()
color_image, depth_map = camera.read()
```
"""
def __init__(
self,
camera_index: int,
config: IntelRealSenseCameraConfig | None = None,
**kwargs,
):
if config is None:
config = IntelRealSenseCameraConfig()
# Overwrite the config arguments using kwargs
config = replace(config, **kwargs)
self.camera_index = camera_index
self.fps = config.fps
self.width = config.width
self.height = config.height
self.color_mode = config.color_mode
self.use_depth = config.use_depth
self.force_hardware_reset = config.force_hardware_reset
self.camera = None
self.is_connected = False
self.thread = None
self.stop_event = None
self.color_image = None
self.depth_map = None
self.logs = {}
def connect(self):
if self.is_connected:
raise RobotDeviceAlreadyConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is already connected."
)
config = rs.config()
config.enable_device(str(self.camera_index))
if self.fps and self.width and self.height:
# TODO(rcadene): can we set rgb8 directly?
config.enable_stream(rs.stream.color, self.width, self.height, rs.format.rgb8, self.fps)
else:
config.enable_stream(rs.stream.color)
if self.use_depth:
if self.fps and self.width and self.height:
config.enable_stream(rs.stream.depth, self.width, self.height, rs.format.z16, self.fps)
else:
config.enable_stream(rs.stream.depth)
self.camera = rs.pipeline()
try:
self.camera.start(config)
is_camera_open = True
except RuntimeError:
is_camera_open = False
traceback.print_exc()
# If the camera doesn't work, display the camera indices corresponding to
# valid cameras.
if not is_camera_open:
# Verify that the provided `camera_index` is valid before printing the traceback
available_cam_ids = find_camera_indices()
if self.camera_index not in available_cam_ids:
raise ValueError(
f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
"To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`."
)
raise OSError(f"Can't access IntelRealSenseCamera({self.camera_index}).")
self.is_connected = True
def read(self, temporary_color: str | None = None) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
"""Read a frame from the camera returned in the format height x width x channels (e.g. 480 x 640 x 3)
of type `np.uint8`, contrarily to the pytorch format which is float channel first.
When `use_depth=True`, returns a tuple `(color_image, depth_map)` with a depth map in the format
height x width (e.g. 480 x 640) of type np.uint16.
Note: Reading a frame is done every `camera.fps` times per second, and it is blocking.
If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
"""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
start_time = time.perf_counter()
frame = self.camera.wait_for_frames(timeout_ms=5000)
color_frame = frame.get_color_frame()
if not color_frame:
raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.camera_index}).")
color_image = np.asanyarray(color_frame.get_data())
requested_color_mode = self.color_mode if temporary_color is None else temporary_color
if requested_color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided."
)
# IntelRealSense uses RGB format as default (red, green, blue).
if requested_color_mode == "bgr":
color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)
h, w, _ = color_image.shape
if h != self.height or w != self.width:
raise OSError(
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
)
# log the number of seconds it took to read the image
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
# log the utc time at which the image was received
self.logs["timestamp_utc"] = capture_timestamp_utc()
if self.use_depth:
depth_frame = frame.get_depth_frame()
if not depth_frame:
raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.camera_index}).")
depth_map = np.asanyarray(depth_frame.get_data())
h, w = depth_map.shape
if h != self.height or w != self.width:
raise OSError(
f"Can't capture depth map with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
)
return color_image, depth_map
else:
return color_image
def read_loop(self):
while self.stop_event is None or not self.stop_event.is_set():
if self.use_depth:
self.color_image, self.depth_map = self.read()
else:
self.color_image = self.read()
def async_read(self):
"""Access the latest color image"""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
if self.thread is None:
self.stop_event = threading.Event()
self.thread = Thread(target=self.read_loop, args=())
self.thread.daemon = True
self.thread.start()
num_tries = 0
while self.color_image is None:
num_tries += 1
time.sleep(1 / self.fps)
if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()):
raise Exception(
"The thread responsible for `self.async_read()` took too much time to start. There might be an issue. Verify that `self.thread.start()` has been called."
)
if self.use_depth:
return self.color_image, self.depth_map
else:
return self.color_image
def disconnect(self):
if not self.is_connected:
raise RobotDeviceNotConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
if self.thread is not None and self.thread.is_alive():
# wait for the thread to finish
self.stop_event.set()
self.thread.join()
self.thread = None
self.stop_event = None
self.camera.stop()
self.camera = None
self.is_connected = False
def __del__(self):
if getattr(self, "is_connected", False):
self.disconnect()
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Save a few frames using `IntelRealSenseCamera` for all cameras connected to the computer, or a selected subset."
)
parser.add_argument(
"--camera-ids",
type=int,
nargs="*",
default=None,
help="List of camera indices used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
)
parser.add_argument(
"--fps",
type=int,
default=30,
help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
)
parser.add_argument(
"--width",
type=int,
default=640,
help="Set the width for all cameras. If not provided, use the default width of each camera.",
)
parser.add_argument(
"--height",
type=int,
default=480,
help="Set the height for all cameras. If not provided, use the default height of each camera.",
)
parser.add_argument(
"--images-dir",
type=Path,
default="outputs/images_from_intelrealsense_cameras",
help="Set directory to save a few frames for each camera.",
)
parser.add_argument(
"--record-time-s",
type=float,
default=2.0,
help="Set the number of seconds used to record the frames. By default, 2 seconds.",
)
args = parser.parse_args()
save_images_from_cameras(**vars(args))
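For quick reference, here is a minimal usage sketch assembled from the docstring examples above. It assumes `pyrealsense2` is installed and at least one RealSense camera is plugged in; the serial numbers returned by `find_camera_indices()` will of course differ per machine.

```python
from lerobot.common.robot_devices.cameras.intelrealsense import (
    IntelRealSenseCamera,
    find_camera_indices,
)

# Serial numbers of all connected RealSense cameras, e.g. [128422271347].
camera_ids = find_camera_indices()

# Connect with explicit settings and grab one RGB frame plus its depth map.
camera = IntelRealSenseCamera(camera_ids[0], fps=30, width=640, height=480, use_depth=True)
camera.connect()
color_image, depth_map = camera.read()
print(color_image.shape, color_image.dtype)  # (480, 640, 3) uint8
print(depth_map.shape, depth_map.dtype)      # (480, 640) uint16

# Non-blocking variant: a background thread keeps the latest frame available.
color_image, depth_map = camera.async_read()

camera.disconnect()
```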

lerobot/common/robot_devices/cameras/opencv.py

@@ -80,6 +80,10 @@ def save_image(img_array, camera_index, frame_index, images_dir):
def save_images_from_cameras(
images_dir: Path, camera_ids: list[int] | None = None, fps=None, width=None, height=None, record_time_s=2
):
"""
Initializes all the cameras and saves images to the directory. Useful for visually identifying the camera
associated with a given camera index.
"""
if camera_ids is None:
camera_ids = find_camera_indices()
@@ -156,7 +160,7 @@ class OpenCVCameraConfig:
def __post_init__(self):
if self.color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"Expected color_mode values are 'rgb' or 'bgr', but {self.color_mode} is provided."
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)
@@ -202,6 +206,7 @@ class OpenCVCamera:
def __init__(self, camera_index: int, config: OpenCVCameraConfig | None = None, **kwargs):
if config is None:
config = OpenCVCameraConfig()
# Overwrite config arguments using kwargs
config = replace(config, **kwargs)
@@ -220,7 +225,7 @@ class OpenCVCamera:
def connect(self):
if self.is_connected:
raise RobotDeviceAlreadyConnectedError(f"Camera {self.camera_index} is already connected.")
raise RobotDeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.")
# First create a temporary camera trying to access `camera_index`,
# and verify it is a valid camera by calling `isOpened`.
@@ -246,7 +251,7 @@ class OpenCVCamera:
"To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/opencv.py`."
)
raise OSError(f"Can't access camera {self.camera_index}.")
raise OSError(f"Can't access OpenCVCamera({self.camera_index}).")
# Secondly, create the camera that will be used downstream.
# Note: For some unknown reason, calling `isOpened` blocks the camera which then
@@ -269,15 +274,15 @@ class OpenCVCamera:
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
raise OSError(
f"Can't set {self.fps=} for camera {self.camera_index}. Actual value is {actual_fps}."
f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}."
)
if self.width is not None and self.width != actual_width:
raise OSError(
f"Can't set {self.width=} for camera {self.camera_index}. Actual value is {actual_width}."
f"Can't set {self.width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}."
)
if self.height is not None and self.height != actual_height:
raise OSError(
f"Can't set {self.height=} for camera {self.camera_index}. Actual value is {actual_height}."
f"Can't set {self.height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}."
)
self.fps = actual_fps
@@ -288,7 +293,7 @@ class OpenCVCamera:
def read(self, temporary_color_mode: str | None = None) -> np.ndarray:
"""Read a frame from the camera returned in the format (height, width, channels)
- (e.g. (640, 480, 3)), contrarily to the pytorch format which is channel first.
+ (e.g. 480 x 640 x 3), contrarily to the pytorch format which is channel first.
Note: Reading a frame is done every `camera.fps` times per second, and it is blocking.
If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`.
@@ -311,7 +316,7 @@ class OpenCVCamera:
f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided."
)
- # OpenCV uses BGR format as default (blue, green red) for all operations, including displaying images.
+ # OpenCV uses BGR format as default (blue, green, red) for all operations, including displaying images.
# However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks,
# so we convert the image color from BGR to RGB.
if requested_color_mode == "rgb":
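As a side note on the color convention touched by these changes, here is a small illustrative sketch (not part of the commit) of how a caller might preview frames with OpenCV, which expects BGR, while keeping lerobot's RGB default. The camera index 0 is only a placeholder value.

```python
import cv2
from lerobot.common.robot_devices.cameras.opencv import OpenCVCamera

camera = OpenCVCamera(0)  # illustrative index; run the opencv.py script to find yours
camera.connect()

# read() returns RGB by default (lerobot's training convention); OpenCV's display
# functions expect BGR, so convert before showing a preview window.
rgb = camera.read()
cv2.imshow("preview", cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
cv2.waitKey(1)

# Or request BGR for this single read without changing the camera's default.
# IntelRealSenseCamera.read() offers the same switch via its `temporary_color` argument.
bgr = camera.read(temporary_color_mode="bgr")

camera.disconnect()
```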

Robot camera config (YAML)

@@ -82,26 +82,26 @@ follower_arms:
# on another USB hub or PCIe card.
cameras:
cam_high:
- _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
- camera_index: 10
+ _target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
+ camera_index: 128422271347
fps: 30
width: 640
height: 480
cam_low:
- _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
- camera_index: 22
+ _target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
+ camera_index: 130322270656
fps: 30
width: 640
height: 480
cam_left_wrist:
- _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
- camera_index: 16
+ _target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
+ camera_index: 218622272670
fps: 30
width: 640
height: 480
cam_right_wrist:
- _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
- camera_index: 4
+ _target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
+ camera_index: 130322272300
fps: 30
width: 640
height: 480
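The `_target_` keys follow Hydra's instantiation convention. The loading code is not part of this diff, but a minimal sketch of how such a cameras section is typically resolved (assuming `hydra` and `omegaconf` are available) could look like this:

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Trimmed-down stand-in for the cameras section above.
yaml_snippet = """
cameras:
  cam_high:
    _target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera
    camera_index: 128422271347
    fps: 30
    width: 640
    height: 480
"""
cfg = OmegaConf.create(yaml_snippet)

# Each entry becomes an IntelRealSenseCamera instance with the listed kwargs.
cameras = {name: instantiate(cam_cfg) for name, cam_cfg in cfg.cameras.items()}
for camera in cameras.values():
    camera.connect()
```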

poetry.lock (generated)

@@ -3194,6 +3194,29 @@ files = [
[package.extras]
diagrams = ["jinja2", "railroad-diagrams"]
[[package]]
name = "pyrealsense2"
version = "2.55.1.6486"
description = "Python Wrapper for Intel Realsense SDK 2.0."
optional = true
python-versions = "*"
files = [
{file = "pyrealsense2-2.55.1.6486-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:882613808289c602b23f2e19bf1fbadd63fb3af9be9c2997cc4ea74741a65136"},
{file = "pyrealsense2-2.55.1.6486-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:686320811ef30c162c7240cb619e9b152420c0a32337a137139276c87f213336"},
{file = "pyrealsense2-2.55.1.6486-cp310-cp310-win_amd64.whl", hash = "sha256:600e7c691c7c50043a2c930471f873da33badce9c5b8c75a8bad499389ac10a4"},
{file = "pyrealsense2-2.55.1.6486-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:178f92caec6e8e212a7ced66c678dec47462b0b77e929fa576d02eea297bb177"},
{file = "pyrealsense2-2.55.1.6486-cp311-cp311-win_amd64.whl", hash = "sha256:e01939a63bac3e1a4da742f7e1dbc618a4ec03ee0f7b3690ae5d1ad0c983aca8"},
{file = "pyrealsense2-2.55.1.6486-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5a40216386b49a520b5817afe97efa9a53471747b765e8b4e6ca549678945c04"},
{file = "pyrealsense2-2.55.1.6486-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:affb80b0cad7a732db4e450e9d4a8f7193499e3b35c0ce0b3e67fde5b1e9cf64"},
{file = "pyrealsense2-2.55.1.6486-cp37-cp37m-win_amd64.whl", hash = "sha256:93f81f0955037a325529d93059138e5036fc5e51d5fda4b3c88ae4287fa0b3ed"},
{file = "pyrealsense2-2.55.1.6486-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0f51350ea100215dedc757ea7872ec23342a1d84015e87583911912d882c8ce2"},
{file = "pyrealsense2-2.55.1.6486-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:3695ae04d423d6404db4d0a756c66a0f122f9e0858c91d3dcee132adbef35b62"},
{file = "pyrealsense2-2.55.1.6486-cp38-cp38-win_amd64.whl", hash = "sha256:f06ea7adadcdcb7d3334b8f067e4f7a361f6421a763988897d52602937c716de"},
{file = "pyrealsense2-2.55.1.6486-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:18b5650b1ffcc2c2a42c9f72870d291509afc5819db757f5f365c42a8aae4129"},
{file = "pyrealsense2-2.55.1.6486-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:aac4fb7f9a36ff6ecaa3bf0565f3baa9327b02dd843f14933eece8a4455c6c79"},
{file = "pyrealsense2-2.55.1.6486-cp39-cp39-win_amd64.whl", hash = "sha256:5cbede3cd35946f3051ae6df42619ea01419c58379533c596bbad5dbf648c25b"},
]
[[package]]
name = "pyserial"
version = "3.5"
@@ -4553,6 +4576,7 @@ aloha = ["gym-aloha"]
dev = ["debugpy", "pre-commit"]
dora = ["gym-dora"]
dynamixel = ["dynamixel-sdk", "pynput"]
intelrealsense = ["pyrealsense2"]
pusht = ["gym-pusht"]
test = ["pytest", "pytest-cov"]
umi = ["imagecodecs"]
@@ -4562,4 +4586,4 @@ xarm = ["gym-xarm"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "781e1ca86ed53f76d1b28066a91cc591630886f3a908a691a5aa26146793a02c"
content-hash = "06a8a1941b75c3ec78ade6f8b2c3ad7b5d2f1516b590fa3d5a773add73f6dbec"

pyproject.toml

@@ -66,7 +66,7 @@ dynamixel-sdk = {version = ">=3.7.31", optional = true}
pynput = {version = ">=1.7.7", optional = true}
# TODO(rcadene, salibert): 71.0.1 has a bug
setuptools = {version = "!=71.0.1", optional = true}
pyrealsense2 = {version = ">=2.55.1.6486", markers = "sys_platform != 'darwin'", optional = true}
[tool.poetry.extras]
@@ -79,6 +79,7 @@ test = ["pytest", "pytest-cov"]
umi = ["imagecodecs"]
video_benchmark = ["scikit-image", "pandas"]
dynamixel = ["dynamixel-sdk", "pynput"]
intelrealsense = ["pyrealsense2"]
[tool.ruff]
line-length = 110
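Since `pyrealsense2` is only an optional extra (and, per the marker above, excluded on macOS), downstream code would typically guard the import. A minimal sketch, assuming the extra is installed with `poetry install --extras intelrealsense`:

```python
try:
    import pyrealsense2 as rs  # noqa: F401
except ImportError as e:
    # Turn a bare ImportError into an actionable message for users who
    # skipped the optional extra or are on macOS, where the wheel is unavailable.
    raise ImportError(
        "pyrealsense2 is required for IntelRealSenseCamera. Install the extra with "
        "`poetry install --extras intelrealsense` (not available on macOS)."
    ) from e
```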