diff --git a/lerobot/common/robot_devices/cameras/intelrealsense.py b/lerobot/common/robot_devices/cameras/intelrealsense.py new file mode 100644 index 000000000..4806bf785 --- /dev/null +++ b/lerobot/common/robot_devices/cameras/intelrealsense.py @@ -0,0 +1,448 @@ +""" +This file contains utilities for recording frames from Intel Realsense cameras. +""" + +import argparse +import concurrent.futures +import logging +import shutil +import threading +import time +import traceback +from dataclasses import dataclass, replace +from pathlib import Path +from threading import Thread + +import cv2 +import numpy as np +import pyrealsense2 as rs +from PIL import Image + +from lerobot.common.robot_devices.utils import ( + RobotDeviceAlreadyConnectedError, + RobotDeviceNotConnectedError, +) +from lerobot.common.utils.utils import capture_timestamp_utc +from lerobot.scripts.control_robot import busy_wait + +SERIAL_NUMBER_INDEX = 1 + + +def find_camera_indices(raise_when_empty=True) -> list[int]: + """ + Find the serial numbers of the Intel RealSense cameras + connected to the computer. + """ + camera_ids = [] + for device in rs.context().query_devices(): + serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX))) + camera_ids.append(serial_number) + + if raise_when_empty and len(camera_ids) == 0: + raise OSError( + "Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware." + ) + + return camera_ids + + +def save_image(img_array, camera_idx, frame_index, images_dir): + try: + img = Image.fromarray(img_array) + path = images_dir / f"camera_{camera_idx}_frame_{frame_index:06d}.png" + path.parent.mkdir(parents=True, exist_ok=True) + img.save(str(path), quality=100) + logging.info(f"Saved image: {path}") + except Exception as e: + logging.error(f"Failed to save image for camera {camera_idx} frame {frame_index}: {e}") + + +def save_images_from_cameras( + images_dir: Path, + camera_ids: list[int] | None = None, + fps=None, + width=None, + height=None, + record_time_s=2, +): + """ + Initializes all the cameras and saves images to the directory. Useful to visually identify the camera + associated to a given camera index. + """ + if camera_ids is None: + camera_ids = find_camera_indices() + + print("Connecting cameras") + cameras = [] + for cam_idx in camera_ids: + camera = IntelRealSenseCamera(cam_idx, fps=fps, width=width, height=height) + camera.connect() + print( + f"IntelRealSenseCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})" + ) + cameras.append(camera) + + images_dir = Path(images_dir) + if images_dir.exists(): + shutil.rmtree( + images_dir, + ) + images_dir.mkdir(parents=True, exist_ok=True) + + print(f"Saving images to {images_dir}") + frame_index = 0 + start_time = time.perf_counter() + try: + with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: + while True: + now = time.perf_counter() + + for camera in cameras: + # If we use async_read when fps is None, the loop will go full speed, and we will end up + # saving the same images from the cameras multiple times until the RAM/disk is full. + image = camera.read() if fps is None else camera.async_read() + if image is None: + print("No Frame") + bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + + executor.submit( + save_image, + bgr_converted_image, + camera.camera_index, + frame_index, + images_dir, + ) + + if fps is not None: + dt_s = time.perf_counter() - now + busy_wait(1 / fps - dt_s) + + if time.perf_counter() - start_time > record_time_s: + break + + print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}") + + frame_index += 1 + finally: + print(f"Images have been saved to {images_dir}") + for camera in cameras: + camera.disconnect() + + +@dataclass +class IntelRealSenseCameraConfig: + """ + Example of tested options for Intel Real Sense D405: + + ```python + IntelRealSenseCameraConfig(30, 640, 480) + IntelRealSenseCameraConfig(60, 640, 480) + IntelRealSenseCameraConfig(90, 640, 480) + IntelRealSenseCameraConfig(30, 1280, 720) + IntelRealSenseCameraConfig(30, 640, 480, use_depth=True) + ``` + """ + + fps: int | None = None + width: int | None = None + height: int | None = None + color_mode: str = "rgb" + use_depth: bool = False + force_hardware_reset: bool = True + + def __post_init__(self): + if self.color_mode not in ["rgb", "bgr"]: + raise ValueError( + f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided." + ) + + if (self.fps or self.width or self.height) and not (self.fps and self.width and self.height): + raise ValueError( + "For `fps`, `width` and `height`, either all of them need to be set, or none of them, " + f"but {self.fps=}, {self.width=}, {self.height=} were provided." + ) + + +class IntelRealSenseCamera: + """ + The IntelRealSenseCamera class is similar to OpenCVCamera class but adds additional features for Intel Real Sense cameras: + - camera_index corresponds to the serial number of the camera, + - camera_index won't randomly change as it can be the case of OpenCVCamera for Linux, + - read is more reliable than OpenCVCamera, + - depth map can be returned. + + To find the camera indices of your cameras, you can run our utility script that will save a few frames for each camera: + ```bash + python lerobot/common/robot_devices/cameras/intelrealsense.py --images-dir outputs/images_from_intelrealsense_cameras + ``` + + When an IntelRealSenseCamera is instantiated, if no specific config is provided, the default fps, width, height and color_mode + of the given camera will be used. + + Example of usage: + ```python + camera_index = 128422271347 + camera = IntelRealSenseCamera(camera_index) + camera.connect() + color_image = camera.read() + # when done using the camera, consider disconnecting + camera.disconnect() + ``` + + Example of changing default fps, width, height and color_mode: + ```python + camera = IntelRealSenseCamera(camera_index, fps=30, width=1280, height=720) + camera = connect() # applies the settings, might error out if these settings are not compatible with the camera + + camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480) + camera = connect() + + camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480, color_mode="bgr") + camera = connect() + ``` + + Example of returning depth: + ```python + camera = IntelRealSenseCamera(camera_index, use_depth=True) + camera.connect() + color_image, depth_map = camera.read() + ``` + """ + + def __init__( + self, + camera_index: int, + config: IntelRealSenseCameraConfig | None = None, + **kwargs, + ): + if config is None: + config = IntelRealSenseCameraConfig() + + # Overwrite the config arguments using kwargs + config = replace(config, **kwargs) + + self.camera_index = camera_index + self.fps = config.fps + self.width = config.width + self.height = config.height + self.color_mode = config.color_mode + self.use_depth = config.use_depth + self.force_hardware_reset = config.force_hardware_reset + + self.camera = None + self.is_connected = False + self.thread = None + self.stop_event = None + self.color_image = None + self.depth_map = None + self.logs = {} + + def connect(self): + if self.is_connected: + raise RobotDeviceAlreadyConnectedError( + f"IntelRealSenseCamera({self.camera_index}) is already connected." + ) + + config = rs.config() + config.enable_device(str(self.camera_index)) + + if self.fps and self.width and self.height: + # TODO(rcadene): can we set rgb8 directly? + config.enable_stream(rs.stream.color, self.width, self.height, rs.format.rgb8, self.fps) + else: + config.enable_stream(rs.stream.color) + + if self.use_depth: + if self.fps and self.width and self.height: + config.enable_stream(rs.stream.depth, self.width, self.height, rs.format.z16, self.fps) + else: + config.enable_stream(rs.stream.depth) + + self.camera = rs.pipeline() + try: + self.camera.start(config) + is_camera_open = True + except RuntimeError: + is_camera_open = False + traceback.print_exc() + + # If the camera doesn't work, display the camera indices corresponding to + # valid cameras. + if not is_camera_open: + # Verify that the provided `camera_index` is valid before printing the traceback + available_cam_ids = find_camera_indices() + if self.camera_index not in available_cam_ids: + raise ValueError( + f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. " + "To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`." + ) + + raise OSError(f"Can't access IntelRealSenseCamera({self.camera_index}).") + + self.is_connected = True + + def read(self, temporary_color: str | None = None) -> np.ndarray | tuple[np.ndarray, np.ndarray]: + """Read a frame from the camera returned in the format height x width x channels (e.g. 480 x 640 x 3) + of type `np.uint8`, contrarily to the pytorch format which is float channel first. + + When `use_depth=True`, returns a tuple `(color_image, depth_map)` with a depth map in the format + height x width (e.g. 480 x 640) of type np.uint16. + + Note: Reading a frame is done every `camera.fps` times per second, and it is blocking. + If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`. + """ + if not self.is_connected: + raise RobotDeviceNotConnectedError( + f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first." + ) + + start_time = time.perf_counter() + + frame = self.camera.wait_for_frames(timeout_ms=5000) + + color_frame = frame.get_color_frame() + + if not color_frame: + raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.camera_index}).") + + color_image = np.asanyarray(color_frame.get_data()) + + requested_color_mode = self.color_mode if temporary_color is None else temporary_color + if requested_color_mode not in ["rgb", "bgr"]: + raise ValueError( + f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided." + ) + + # IntelRealSense uses RGB format as default (red, green, blue). + if requested_color_mode == "bgr": + color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR) + + h, w, _ = color_image.shape + if h != self.height or w != self.width: + raise OSError( + f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead." + ) + + # log the number of seconds it took to read the image + self.logs["delta_timestamp_s"] = time.perf_counter() - start_time + + # log the utc time at which the image was received + self.logs["timestamp_utc"] = capture_timestamp_utc() + + if self.use_depth: + depth_frame = frame.get_depth_frame() + if not depth_frame: + raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.camera_index}).") + + depth_map = np.asanyarray(depth_frame.get_data()) + + h, w = depth_map.shape + if h != self.height or w != self.width: + raise OSError( + f"Can't capture depth map with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead." + ) + + return color_image, depth_map + else: + return color_image + + def read_loop(self): + while self.stop_event is None or not self.stop_event.is_set(): + if self.use_depth: + self.color_image, self.depth_map = self.read() + else: + self.color_image = self.read() + + def async_read(self): + """Access the latest color image""" + if not self.is_connected: + raise RobotDeviceNotConnectedError( + f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first." + ) + + if self.thread is None: + self.stop_event = threading.Event() + self.thread = Thread(target=self.read_loop, args=()) + self.thread.daemon = True + self.thread.start() + + num_tries = 0 + while self.color_image is None: + num_tries += 1 + time.sleep(1 / self.fps) + if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()): + raise Exception( + "The thread responsible for `self.async_read()` took too much time to start. There might be an issue. Verify that `self.thread.start()` has been called." + ) + + if self.use_depth: + return self.color_image, self.depth_map + else: + return self.color_image + + def disconnect(self): + if not self.is_connected: + raise RobotDeviceNotConnectedError( + f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first." + ) + + if self.thread is not None and self.thread.is_alive(): + # wait for the thread to finish + self.stop_event.set() + self.thread.join() + self.thread = None + self.stop_event = None + + self.camera.stop() + self.camera = None + + self.is_connected = False + + def __del__(self): + if getattr(self, "is_connected", False): + self.disconnect() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Save a few frames using `IntelRealSenseCamera` for all cameras connected to the computer, or a selected subset." + ) + parser.add_argument( + "--camera-ids", + type=int, + nargs="*", + default=None, + help="List of camera indices used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.", + ) + parser.add_argument( + "--fps", + type=int, + default=30, + help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.", + ) + parser.add_argument( + "--width", + type=str, + default=640, + help="Set the width for all cameras. If not provided, use the default width of each camera.", + ) + parser.add_argument( + "--height", + type=str, + default=480, + help="Set the height for all cameras. If not provided, use the default height of each camera.", + ) + parser.add_argument( + "--images-dir", + type=Path, + default="outputs/images_from_intelrealsense_cameras", + help="Set directory to save a few frames for each camera.", + ) + parser.add_argument( + "--record-time-s", + type=float, + default=2.0, + help="Set the number of seconds used to record the frames. By default, 2 seconds.", + ) + args = parser.parse_args() + save_images_from_cameras(**vars(args)) diff --git a/lerobot/common/robot_devices/cameras/opencv.py b/lerobot/common/robot_devices/cameras/opencv.py index c26de6c50..b066a451a 100644 --- a/lerobot/common/robot_devices/cameras/opencv.py +++ b/lerobot/common/robot_devices/cameras/opencv.py @@ -80,6 +80,10 @@ def save_image(img_array, camera_index, frame_index, images_dir): def save_images_from_cameras( images_dir: Path, camera_ids: list[int] | None = None, fps=None, width=None, height=None, record_time_s=2 ): + """ + Initializes all the cameras and saves images to the directory. Useful to visually identify the camera + associated to a given camera index. + """ if camera_ids is None: camera_ids = find_camera_indices() @@ -156,7 +160,7 @@ class OpenCVCameraConfig: def __post_init__(self): if self.color_mode not in ["rgb", "bgr"]: raise ValueError( - f"Expected color_mode values are 'rgb' or 'bgr', but {self.color_mode} is provided." + f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided." ) @@ -202,6 +206,7 @@ class OpenCVCamera: def __init__(self, camera_index: int, config: OpenCVCameraConfig | None = None, **kwargs): if config is None: config = OpenCVCameraConfig() + # Overwrite config arguments using kwargs config = replace(config, **kwargs) @@ -220,7 +225,7 @@ class OpenCVCamera: def connect(self): if self.is_connected: - raise RobotDeviceAlreadyConnectedError(f"Camera {self.camera_index} is already connected.") + raise RobotDeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.") # First create a temporary camera trying to access `camera_index`, # and verify it is a valid camera by calling `isOpened`. @@ -246,7 +251,7 @@ class OpenCVCamera: "To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/opencv.py`." ) - raise OSError(f"Can't access camera {self.camera_index}.") + raise OSError(f"Can't access OpenCVCamera({self.camera_index}).") # Secondly, create the camera that will be used downstream. # Note: For some unknown reason, calling `isOpened` blocks the camera which then @@ -269,15 +274,15 @@ class OpenCVCamera: if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3): raise OSError( - f"Can't set {self.fps=} for camera {self.camera_index}. Actual value is {actual_fps}." + f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}." ) if self.width is not None and self.width != actual_width: raise OSError( - f"Can't set {self.width=} for camera {self.camera_index}. Actual value is {actual_width}." + f"Can't set {self.width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}." ) if self.height is not None and self.height != actual_height: raise OSError( - f"Can't set {self.height=} for camera {self.camera_index}. Actual value is {actual_height}." + f"Can't set {self.height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}." ) self.fps = actual_fps @@ -288,7 +293,7 @@ class OpenCVCamera: def read(self, temporary_color_mode: str | None = None) -> np.ndarray: """Read a frame from the camera returned in the format (height, width, channels) - (e.g. (640, 480, 3)), contrarily to the pytorch format which is channel first. + (e.g. 480 x 640 x 3), contrarily to the pytorch format which is channel first. Note: Reading a frame is done every `camera.fps` times per second, and it is blocking. If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`. @@ -311,7 +316,7 @@ class OpenCVCamera: f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided." ) - # OpenCV uses BGR format as default (blue, green red) for all operations, including displaying images. + # OpenCV uses BGR format as default (blue, green, red) for all operations, including displaying images. # However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks, # so we convert the image color from BGR to RGB. if requested_color_mode == "rgb": diff --git a/lerobot/configs/robot/aloha.yaml b/lerobot/configs/robot/aloha.yaml index d8366acdc..2380cfeba 100644 --- a/lerobot/configs/robot/aloha.yaml +++ b/lerobot/configs/robot/aloha.yaml @@ -82,26 +82,26 @@ follower_arms: # on another USB hub or PCIe card. cameras: cam_high: - _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera - camera_index: 10 + _target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera + camera_index: 128422271347 fps: 30 width: 640 height: 480 cam_low: - _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera - camera_index: 22 + _target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera + camera_index: 130322270656 fps: 30 width: 640 height: 480 cam_left_wrist: - _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera - camera_index: 16 + _target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera + camera_index: 218622272670 fps: 30 width: 640 height: 480 cam_right_wrist: - _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera - camera_index: 4 + _target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera + camera_index: 130322272300 fps: 30 width: 640 height: 480 diff --git a/poetry.lock b/poetry.lock index 8b8350fc3..37f89e5c1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3194,6 +3194,29 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pyrealsense2" +version = "2.55.1.6486" +description = "Python Wrapper for Intel Realsense SDK 2.0." +optional = true +python-versions = "*" +files = [ + {file = "pyrealsense2-2.55.1.6486-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:882613808289c602b23f2e19bf1fbadd63fb3af9be9c2997cc4ea74741a65136"}, + {file = "pyrealsense2-2.55.1.6486-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:686320811ef30c162c7240cb619e9b152420c0a32337a137139276c87f213336"}, + {file = "pyrealsense2-2.55.1.6486-cp310-cp310-win_amd64.whl", hash = "sha256:600e7c691c7c50043a2c930471f873da33badce9c5b8c75a8bad499389ac10a4"}, + {file = "pyrealsense2-2.55.1.6486-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:178f92caec6e8e212a7ced66c678dec47462b0b77e929fa576d02eea297bb177"}, + {file = "pyrealsense2-2.55.1.6486-cp311-cp311-win_amd64.whl", hash = "sha256:e01939a63bac3e1a4da742f7e1dbc618a4ec03ee0f7b3690ae5d1ad0c983aca8"}, + {file = "pyrealsense2-2.55.1.6486-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5a40216386b49a520b5817afe97efa9a53471747b765e8b4e6ca549678945c04"}, + {file = "pyrealsense2-2.55.1.6486-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:affb80b0cad7a732db4e450e9d4a8f7193499e3b35c0ce0b3e67fde5b1e9cf64"}, + {file = "pyrealsense2-2.55.1.6486-cp37-cp37m-win_amd64.whl", hash = "sha256:93f81f0955037a325529d93059138e5036fc5e51d5fda4b3c88ae4287fa0b3ed"}, + {file = "pyrealsense2-2.55.1.6486-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0f51350ea100215dedc757ea7872ec23342a1d84015e87583911912d882c8ce2"}, + {file = "pyrealsense2-2.55.1.6486-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:3695ae04d423d6404db4d0a756c66a0f122f9e0858c91d3dcee132adbef35b62"}, + {file = "pyrealsense2-2.55.1.6486-cp38-cp38-win_amd64.whl", hash = "sha256:f06ea7adadcdcb7d3334b8f067e4f7a361f6421a763988897d52602937c716de"}, + {file = "pyrealsense2-2.55.1.6486-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:18b5650b1ffcc2c2a42c9f72870d291509afc5819db757f5f365c42a8aae4129"}, + {file = "pyrealsense2-2.55.1.6486-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:aac4fb7f9a36ff6ecaa3bf0565f3baa9327b02dd843f14933eece8a4455c6c79"}, + {file = "pyrealsense2-2.55.1.6486-cp39-cp39-win_amd64.whl", hash = "sha256:5cbede3cd35946f3051ae6df42619ea01419c58379533c596bbad5dbf648c25b"}, +] + [[package]] name = "pyserial" version = "3.5" @@ -4553,6 +4576,7 @@ aloha = ["gym-aloha"] dev = ["debugpy", "pre-commit"] dora = ["gym-dora"] dynamixel = ["dynamixel-sdk", "pynput"] +intelrealsense = ["pyrealsense2"] pusht = ["gym-pusht"] test = ["pytest", "pytest-cov"] umi = ["imagecodecs"] @@ -4562,4 +4586,4 @@ xarm = ["gym-xarm"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "781e1ca86ed53f76d1b28066a91cc591630886f3a908a691a5aa26146793a02c" +content-hash = "06a8a1941b75c3ec78ade6f8b2c3ad7b5d2f1516b590fa3d5a773add73f6dbec" diff --git a/pyproject.toml b/pyproject.toml index 85affc7cd..5aa500528 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ dynamixel-sdk = {version = ">=3.7.31", optional = true} pynput = {version = ">=1.7.7", optional = true} # TODO(rcadene, salibert): 71.0.1 has a bug setuptools = {version = "!=71.0.1", optional = true} - +pyrealsense2 = {version = ">=2.55.1.6486", markers = "sys_platform != 'darwin'", optional = true} [tool.poetry.extras] @@ -79,6 +79,7 @@ test = ["pytest", "pytest-cov"] umi = ["imagecodecs"] video_benchmark = ["scikit-image", "pandas"] dynamixel = ["dynamixel-sdk", "pynput"] +intelrealsense = ["pyrealsense2"] [tool.ruff] line-length = 110