Add support for Stretch (hello-robot) (#409)

Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: Remi Cadene <re.cadene@gmail.com>
2024-10-04 18:56:42 +02:00
parent 26f97cfd17
commit 1a343c3591
20 changed files with 5052 additions and 1652 deletions
--- a/lerobot/common/robot_devices/cameras/intelrealsense.py
+++ b/lerobot/common/robot_devices/cameras/intelrealsense.py
@@ -10,6 +10,7 @@ import shutil
 import threading
 import time
 import traceback
+from collections import Counter
 from dataclasses import dataclass, replace
 from pathlib import Path
 from threading import Thread
@@ -27,47 +28,49 @@ from lerobot.scripts.control_robot import busy_wait
 SERIAL_NUMBER_INDEX = 1


-def find_camera_indices(raise_when_empty=True, mock=False) -> list[int]:
+def find_cameras(raise_when_empty=True, mock=False) -> list[dict]:
    """
-    Find the serial numbers of the Intel RealSense cameras
+    Find the names and the serial numbers of the Intel RealSense cameras
    connected to the computer.
    """
    if mock:
-        from tests.mock_pyrealsense2 import (
-            RSCameraInfo,
-            RSContext,
-        )
+        import tests.mock_pyrealsense2 as rs
    else:
-        from pyrealsense2 import camera_info as RSCameraInfo  # noqa: N812
-        from pyrealsense2 import context as RSContext  # noqa: N812
+        import pyrealsense2 as rs

-    camera_ids = []
-    for device in RSContext().query_devices():
-        serial_number = int(device.get_info(RSCameraInfo(SERIAL_NUMBER_INDEX)))
-        camera_ids.append(serial_number)
+    cameras = []
+    for device in rs.context().query_devices():
+        serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX)))
+        name = device.get_info(rs.camera_info.name)
+        cameras.append(
+            {
+                "serial_number": serial_number,
+                "name": name,
+            }
+        )

-    if raise_when_empty and len(camera_ids) == 0:
+    if raise_when_empty and len(cameras) == 0:
        raise OSError(
            "Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware."
        )

-    return camera_ids
+    return cameras


-def save_image(img_array, camera_idx, frame_index, images_dir):
+def save_image(img_array, serial_number, frame_index, images_dir):
    try:
        img = Image.fromarray(img_array)
-        path = images_dir / f"camera_{camera_idx}_frame_{frame_index:06d}.png"
+        path = images_dir / f"camera_{serial_number}_frame_{frame_index:06d}.png"
        path.parent.mkdir(parents=True, exist_ok=True)
        img.save(str(path), quality=100)
        logging.info(f"Saved image: {path}")
    except Exception as e:
-        logging.error(f"Failed to save image for camera {camera_idx} frame {frame_index}: {e}")
+        logging.error(f"Failed to save image for camera {serial_number} frame {frame_index}: {e}")


 def save_images_from_cameras(
    images_dir: Path,
-    camera_ids: list[int] | None = None,
+    serial_numbers: list[int] | None = None,
    fps=None,
    width=None,
    height=None,
@@ -76,23 +79,25 @@ def save_images_from_cameras(
 ):
    """
    Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
-    associated to a given camera index.
+    associated to a given serial number.
    """
-    if camera_ids is None:
-        camera_ids = find_camera_indices(mock=mock)
+    if serial_numbers is None or len(serial_numbers) == 0:
+        camera_infos = find_cameras(mock=mock)
+        serial_numbers = [cam["serial_number"] for cam in camera_infos]

    if mock:
-        from tests.mock_cv2 import COLOR_RGB2BGR, cvtColor
+        import tests.mock_cv2 as cv2
    else:
-        from cv2 import COLOR_RGB2BGR, cvtColor
+        import cv2

    print("Connecting cameras")
    cameras = []
-    for cam_idx in camera_ids:
-        camera = IntelRealSenseCamera(cam_idx, fps=fps, width=width, height=height, mock=mock)
+    for cam_sn in serial_numbers:
+        print(f"{cam_sn=}")
+        camera = IntelRealSenseCamera(cam_sn, fps=fps, width=width, height=height, mock=mock)
        camera.connect()
        print(
-            f"IntelRealSenseCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
+            f"IntelRealSenseCamera({camera.serial_number}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
        )
        cameras.append(camera)

@@ -107,7 +112,7 @@ def save_images_from_cameras(
    frame_index = 0
    start_time = time.perf_counter()
    try:
-        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            while True:
                now = time.perf_counter()

@@ -118,12 +123,12 @@ def save_images_from_cameras(
                    if image is None:
                        print("No Frame")

-                    bgr_converted_image = cvtColor(image, COLOR_RGB2BGR)
+                    bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                    executor.submit(
                        save_image,
                        bgr_converted_image,
-                        camera.camera_index,
+                        camera.serial_number,
                        frame_index,
                        images_dir,
                    )
@@ -155,6 +160,7 @@ class IntelRealSenseCameraConfig:
    IntelRealSenseCameraConfig(90, 640, 480)
    IntelRealSenseCameraConfig(30, 1280, 720)
    IntelRealSenseCameraConfig(30, 640, 480, use_depth=True)
+    IntelRealSenseCameraConfig(30, 640, 480, rotation=90)
    ```
    """

@@ -164,6 +170,7 @@ class IntelRealSenseCameraConfig:
    color_mode: str = "rgb"
    use_depth: bool = False
    force_hardware_reset: bool = True
+    rotation: int | None = None
    mock: bool = False

    def __post_init__(self):
@@ -180,13 +187,15 @@ class IntelRealSenseCameraConfig:
                f"but {self.fps=}, {self.width=}, {self.height=} were provided."
            )

+        if self.rotation not in [-90, None, 90, 180]:
+            raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
+

 class IntelRealSenseCamera:
    """
    The IntelRealSenseCamera class is similar to OpenCVCamera class but adds additional features for Intel Real Sense cameras:
-    - camera_index corresponds to the serial number of the camera,
-    - camera_index won't randomly change as it can be the case of OpenCVCamera for Linux,
-    - read is more reliable than OpenCVCamera,
+    - is instantiated with the serial number of the camera - won't randomly change as it can be the case of OpenCVCamera for Linux,
+    - can also be instantiated with the camera's name — if it's unique — using IntelRealSenseCamera.init_from_name(),
    - depth map can be returned.

    To find the camera indices of your cameras, you can run our utility script that will save a few frames for each camera:
@@ -199,8 +208,10 @@ class IntelRealSenseCamera:

    Example of usage:
    ```python
-    camera_index = 128422271347
-    camera = IntelRealSenseCamera(camera_index)
+    # Instantiate with its serial number
+    camera = IntelRealSenseCamera(128422271347)
+    # Or by its name if it's unique
+    camera = IntelRealSenseCamera.init_from_name("Intel RealSense D405")
    camera.connect()
    color_image = camera.read()
    # when done using the camera, consider disconnecting
@@ -209,19 +220,19 @@ class IntelRealSenseCamera:

    Example of changing default fps, width, height and color_mode:
    ```python
-    camera = IntelRealSenseCamera(camera_index, fps=30, width=1280, height=720)
+    camera = IntelRealSenseCamera(serial_number, fps=30, width=1280, height=720)
    camera = connect()  # applies the settings, might error out if these settings are not compatible with the camera

-    camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480)
+    camera = IntelRealSenseCamera(serial_number, fps=90, width=640, height=480)
    camera = connect()

-    camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480, color_mode="bgr")
+    camera = IntelRealSenseCamera(serial_number, fps=90, width=640, height=480, color_mode="bgr")
    camera = connect()
    ```

    Example of returning depth:
    ```python
-    camera = IntelRealSenseCamera(camera_index, use_depth=True)
+    camera = IntelRealSenseCamera(serial_number, use_depth=True)
    camera.connect()
    color_image, depth_map = camera.read()
    ```
@@ -229,7 +240,7 @@ class IntelRealSenseCamera:

    def __init__(
        self,
-        camera_index: int,
+        serial_number: int,
        config: IntelRealSenseCameraConfig | None = None,
        **kwargs,
    ):
@@ -239,7 +250,7 @@ class IntelRealSenseCamera:
        # Overwrite the config arguments using kwargs
        config = replace(config, **kwargs)

-        self.camera_index = camera_index
+        self.serial_number = serial_number
        self.fps = config.fps
        self.width = config.width
        self.height = config.height
@@ -256,41 +267,69 @@ class IntelRealSenseCamera:
        self.depth_map = None
        self.logs = {}

+        if self.mock:
+            import tests.mock_cv2 as cv2
+        else:
+            import cv2
+
+        # TODO(alibets): Do we keep original width/height or do we define them after rotation?
+        self.rotation = None
+        if config.rotation == -90:
+            self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
+        elif config.rotation == 90:
+            self.rotation = cv2.ROTATE_90_CLOCKWISE
+        elif config.rotation == 180:
+            self.rotation = cv2.ROTATE_180
+
+    @classmethod
+    def init_from_name(cls, name: str, config: IntelRealSenseCameraConfig | None = None, **kwargs):
+        camera_infos = find_cameras()
+        camera_names = [cam["name"] for cam in camera_infos]
+        this_name_count = Counter(camera_names)[name]
+        if this_name_count > 1:
+            # TODO(aliberts): Test this with multiple identical cameras (Aloha)
+            raise ValueError(
+                f"Multiple {name} cameras have been detected. Please use their serial number to instantiate them."
+            )
+
+        name_to_serial_dict = {cam["name"]: cam["serial_number"] for cam in camera_infos}
+        cam_sn = name_to_serial_dict[name]
+
+        if config is None:
+            config = IntelRealSenseCameraConfig()
+
+        # Overwrite the config arguments using kwargs
+        config = replace(config, **kwargs)
+
+        return cls(serial_number=cam_sn, config=config, **kwargs)
+
    def connect(self):
        if self.is_connected:
            raise RobotDeviceAlreadyConnectedError(
-                f"IntelRealSenseCamera({self.camera_index}) is already connected."
+                f"IntelRealSenseCamera({self.serial_number}) is already connected."
            )

        if self.mock:
-            from tests.mock_pyrealsense2 import (
-                RSConfig,
-                RSFormat,
-                RSPipeline,
-                RSStream,
-            )
+            import tests.mock_pyrealsense2 as rs
        else:
-            from pyrealsense2 import config as RSConfig  # noqa: N812
-            from pyrealsense2 import format as RSFormat  # noqa: N812
-            from pyrealsense2 import pipeline as RSPipeline  # noqa: N812
-            from pyrealsense2 import stream as RSStream  # noqa: N812
+            import pyrealsense2 as rs

-        config = RSConfig()
-        config.enable_device(str(self.camera_index))
+        config = rs.config()
+        config.enable_device(str(self.serial_number))

        if self.fps and self.width and self.height:
            # TODO(rcadene): can we set rgb8 directly?
-            config.enable_stream(RSStream.color, self.width, self.height, RSFormat.rgb8, self.fps)
+            config.enable_stream(rs.stream.color, self.width, self.height, rs.format.rgb8, self.fps)
        else:
-            config.enable_stream(RSStream.color)
+            config.enable_stream(rs.stream.color)

        if self.use_depth:
            if self.fps and self.width and self.height:
-                config.enable_stream(RSStream.depth, self.width, self.height, RSFormat.z16, self.fps)
+                config.enable_stream(rs.stream.depth, self.width, self.height, rs.format.z16, self.fps)
            else:
-                config.enable_stream(RSStream.depth)
+                config.enable_stream(rs.stream.depth)

-        self.camera = RSPipeline()
+        self.camera = rs.pipeline()
        try:
            profile = self.camera.start(config)
            is_camera_open = True
@@ -301,17 +340,18 @@ class IntelRealSenseCamera:
        # If the camera doesn't work, display the camera indices corresponding to
        # valid cameras.
        if not is_camera_open:
-            # Verify that the provided `camera_index` is valid before printing the traceback
-            available_cam_ids = find_camera_indices()
-            if self.camera_index not in available_cam_ids:
+            # Verify that the provided `serial_number` is valid before printing the traceback
+            camera_infos = find_cameras()
+            serial_numbers = [cam["serial_number"] for cam in camera_infos]
+            if self.serial_number not in serial_numbers:
                raise ValueError(
-                    f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
-                    "To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`."
+                    f"`serial_number` is expected to be one of these available cameras {serial_numbers}, but {self.serial_number} is provided instead. "
+                    "To find the serial number you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`."
                )

-            raise OSError(f"Can't access IntelRealSenseCamera({self.camera_index}).")
+            raise OSError(f"Can't access IntelRealSenseCamera({self.serial_number}).")

-        color_stream = profile.get_stream(RSStream.color)
+        color_stream = profile.get_stream(rs.stream.color)
        color_profile = color_stream.as_video_stream_profile()
        actual_fps = color_profile.fps()
        actual_width = color_profile.width()
@@ -321,15 +361,15 @@ class IntelRealSenseCamera:
        if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
            # Using `OSError` since it's a broad that encompasses issues related to device communication
            raise OSError(
-                f"Can't set {self.fps=} for IntelRealSenseCamera({self.camera_index}). Actual value is {actual_fps}."
+                f"Can't set {self.fps=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_fps}."
            )
        if self.width is not None and self.width != actual_width:
            raise OSError(
-                f"Can't set {self.width=} for IntelRealSenseCamera({self.camera_index}). Actual value is {actual_width}."
+                f"Can't set {self.width=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_width}."
            )
        if self.height is not None and self.height != actual_height:
            raise OSError(
-                f"Can't set {self.height=} for IntelRealSenseCamera({self.camera_index}). Actual value is {actual_height}."
+                f"Can't set {self.height=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_height}."
            )

        self.fps = round(actual_fps)
@@ -350,9 +390,14 @@ class IntelRealSenseCamera:
        """
        if not self.is_connected:
            raise RobotDeviceNotConnectedError(
-                f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
+                f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
            )

+        if self.mock:
+            import tests.mock_cv2 as cv2
+        else:
+            import cv2
+
        start_time = time.perf_counter()

        frame = self.camera.wait_for_frames(timeout_ms=5000)
@@ -360,7 +405,7 @@ class IntelRealSenseCamera:
        color_frame = frame.get_color_frame()

        if not color_frame:
-            raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.camera_index}).")
+            raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.serial_number}).")

        color_image = np.asanyarray(color_frame.get_data())

@@ -372,12 +417,7 @@ class IntelRealSenseCamera:

        # IntelRealSense uses RGB format as default (red, green, blue).
        if requested_color_mode == "bgr":
-            if self.mock:
-                from tests.mock_cv2 import COLOR_RGB2BGR, cvtColor
-            else:
-                from cv2 import COLOR_RGB2BGR, cvtColor
-
-            color_image = cvtColor(color_image, COLOR_RGB2BGR)
+            color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)

        h, w, _ = color_image.shape
        if h != self.height or w != self.width:
@@ -385,6 +425,9 @@ class IntelRealSenseCamera:
                f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
            )

+        if self.rotation is not None:
+            color_image = cv2.rotate(color_image, self.rotation)
+
        # log the number of seconds it took to read the image
        self.logs["delta_timestamp_s"] = time.perf_counter() - start_time

@@ -394,7 +437,7 @@ class IntelRealSenseCamera:
        if self.use_depth:
            depth_frame = frame.get_depth_frame()
            if not depth_frame:
-                raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.camera_index}).")
+                raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.serial_number}).")

            depth_map = np.asanyarray(depth_frame.get_data())

@@ -404,6 +447,9 @@ class IntelRealSenseCamera:
                    f"Can't capture depth map with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
                )

+            if self.rotation is not None:
+                depth_map = cv2.rotate(depth_map, self.rotation)
+
            return color_image, depth_map
        else:
            return color_image
@@ -419,7 +465,7 @@ class IntelRealSenseCamera:
        """Access the latest color image"""
        if not self.is_connected:
            raise RobotDeviceNotConnectedError(
-                f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
+                f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
            )

        if self.thread is None:
@@ -446,7 +492,7 @@ class IntelRealSenseCamera:
    def disconnect(self):
        if not self.is_connected:
            raise RobotDeviceNotConnectedError(
-                f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
+                f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
            )

        if self.thread is not None and self.thread.is_alive():
@@ -471,11 +517,11 @@ if __name__ == "__main__":
        description="Save a few frames using `IntelRealSenseCamera` for all cameras connected to the computer, or a selected subset."
    )
    parser.add_argument(
-        "--camera-ids",
+        "--serial-numbers",
        type=int,
        nargs="*",
        default=None,
-        help="List of camera indices used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
+        help="List of serial numbers used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
    )
    parser.add_argument(
        "--fps",
--- a/lerobot/common/robot_devices/cameras/opencv.py
+++ b/lerobot/common/robot_devices/cameras/opencv.py
@@ -31,29 +31,48 @@ from lerobot.common.utils.utils import capture_timestamp_utc
 MAX_OPENCV_INDEX = 60


-def find_camera_indices(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False):
+def find_cameras(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False) -> list[dict]:
+    cameras = []
    if platform.system() == "Linux":
-        # Linux uses camera ports
        print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
-        possible_camera_ids = []
-        for port in Path("/dev").glob("video*"):
-            camera_idx = int(str(port).replace("/dev/video", ""))
-            possible_camera_ids.append(camera_idx)
+        possible_ports = [str(port) for port in Path("/dev").glob("video*")]
+        ports = _find_cameras(possible_ports, mock=mock)
+        for port in ports:
+            cameras.append(
+                {
+                    "port": port,
+                    "index": int(port.removeprefix("/dev/video")),
+                }
+            )
    else:
        print(
            "Mac or Windows detected. Finding available camera indices through "
            f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
        )
-        possible_camera_ids = range(max_index_search_range)
+        possible_indices = range(max_index_search_range)
+        indices = _find_cameras(possible_indices, mock=mock)
+        for index in indices:
+            cameras.append(
+                {
+                    "port": None,
+                    "index": index,
+                }
+            )

+    return cameras
+
+
+def _find_cameras(
+    possible_camera_ids: list[int | str], raise_when_empty=False, mock=False
+) -> list[int | str]:
    if mock:
-        from tests.mock_cv2 import VideoCapture
+        import tests.mock_cv2 as cv2
    else:
-        from cv2 import VideoCapture
+        import cv2

    camera_ids = []
    for camera_idx in possible_camera_ids:
-        camera = VideoCapture(camera_idx)
+        camera = cv2.VideoCapture(camera_idx)
        is_open = camera.isOpened()
        camera.release()

@@ -70,6 +89,16 @@ def find_camera_indices(raise_when_empty=False, max_index_search_range=MAX_OPENC
    return camera_ids


+def is_valid_unix_path(path: str) -> bool:
+    """Note: if 'path' points to a symlink, this will return True only if the target exists"""
+    p = Path(path)
+    return p.is_absolute() and p.exists()
+
+
+def get_camera_index_from_unix_port(port: Path) -> int:
+    return int(str(port.resolve()).removeprefix("/dev/video"))
+
+
 def save_image(img_array, camera_index, frame_index, images_dir):
    img = Image.fromarray(img_array)
    path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
@@ -79,7 +108,7 @@ def save_image(img_array, camera_index, frame_index, images_dir):

 def save_images_from_cameras(
    images_dir: Path,
-    camera_ids: list[int] | None = None,
+    camera_ids: list | None = None,
    fps=None,
    width=None,
    height=None,
@@ -90,8 +119,9 @@ def save_images_from_cameras(
    Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
    associated to a given camera index.
    """
-    if camera_ids is None:
-        camera_ids = find_camera_indices(mock=mock)
+    if camera_ids is None or len(camera_ids) == 0:
+        camera_infos = find_cameras(mock=mock)
+        camera_ids = [cam["index"] for cam in camera_infos]

    print("Connecting cameras")
    cameras = []
@@ -114,7 +144,7 @@ def save_images_from_cameras(
    print(f"Saving images to {images_dir}")
    frame_index = 0
    start_time = time.perf_counter()
-    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        while True:
            now = time.perf_counter()

@@ -126,7 +156,7 @@ def save_images_from_cameras(
                executor.submit(
                    save_image,
                    image,
-                    camera.camera_index,
+                    camera.index,
                    frame_index,
                    images_dir,
                )
@@ -135,11 +165,11 @@ def save_images_from_cameras(
                dt_s = time.perf_counter() - now
                busy_wait(1 / fps - dt_s)

+            print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
+
            if time.perf_counter() - start_time > record_time_s:
                break

-            print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
-
            frame_index += 1

    print(f"Images have been saved to {images_dir}")
@@ -162,6 +192,7 @@ class OpenCVCameraConfig:
    width: int | None = None
    height: int | None = None
    color_mode: str = "rgb"
+    rotation: int | None = None
    mock: bool = False

    def __post_init__(self):
@@ -170,6 +201,9 @@ class OpenCVCameraConfig:
                f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
            )

+        if self.rotation not in [-90, None, 90, 180]:
+            raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
+

 class OpenCVCamera:
    """
@@ -210,7 +244,7 @@ class OpenCVCamera:
    ```
    """

-    def __init__(self, camera_index: int, config: OpenCVCameraConfig | None = None, **kwargs):
+    def __init__(self, camera_index: int | str, config: OpenCVCameraConfig | None = None, **kwargs):
        if config is None:
            config = OpenCVCameraConfig()

@@ -218,6 +252,19 @@ class OpenCVCamera:
        config = replace(config, **kwargs)

        self.camera_index = camera_index
+        self.port = None
+
+        # Linux uses ports for connecting to cameras
+        if platform.system() == "Linux":
+            if isinstance(self.camera_index, int):
+                self.port = Path(f"/dev/video{self.camera_index}")
+            elif isinstance(self.camera_index, str) and is_valid_unix_path(self.camera_index):
+                self.port = Path(self.camera_index)
+                # Retrieve the camera index from a potentially symlinked path
+                self.camera_index = get_camera_index_from_unix_port(self.port)
+            else:
+                raise ValueError(f"Please check the provided camera_index: {camera_index}")
+
        self.fps = config.fps
        self.width = config.width
        self.height = config.height
@@ -231,34 +278,37 @@ class OpenCVCamera:
        self.color_image = None
        self.logs = {}

+        if self.mock:
+            import tests.mock_cv2 as cv2
+        else:
+            import cv2
+
+        # TODO(aliberts): Do we keep original width/height or do we define them after rotation?
+        self.rotation = None
+        if config.rotation == -90:
+            self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
+        elif config.rotation == 90:
+            self.rotation = cv2.ROTATE_90_CLOCKWISE
+        elif config.rotation == 180:
+            self.rotation = cv2.ROTATE_180
+
    def connect(self):
        if self.is_connected:
            raise RobotDeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.")

        if self.mock:
-            from tests.mock_cv2 import (
-                CAP_PROP_FPS,
-                CAP_PROP_FRAME_HEIGHT,
-                CAP_PROP_FRAME_WIDTH,
-                VideoCapture,
-            )
+            import tests.mock_cv2 as cv2
        else:
-            from cv2 import (
-                CAP_PROP_FPS,
-                CAP_PROP_FRAME_HEIGHT,
-                CAP_PROP_FRAME_WIDTH,
-                VideoCapture,
-                setNumThreads,
-            )
+            import cv2

            # Use 1 thread to avoid blocking the main thread. Especially useful during data collection
            # when other threads are used to save the images.
-            setNumThreads(1)
+            cv2.setNumThreads(1)

        camera_idx = f"/dev/video{self.camera_index}" if platform.system() == "Linux" else self.camera_index
        # First create a temporary camera trying to access `camera_index`,
        # and verify it is a valid camera by calling `isOpened`.
-        tmp_camera = VideoCapture(camera_idx)
+        tmp_camera = cv2.VideoCapture(camera_idx)
        is_camera_open = tmp_camera.isOpened()
        # Release camera to make it accessible for `find_camera_indices`
        tmp_camera.release()
@@ -268,7 +318,8 @@ class OpenCVCamera:
        # valid cameras.
        if not is_camera_open:
            # Verify that the provided `camera_index` is valid before printing the traceback
-            available_cam_ids = find_camera_indices()
+            cameras_info = find_cameras()
+            available_cam_ids = [cam["index"] for cam in cameras_info]
            if self.camera_index not in available_cam_ids:
                raise ValueError(
                    f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
@@ -280,18 +331,18 @@ class OpenCVCamera:
        # Secondly, create the camera that will be used downstream.
        # Note: For some unknown reason, calling `isOpened` blocks the camera which then
        # needs to be re-created.
-        self.camera = VideoCapture(camera_idx)
+        self.camera = cv2.VideoCapture(camera_idx)

        if self.fps is not None:
-            self.camera.set(CAP_PROP_FPS, self.fps)
+            self.camera.set(cv2.CAP_PROP_FPS, self.fps)
        if self.width is not None:
-            self.camera.set(CAP_PROP_FRAME_WIDTH, self.width)
+            self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
        if self.height is not None:
-            self.camera.set(CAP_PROP_FRAME_HEIGHT, self.height)
+            self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, self.height)

-        actual_fps = self.camera.get(CAP_PROP_FPS)
-        actual_width = self.camera.get(CAP_PROP_FRAME_WIDTH)
-        actual_height = self.camera.get(CAP_PROP_FRAME_HEIGHT)
+        actual_fps = self.camera.get(cv2.CAP_PROP_FPS)
+        actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
+        actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)

        # Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
        if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
@@ -299,11 +350,11 @@ class OpenCVCamera:
            raise OSError(
                f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}."
            )
-        if self.width is not None and self.width != actual_width:
+        if self.width is not None and not math.isclose(self.width, actual_width, rel_tol=1e-3):
            raise OSError(
                f"Can't set {self.width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}."
            )
-        if self.height is not None and self.height != actual_height:
+        if self.height is not None and not math.isclose(self.height, actual_height, rel_tol=1e-3):
            raise OSError(
                f"Can't set {self.height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}."
            )
@@ -345,11 +396,11 @@ class OpenCVCamera:
        # so we convert the image color from BGR to RGB.
        if requested_color_mode == "rgb":
            if self.mock:
-                from tests.mock_cv2 import COLOR_BGR2RGB, cvtColor
+                import tests.mock_cv2 as cv2
            else:
-                from cv2 import COLOR_BGR2RGB, cvtColor
+                import cv2

-            color_image = cvtColor(color_image, COLOR_BGR2RGB)
+            color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)

        h, w, _ = color_image.shape
        if h != self.height or w != self.width:
@@ -357,6 +408,9 @@ class OpenCVCamera:
                f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
            )

+        if self.rotation is not None:
+            color_image = cv2.rotate(color_image, self.rotation)
+
        # log the number of seconds it took to read the image
        self.logs["delta_timestamp_s"] = time.perf_counter() - start_time

@@ -455,7 +509,7 @@ if __name__ == "__main__":
    parser.add_argument(
        "--record-time-s",
        type=float,
-        default=2.0,
+        default=4.0,
        help="Set the number of seconds used to record the frames. By default, 2 seconds.",
    )
    args = parser.parse_args()
--- a/lerobot/common/robot_devices/cameras/utils.py
+++ b/lerobot/common/robot_devices/cameras/utils.py
@@ -1,55 +1,8 @@
-from pathlib import Path
 from typing import Protocol

-import cv2
-import einops
 import numpy as np


-def write_shape_on_image_inplace(image):
-    height, width = image.shape[:2]
-    text = f"Width: {width} Height: {height}"
-
-    # Define the font, scale, color, and thickness
-    font = cv2.FONT_HERSHEY_SIMPLEX
-    font_scale = 1
-    color = (255, 0, 0)  # Blue in BGR
-    thickness = 2
-
-    position = (10, height - 10)  # 10 pixels from the bottom-left corner
-    cv2.putText(image, text, position, font, font_scale, color, thickness)
-
-
-def save_color_image(image, path, write_shape=False):
-    path = Path(path)
-    path.parent.mkdir(parents=True, exist_ok=True)
-    if write_shape:
-        write_shape_on_image_inplace(image)
-    cv2.imwrite(str(path), image)
-
-
-def save_depth_image(depth, path, write_shape=False):
-    path = Path(path)
-    path.parent.mkdir(parents=True, exist_ok=True)
-
-    # Apply colormap on depth image (image must be converted to 8-bit per pixel first)
-    depth_image = cv2.applyColorMap(cv2.convertScaleAbs(depth, alpha=0.03), cv2.COLORMAP_JET)
-
-    if write_shape:
-        write_shape_on_image_inplace(depth_image)
-    cv2.imwrite(str(path), depth_image)
-
-
-def convert_torch_image_to_cv2(tensor, rgb_to_bgr=True):
-    assert tensor.ndim == 3
-    c, h, w = tensor.shape
-    assert c < h and c < w
-    color_image = einops.rearrange(tensor, "c h w -> h w c").numpy()
-    if rgb_to_bgr:
-        color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)
-    return color_image
-
-
 # Defines a camera type
 class Camera(Protocol):
    def connect(self): ...
--- a/lerobot/common/robot_devices/motors/dynamixel.py
+++ b/lerobot/common/robot_devices/motors/dynamixel.py
@@ -159,30 +159,25 @@ def convert_to_bytes(value, bytes, mock=False):
    if mock:
        return value

-    from dynamixel_sdk import (
-        DXL_HIBYTE,
-        DXL_HIWORD,
-        DXL_LOBYTE,
-        DXL_LOWORD,
-    )
+    import dynamixel_sdk as dxl

    # Note: No need to convert back into unsigned int, since this byte preprocessing
    # already handles it for us.
    if bytes == 1:
        data = [
-            DXL_LOBYTE(DXL_LOWORD(value)),
+            dxl.DXL_LOBYTE(dxl.DXL_LOWORD(value)),
        ]
    elif bytes == 2:
        data = [
-            DXL_LOBYTE(DXL_LOWORD(value)),
-            DXL_HIBYTE(DXL_LOWORD(value)),
+            dxl.DXL_LOBYTE(dxl.DXL_LOWORD(value)),
+            dxl.DXL_HIBYTE(dxl.DXL_LOWORD(value)),
        ]
    elif bytes == 4:
        data = [
-            DXL_LOBYTE(DXL_LOWORD(value)),
-            DXL_HIBYTE(DXL_LOWORD(value)),
-            DXL_LOBYTE(DXL_HIWORD(value)),
-            DXL_HIBYTE(DXL_HIWORD(value)),
+            dxl.DXL_LOBYTE(dxl.DXL_LOWORD(value)),
+            dxl.DXL_HIBYTE(dxl.DXL_LOWORD(value)),
+            dxl.DXL_LOBYTE(dxl.DXL_HIWORD(value)),
+            dxl.DXL_HIBYTE(dxl.DXL_HIWORD(value)),
        ]
    else:
        raise NotImplementedError(
@@ -361,12 +356,12 @@ class DynamixelMotorsBus:
            )

        if self.mock:
-            from tests.mock_dynamixel_sdk import PacketHandler, PortHandler
+            import tests.mock_dynamixel_sdk as dxl
        else:
-            from dynamixel_sdk import PacketHandler, PortHandler
+            import dynamixel_sdk as dxl

-        self.port_handler = PortHandler(self.port)
-        self.packet_handler = PacketHandler(PROTOCOL_VERSION)
+        self.port_handler = dxl.PortHandler(self.port)
+        self.packet_handler = dxl.PacketHandler(PROTOCOL_VERSION)

        try:
            if not self.port_handler.openPort():
@@ -399,12 +394,12 @@ class DynamixelMotorsBus:

    def reconnect(self):
        if self.mock:
-            from tests.mock_dynamixel_sdk import PacketHandler, PortHandler
+            import tests.mock_dynamixel_sdk as dxl
        else:
-            from dynamixel_sdk import PacketHandler, PortHandler
+            import dynamixel_sdk as dxl

-        self.port_handler = PortHandler(self.port)
-        self.packet_handler = PacketHandler(PROTOCOL_VERSION)
+        self.port_handler = dxl.PortHandler(self.port)
+        self.packet_handler = dxl.PacketHandler(PROTOCOL_VERSION)

        if not self.port_handler.openPort():
            raise OSError(f"Failed to open port '{self.port}'.")
@@ -795,9 +790,9 @@ class DynamixelMotorsBus:

    def _read_with_motor_ids(self, motor_models, motor_ids, data_name):
        if self.mock:
-            from tests.mock_dynamixel_sdk import COMM_SUCCESS, GroupSyncRead
+            import tests.mock_dynamixel_sdk as dxl
        else:
-            from dynamixel_sdk import COMM_SUCCESS, GroupSyncRead
+            import dynamixel_sdk as dxl

        return_list = True
        if not isinstance(motor_ids, list):
@@ -806,12 +801,12 @@ class DynamixelMotorsBus:

        assert_same_address(self.model_ctrl_table, self.motor_models, data_name)
        addr, bytes = self.model_ctrl_table[motor_models[0]][data_name]
-        group = GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes)
+        group = dxl.GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes)
        for idx in motor_ids:
            group.addParam(idx)

        comm = group.txRxPacket()
-        if comm != COMM_SUCCESS:
+        if comm != dxl.COMM_SUCCESS:
            raise ConnectionError(
                f"Read failed due to communication error on port {self.port_handler.port_name} for indices {motor_ids}: "
                f"{self.packet_handler.getTxRxResult(comm)}"
@@ -836,9 +831,9 @@ class DynamixelMotorsBus:
        start_time = time.perf_counter()

        if self.mock:
-            from tests.mock_dynamixel_sdk import COMM_SUCCESS, GroupSyncRead
+            import tests.mock_dynamixel_sdk as dxl
        else:
-            from dynamixel_sdk import COMM_SUCCESS, GroupSyncRead
+            import dynamixel_sdk as dxl

        if motor_names is None:
            motor_names = self.motor_names
@@ -859,16 +854,18 @@ class DynamixelMotorsBus:

        if data_name not in self.group_readers:
            # create new group reader
-            self.group_readers[group_key] = GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes)
+            self.group_readers[group_key] = dxl.GroupSyncRead(
+                self.port_handler, self.packet_handler, addr, bytes
+            )
            for idx in motor_ids:
                self.group_readers[group_key].addParam(idx)

        for _ in range(NUM_READ_RETRY):
            comm = self.group_readers[group_key].txRxPacket()
-            if comm == COMM_SUCCESS:
+            if comm == dxl.COMM_SUCCESS:
                break

-        if comm != COMM_SUCCESS:
+        if comm != dxl.COMM_SUCCESS:
            raise ConnectionError(
                f"Read failed due to communication error on port {self.port} for group_key {group_key}: "
                f"{self.packet_handler.getTxRxResult(comm)}"
@@ -900,9 +897,9 @@ class DynamixelMotorsBus:

    def _write_with_motor_ids(self, motor_models, motor_ids, data_name, values):
        if self.mock:
-            from tests.mock_dynamixel_sdk import COMM_SUCCESS, GroupSyncWrite
+            import tests.mock_dynamixel_sdk as dxl
        else:
-            from dynamixel_sdk import COMM_SUCCESS, GroupSyncWrite
+            import dynamixel_sdk as dxl

        if not isinstance(motor_ids, list):
            motor_ids = [motor_ids]
@@ -911,13 +908,13 @@ class DynamixelMotorsBus:

        assert_same_address(self.model_ctrl_table, motor_models, data_name)
        addr, bytes = self.model_ctrl_table[motor_models[0]][data_name]
-        group = GroupSyncWrite(self.port_handler, self.packet_handler, addr, bytes)
+        group = dxl.GroupSyncWrite(self.port_handler, self.packet_handler, addr, bytes)
        for idx, value in zip(motor_ids, values, strict=True):
            data = convert_to_bytes(value, bytes, self.mock)
            group.addParam(idx, data)

        comm = group.txPacket()
-        if comm != COMM_SUCCESS:
+        if comm != dxl.COMM_SUCCESS:
            raise ConnectionError(
                f"Write failed due to communication error on port {self.port_handler.port_name} for indices {motor_ids}: "
                f"{self.packet_handler.getTxRxResult(comm)}"
@@ -932,9 +929,9 @@ class DynamixelMotorsBus:
        start_time = time.perf_counter()

        if self.mock:
-            from tests.mock_dynamixel_sdk import COMM_SUCCESS, GroupSyncWrite
+            import tests.mock_dynamixel_sdk as dxl
        else:
-            from dynamixel_sdk import COMM_SUCCESS, GroupSyncWrite
+            import dynamixel_sdk as dxl

        if motor_names is None:
            motor_names = self.motor_names
@@ -965,7 +962,7 @@ class DynamixelMotorsBus:

        init_group = data_name not in self.group_readers
        if init_group:
-            self.group_writers[group_key] = GroupSyncWrite(
+            self.group_writers[group_key] = dxl.GroupSyncWrite(
                self.port_handler, self.packet_handler, addr, bytes
            )

@@ -977,7 +974,7 @@ class DynamixelMotorsBus:
                self.group_writers[group_key].changeParam(idx, data)

        comm = self.group_writers[group_key].txPacket()
-        if comm != COMM_SUCCESS:
+        if comm != dxl.COMM_SUCCESS:
            raise ConnectionError(
                f"Write failed due to communication error on port {self.port} for group_key {group_key}: "
                f"{self.packet_handler.getTxRxResult(comm)}"
--- a/lerobot/common/robot_devices/robots/factory.py
+++ b/lerobot/common/robot_devices/robots/factory.py
@@ -1,7 +1,9 @@
 import hydra
 from omegaconf import DictConfig

+from lerobot.common.robot_devices.robots.utils import Robot

-def make_robot(cfg: DictConfig):
+
+def make_robot(cfg: DictConfig) -> Robot:
    robot = hydra.utils.instantiate(cfg)
    return robot
--- a/lerobot/common/robot_devices/robots/manipulator.py
+++ b/lerobot/common/robot_devices/robots/manipulator.py
@@ -681,6 +681,10 @@ class ManipulatorRobot:

        return torch.cat(action_sent)

+    def print_logs(self):
+        pass
+        # TODO(aliberts): move robot-specific logs logic here
+
    def disconnect(self):
        if not self.is_connected:
            raise RobotDeviceNotConnectedError(
--- a/lerobot/common/robot_devices/robots/stretch.py
+++ b/lerobot/common/robot_devices/robots/stretch.py
@@ -0,0 +1,216 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from dataclasses import dataclass, field, replace
+
+import torch
+from stretch_body.gamepad_teleop import GamePadTeleop
+from stretch_body.robot import Robot as StretchAPI
+from stretch_body.robot_params import RobotParams
+
+from lerobot.common.robot_devices.cameras.utils import Camera
+
+
+@dataclass
+class StretchRobotConfig:
+    robot_type: str | None = "stretch"
+    cameras: dict[str, Camera] = field(default_factory=lambda: {})
+    # TODO(aliberts): add feature with max_relative target
+    # TODO(aliberts): add comment on max_relative target
+    max_relative_target: list[float] | float | None = None
+
+
+class StretchRobot(StretchAPI):
+    """Wrapper of stretch_body.robot.Robot"""
+
+    def __init__(self, config: StretchRobotConfig | None = None, **kwargs):
+        super().__init__()
+        if config is None:
+            config = StretchRobotConfig()
+        # Overwrite config arguments using kwargs
+        self.config = replace(config, **kwargs)
+
+        self.robot_type = self.config.robot_type
+        self.cameras = self.config.cameras
+        self.is_connected = False
+        self.teleop = None
+        self.logs = {}
+
+        # TODO(aliberts): test this
+        RobotParams.set_logging_level("WARNING")
+        RobotParams.set_logging_formatter("brief_console_formatter")
+
+        self.state_keys = None
+        self.action_keys = None
+
+    def connect(self) -> None:
+        self.is_connected = self.startup()
+        if not self.is_connected:
+            print("Another process is already using Stretch. Try running 'stretch_free_robot_process.py'")
+            raise ConnectionError()
+
+        for name in self.cameras:
+            self.cameras[name].connect()
+            self.is_connected = self.is_connected and self.cameras[name].is_connected
+
+        if not self.is_connected:
+            print("Could not connect to the cameras, check that all cameras are plugged-in.")
+            raise ConnectionError()
+
+        self.run_calibration()
+
+    def run_calibration(self) -> None:
+        if not self.is_homed():
+            self.home()
+
+    def teleop_step(
+        self, record_data=False
+    ) -> None | tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
+        # TODO(aliberts): return ndarrays instead of torch.Tensors
+        if not self.is_connected:
+            raise ConnectionError()
+
+        if self.teleop is None:
+            self.teleop = GamePadTeleop(robot_instance=False)
+            self.teleop.startup(robot=self)
+
+        before_read_t = time.perf_counter()
+        state = self.get_state()
+        action = self.teleop.gamepad_controller.get_state()
+        self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t
+
+        before_write_t = time.perf_counter()
+        self.teleop.do_motion(robot=self)
+        self.push_command()
+        self.logs["write_pos_dt_s"] = time.perf_counter() - before_write_t
+
+        if self.state_keys is None:
+            self.state_keys = list(state)
+
+        if not record_data:
+            return
+
+        state = torch.as_tensor(list(state.values()))
+        action = torch.as_tensor(list(action.values()))
+
+        # Capture images from cameras
+        images = {}
+        for name in self.cameras:
+            before_camread_t = time.perf_counter()
+            images[name] = self.cameras[name].async_read()
+            images[name] = torch.from_numpy(images[name])
+            self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
+            self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
+
+        # Populate output dictionnaries
+        obs_dict, action_dict = {}, {}
+        obs_dict["observation.state"] = state
+        action_dict["action"] = action
+        for name in self.cameras:
+            obs_dict[f"observation.images.{name}"] = images[name]
+
+        return obs_dict, action_dict
+
+    def get_state(self) -> dict:
+        status = self.get_status()
+        return {
+            "head_pan.pos": status["head"]["head_pan"]["pos"],
+            "head_tilt.pos": status["head"]["head_tilt"]["pos"],
+            "lift.pos": status["lift"]["pos"],
+            "arm.pos": status["arm"]["pos"],
+            "wrist_pitch.pos": status["end_of_arm"]["wrist_pitch"]["pos"],
+            "wrist_roll.pos": status["end_of_arm"]["wrist_roll"]["pos"],
+            "wrist_yaw.pos": status["end_of_arm"]["wrist_yaw"]["pos"],
+            "gripper.pos": status["end_of_arm"]["stretch_gripper"]["pos"],
+            "base_x.vel": status["base"]["x_vel"],
+            "base_y.vel": status["base"]["y_vel"],
+            "base_theta.vel": status["base"]["theta_vel"],
+        }
+
+    def capture_observation(self) -> dict:
+        # TODO(aliberts): return ndarrays instead of torch.Tensors
+        before_read_t = time.perf_counter()
+        state = self.get_state()
+        self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t
+
+        if self.state_keys is None:
+            self.state_keys = list(state)
+
+        state = torch.as_tensor(list(state.values()))
+
+        # Capture images from cameras
+        images = {}
+        for name in self.cameras:
+            before_camread_t = time.perf_counter()
+            images[name] = self.cameras[name].async_read()
+            images[name] = torch.from_numpy(images[name])
+            self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
+            self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
+
+        # Populate output dictionnaries
+        obs_dict = {}
+        obs_dict["observation.state"] = state
+        for name in self.cameras:
+            obs_dict[f"observation.images.{name}"] = images[name]
+
+        return obs_dict
+
+    def send_action(self, action: torch.Tensor) -> torch.Tensor:
+        # TODO(aliberts): return ndarrays instead of torch.Tensors
+        if not self.is_connected:
+            raise ConnectionError()
+
+        if self.teleop is None:
+            self.teleop = GamePadTeleop(robot_instance=False)
+            self.teleop.startup(robot=self)
+
+        if self.action_keys is None:
+            dummy_action = self.teleop.gamepad_controller.get_state()
+            self.action_keys = list(dummy_action.keys())
+
+        action_dict = dict(zip(self.action_keys, action.tolist(), strict=True))
+
+        before_write_t = time.perf_counter()
+        self.teleop.do_motion(state=action_dict, robot=self)
+        self.push_command()
+        self.logs["write_pos_dt_s"] = time.perf_counter() - before_write_t
+
+        # TODO(aliberts): return action_sent when motion is limited
+        return action
+
+    def print_logs(self) -> None:
+        pass
+        # TODO(aliberts): move robot-specific logs logic here
+
+    def teleop_safety_stop(self) -> None:
+        if self.teleop is not None:
+            self.teleop._safety_stop(robot=self)
+
+    def disconnect(self) -> None:
+        self.stop()
+        if self.teleop is not None:
+            self.teleop.gamepad_controller.stop()
+            self.teleop.stop()
+
+        if len(self.cameras) > 0:
+            for cam in self.cameras.values():
+                cam.disconnect()
+
+        self.is_connected = False
+
+    def __del__(self):
+        self.disconnect()
--- a/lerobot/common/robot_devices/robots/utils.py
+++ b/lerobot/common/robot_devices/robots/utils.py
@@ -9,8 +9,12 @@ def get_arm_id(name, arm_type):


 class Robot(Protocol):
-    def init_teleop(self): ...
+    # TODO(rcadene, aliberts): Add unit test checking the protocol is implemented in the corresponding classes
+    robot_type: str
+
+    def connect(self): ...
    def run_calibration(self): ...
    def teleop_step(self, record_data=False): ...
    def capture_observation(self): ...
    def send_action(self, action): ...
+    def disconnect(self): ...
--- a/lerobot/common/robot_devices/utils.py
+++ b/lerobot/common/robot_devices/utils.py
@@ -16,6 +16,20 @@ def busy_wait(seconds):
            time.sleep(seconds)


+def safe_disconnect(func):
+    # TODO(aliberts): Allow to pass custom exceptions
+    # (e.g. ThreadServiceExit, KeyboardInterrupt, SystemExit, UnpluggedError, DynamixelCommError)
+    def wrapper(robot, *args, **kwargs):
+        try:
+            return func(robot, *args, **kwargs)
+        except Exception as e:
+            if robot.is_connected:
+                robot.disconnect()
+            raise e
+
+    return wrapper
+
+
 class RobotDeviceNotConnectedError(Exception):
    """Exception raised when the robot device is not connected."""