Add support for Stretch (hello-robot) (#409)

Co-authored-by: Remi <remi.cadene@huggingface.co>
Co-authored-by: Remi Cadene <re.cadene@gmail.com>
2024-10-04 18:56:42 +02:00
parent 26f97cfd17
commit 1a343c3591
20 changed files with 5052 additions and 1652 deletions
--- a/lerobot/common/robot_devices/cameras/opencv.py
+++ b/lerobot/common/robot_devices/cameras/opencv.py
@@ -31,29 +31,48 @@ from lerobot.common.utils.utils import capture_timestamp_utc
 MAX_OPENCV_INDEX = 60


-def find_camera_indices(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False):
+def find_cameras(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False) -> list[dict]:
+    """Probe the system for cameras that OpenCV can open.
+
+    On Linux every `/dev/video*` port is probed; on other systems every index in
+    `range(max_index_search_range)` is probed.
+
+    Args:
+        raise_when_empty: Forwarded to `_find_cameras` (presumably makes it raise when no
+            camera is detected -- confirm against the full body of `_find_cameras`).
+        max_index_search_range: Exclusive upper bound of the indices scanned on non-Linux systems.
+        mock: Forwarded to `_find_cameras`, which then imports `tests.mock_cv2` instead of `cv2`.
+
+    Returns:
+        One dict per detected camera with the keys "port" (the `/dev/video*` path on Linux,
+        None elsewhere) and "index" (the integer camera index).
+    """
+    cameras = []
    if platform.system() == "Linux":
-        # Linux uses camera ports
        print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
-        possible_camera_ids = []
-        for port in Path("/dev").glob("video*"):
-            camera_idx = int(str(port).replace("/dev/video", ""))
-            possible_camera_ids.append(camera_idx)
+        possible_ports = [str(port) for port in Path("/dev").glob("video*")]
+        # Pass `raise_when_empty` through so the caller's choice is honoured by the probe.
+        ports = _find_cameras(possible_ports, raise_when_empty=raise_when_empty, mock=mock)
+        for port in ports:
+            cameras.append(
+                {
+                    "port": port,
+                    "index": int(port.removeprefix("/dev/video")),
+                }
+            )
    else:
        print(
            "Mac or Windows detected. Finding available camera indices through "
-            f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
+            f"scanning all indices from 0 to {max_index_search_range}"
        )
-        possible_camera_ids = range(max_index_search_range)
+        possible_indices = range(max_index_search_range)
+        # Pass `raise_when_empty` through so the caller's choice is honoured by the probe.
+        indices = _find_cameras(possible_indices, raise_when_empty=raise_when_empty, mock=mock)
+        for index in indices:
+            cameras.append(
+                {
+                    "port": None,
+                    "index": index,
+                }
+            )

+    return cameras
+
+
+def _find_cameras(
+    possible_camera_ids: list[int | str], raise_when_empty=False, mock=False
+) -> list[int | str]:
    if mock:
-        from tests.mock_cv2 import VideoCapture
+        import tests.mock_cv2 as cv2
    else:
-        from cv2 import VideoCapture
+        import cv2

    camera_ids = []
    for camera_idx in possible_camera_ids:
-        camera = VideoCapture(camera_idx)
+        camera = cv2.VideoCapture(camera_idx)
        is_open = camera.isOpened()
        camera.release()

@@ -70,6 +89,16 @@ def find_camera_indices(raise_when_empty=False, max_index_search_range=MAX_OPENC
    return camera_ids


+def is_valid_unix_path(path: str) -> bool:
+    """Return True when `path` is absolute and exists on the filesystem.
+
+    Note: if 'path' points to a symlink, this will return True only if the target exists.
+    """
+    candidate = Path(path)
+    # Relative paths are rejected outright, without touching the filesystem.
+    if not candidate.is_absolute():
+        return False
+    return candidate.exists()
+
+
+def get_camera_index_from_unix_port(port: Path) -> int:
+    """Return the integer N of the `/dev/videoN` device that `port` resolves to.
+
+    `port` is resolved first, so a symlink pointing at a video device yields that device's index.
+    """
+    resolved_port = str(port.resolve())
+    return int(resolved_port.removeprefix("/dev/video"))
+
+
 def save_image(img_array, camera_index, frame_index, images_dir):
    img = Image.fromarray(img_array)
    path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
@@ -79,7 +108,7 @@ def save_image(img_array, camera_index, frame_index, images_dir):

 def save_images_from_cameras(
    images_dir: Path,
-    camera_ids: list[int] | None = None,
+    camera_ids: list | None = None,
    fps=None,
    width=None,
    height=None,
@@ -90,8 +119,9 @@ def save_images_from_cameras(
    Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
    associated to a given camera index.
    """
-    if camera_ids is None:
-        camera_ids = find_camera_indices(mock=mock)
+    if camera_ids is None or len(camera_ids) == 0:
+        camera_infos = find_cameras(mock=mock)
+        camera_ids = [cam["index"] for cam in camera_infos]

    print("Connecting cameras")
    cameras = []
@@ -114,7 +144,7 @@ def save_images_from_cameras(
    print(f"Saving images to {images_dir}")
    frame_index = 0
    start_time = time.perf_counter()
-    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        while True:
            now = time.perf_counter()

@@ -126,7 +156,7 @@ def save_images_from_cameras(
                executor.submit(
                    save_image,
                    image,
-                    camera.camera_index,
+                    camera.index,
                    frame_index,
                    images_dir,
                )
@@ -135,11 +165,11 @@ def save_images_from_cameras(
                dt_s = time.perf_counter() - now
                busy_wait(1 / fps - dt_s)

+            print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
+
            if time.perf_counter() - start_time > record_time_s:
                break

-            print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
-
            frame_index += 1

    print(f"Images have been saved to {images_dir}")
@@ -162,6 +192,7 @@ class OpenCVCameraConfig:
    width: int | None = None
    height: int | None = None
    color_mode: str = "rgb"
+    rotation: int | None = None
    mock: bool = False

    def __post_init__(self):
@@ -170,6 +201,9 @@ class OpenCVCameraConfig:
                f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
            )

+        if self.rotation not in [-90, None, 90, 180]:
+            raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
+

 class OpenCVCamera:
    """
@@ -210,7 +244,7 @@ class OpenCVCamera:
    ```
    """

-    def __init__(self, camera_index: int, config: OpenCVCameraConfig | None = None, **kwargs):
+    def __init__(self, camera_index: int | str, config: OpenCVCameraConfig | None = None, **kwargs):
        if config is None:
            config = OpenCVCameraConfig()

@@ -218,6 +252,19 @@ class OpenCVCamera:
        config = replace(config, **kwargs)

        self.camera_index = camera_index
+        self.port = None
+
+        # Linux uses ports for connecting to cameras
+        if platform.system() == "Linux":
+            if isinstance(self.camera_index, int):
+                self.port = Path(f"/dev/video{self.camera_index}")
+            elif isinstance(self.camera_index, str) and is_valid_unix_path(self.camera_index):
+                self.port = Path(self.camera_index)
+                # Retrieve the camera index from a potentially symlinked path
+                self.camera_index = get_camera_index_from_unix_port(self.port)
+            else:
+                raise ValueError(f"Please check the provided camera_index: {camera_index}")
+
        self.fps = config.fps
        self.width = config.width
        self.height = config.height
@@ -231,34 +278,37 @@ class OpenCVCamera:
        self.color_image = None
        self.logs = {}

+        if self.mock:
+            import tests.mock_cv2 as cv2
+        else:
+            import cv2
+
+        # TODO(aliberts): Do we keep original width/height or do we define them after rotation?
+        self.rotation = None
+        if config.rotation == -90:
+            self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
+        elif config.rotation == 90:
+            self.rotation = cv2.ROTATE_90_CLOCKWISE
+        elif config.rotation == 180:
+            self.rotation = cv2.ROTATE_180
+
    def connect(self):
        if self.is_connected:
            raise RobotDeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.")

        if self.mock:
-            from tests.mock_cv2 import (
-                CAP_PROP_FPS,
-                CAP_PROP_FRAME_HEIGHT,
-                CAP_PROP_FRAME_WIDTH,
-                VideoCapture,
-            )
+            import tests.mock_cv2 as cv2
        else:
-            from cv2 import (
-                CAP_PROP_FPS,
-                CAP_PROP_FRAME_HEIGHT,
-                CAP_PROP_FRAME_WIDTH,
-                VideoCapture,
-                setNumThreads,
-            )
+            import cv2

            # Use 1 thread to avoid blocking the main thread. Especially useful during data collection
            # when other threads are used to save the images.
-            setNumThreads(1)
+            cv2.setNumThreads(1)

        camera_idx = f"/dev/video{self.camera_index}" if platform.system() == "Linux" else self.camera_index
        # First create a temporary camera trying to access `camera_index`,
        # and verify it is a valid camera by calling `isOpened`.
-        tmp_camera = VideoCapture(camera_idx)
+        tmp_camera = cv2.VideoCapture(camera_idx)
        is_camera_open = tmp_camera.isOpened()
-        # Release camera to make it accessible for `find_camera_indices`
+        # Release camera to make it accessible for `find_cameras`
        tmp_camera.release()
@@ -268,7 +318,8 @@ class OpenCVCamera:
        # valid cameras.
        if not is_camera_open:
            # Verify that the provided `camera_index` is valid before printing the traceback
-            available_cam_ids = find_camera_indices()
+            cameras_info = find_cameras()
+            available_cam_ids = [cam["index"] for cam in cameras_info]
            if self.camera_index not in available_cam_ids:
                raise ValueError(
                    f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
@@ -280,18 +331,18 @@ class OpenCVCamera:
        # Secondly, create the camera that will be used downstream.
        # Note: For some unknown reason, calling `isOpened` blocks the camera which then
        # needs to be re-created.
-        self.camera = VideoCapture(camera_idx)
+        self.camera = cv2.VideoCapture(camera_idx)

        if self.fps is not None:
-            self.camera.set(CAP_PROP_FPS, self.fps)
+            self.camera.set(cv2.CAP_PROP_FPS, self.fps)
        if self.width is not None:
-            self.camera.set(CAP_PROP_FRAME_WIDTH, self.width)
+            self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
        if self.height is not None:
-            self.camera.set(CAP_PROP_FRAME_HEIGHT, self.height)
+            self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, self.height)

-        actual_fps = self.camera.get(CAP_PROP_FPS)
-        actual_width = self.camera.get(CAP_PROP_FRAME_WIDTH)
-        actual_height = self.camera.get(CAP_PROP_FRAME_HEIGHT)
+        actual_fps = self.camera.get(cv2.CAP_PROP_FPS)
+        actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
+        actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)

        # Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
        if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
@@ -299,11 +350,11 @@ class OpenCVCamera:
            raise OSError(
                f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}."
            )
-        if self.width is not None and self.width != actual_width:
+        if self.width is not None and not math.isclose(self.width, actual_width, rel_tol=1e-3):
            raise OSError(
                f"Can't set {self.width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}."
            )
-        if self.height is not None and self.height != actual_height:
+        if self.height is not None and not math.isclose(self.height, actual_height, rel_tol=1e-3):
            raise OSError(
                f"Can't set {self.height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}."
            )
@@ -345,11 +396,11 @@ class OpenCVCamera:
        # so we convert the image color from BGR to RGB.
        if requested_color_mode == "rgb":
            if self.mock:
-                from tests.mock_cv2 import COLOR_BGR2RGB, cvtColor
+                import tests.mock_cv2 as cv2
            else:
-                from cv2 import COLOR_BGR2RGB, cvtColor
+                import cv2

-            color_image = cvtColor(color_image, COLOR_BGR2RGB)
+            color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)

        h, w, _ = color_image.shape
        if h != self.height or w != self.width:
@@ -357,6 +408,9 @@ class OpenCVCamera:
                f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
            )

+        if self.rotation is not None:
+            color_image = cv2.rotate(color_image, self.rotation)
+
        # log the number of seconds it took to read the image
        self.logs["delta_timestamp_s"] = time.perf_counter() - start_time

@@ -455,7 +509,7 @@ if __name__ == "__main__":
    parser.add_argument(
        "--record-time-s",
        type=float,
-        default=2.0,
+        default=4.0,
        help="Set the number of seconds used to record the frames. By default, 2 seconds.",
    )
    args = parser.parse_args()