chore(docs): adress notes + add docs in camera code

This commit is contained in:
Steven Palma
2025-05-14 15:44:38 +02:00
parent cca647307b
commit 859a369b29
11 changed files with 149 additions and 56 deletions

View File

@@ -1,3 +1,17 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .camera import Camera
from .configs import CameraConfig
from .utils import make_cameras_from_configs

View File

@@ -21,7 +21,6 @@ import numpy as np
from .configs import CameraConfig, ColorMode
# NOTE(Steven): Consider something like configure() if makes sense for both cameras
class Camera(abc.ABC):
def __init__(self, config: CameraConfig):
self.fps: int | None = config.fps

View File

@@ -1,2 +1,16 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .camera_realsense import RealSenseCamera
from .configuration_realsense import RealSenseCameraConfig

View File

@@ -22,7 +22,7 @@ import math
import queue
import time
from threading import Event, Thread
from typing import Any, Dict, List, Tuple, Union
from typing import Any, Dict, List
import cv2
import numpy as np
@@ -49,6 +49,11 @@ class RealSenseCamera(Camera):
indices, especially on Linux. It also supports capturing depth maps alongside
color frames.
Use the provided utility script to find available camera indices and default profiles:
```bash
python -m lerobot.find_cameras
```
A `RealSenseCamera` instance requires a configuration object specifying the
camera's serial number or a unique device name. If using the name, ensure only
one camera with that name is connected.
@@ -120,7 +125,7 @@ class RealSenseCamera(Camera):
self.config = config
if config.name is not None: # TODO(Steven): Do we want to continue supporting this?
if config.name is not None: # NOTE(Steven): Do we want to continue supporting this?
self.serial_number = self._find_serial_number_from_name(config.name)
elif config.serial_number is not None:
self.serial_number = str(config.serial_number)
@@ -143,8 +148,6 @@ class RealSenseCamera(Camera):
self.rotation: int | None = get_cv2_rotation(config.rotation)
# NOTE(Steven): What happens if rotation is specified but we leave width and height to None?
# NOTE(Steven): Should we enforce these parameters if rotation is set?
if self.height and self.width:
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
self.prerotated_width, self.prerotated_height = self.height, self.width
@@ -301,7 +304,6 @@ class RealSenseCamera(Camera):
self.rs_profile.get_stream(rs.stream.depth).as_video_stream_profile()
)
# NOTE(Steven): Add a wamr-up period time config
def connect(self, do_warmup_read: bool = True):
"""
Connects to the RealSense camera specified in the configuration.
@@ -339,13 +341,12 @@ class RealSenseCamera(Camera):
if do_warmup_read:
logger.debug(f"Reading a warm-up frame for {self.serial_number}...")
self.read() # NOTE(Steven): For now we just read one frame, we could also loop for X secs
self.read() # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs
logger.info(f"Camera {self.serial_number} connected and configured successfully.")
def _validate_fps(self, stream) -> None:
"""Validates and sets the internal FPS based on actual stream FPS."""
actual_fps = stream.fps()
if self.fps is None:
@@ -366,7 +367,6 @@ class RealSenseCamera(Camera):
def _validate_width_and_height(self, stream) -> None:
"""Validates and sets the internal capture width and height based on actual stream width."""
actual_width = int(round(stream.width()))
actual_height = int(round(stream.height()))
@@ -475,7 +475,7 @@ class RealSenseCamera(Camera):
ret, frame = self.rs_pipeline.try_wait_for_frames(
timeout_ms=timeout_ms
) # NOTE(Steven): This read has a timeout
) # NOTE(Steven): This read has a timeout while opencv doesn't
if not ret or frame is None:
raise RuntimeError(
@@ -579,8 +579,8 @@ class RealSenseCamera(Camera):
self.thread.start()
logger.debug(f"Read thread started for {self}.")
# NOTE(Steven): Missing implementation for depth
def async_read(self, timeout_ms: float = 2000) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
# NOTE(Steven): Missing implementation for depth for now
def async_read(self, timeout_ms: float = 2000) -> np.ndarray:
"""
Reads the latest available frame data (color or color+depth) asynchronously.
@@ -594,10 +594,8 @@ class RealSenseCamera(Camera):
to become available in the queue. Defaults to 2000ms (2 seconds).
Returns:
Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
The latest captured frame data (color image or tuple of color image
and depth map), processed according to configuration. Format depends
on `self.use_depth`.
np.ndarray:
The latest captured frame data (color image), processed according to configuration.
Raises:
DeviceNotConnectedError: If the camera is not connected.
@@ -628,7 +626,6 @@ class RealSenseCamera(Camera):
f"Error getting frame data from queue for camera {self.serial_number}: {e}"
) from e
# NOTE(Steven): There are multiple functions that are the same between realsense and opencv. We should consider moving them to the parent class
def _shutdown_read_thread(self):
"""Signals the background read thread to stop and waits for it to join."""
if self.stop_event is not None:

View File

@@ -20,17 +20,39 @@ from ..configs import CameraConfig, ColorMode, Cv2Rotation
@CameraConfig.register_subclass("intelrealsense")
@dataclass
class RealSenseCameraConfig(CameraConfig):
"""
Example of tested options for Intel Real Sense D405:
"""Configuration class for Intel RealSense cameras.
This class provides specialized configuration options for Intel RealSense cameras,
including support for depth sensing and device identification via serial number or name.
Example configurations for Intel RealSense D405:
```python
RealSenseCameraConfig(128422271347, 30, 640, 480)
RealSenseCameraConfig(128422271347, 60, 640, 480)
RealSenseCameraConfig(128422271347, 90, 640, 480)
RealSenseCameraConfig(128422271347, 30, 1280, 720)
RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True)
RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=90)
# Basic configurations
RealSenseCameraConfig(128422271347, 30, 1280, 720) # 1280x720 @ 30FPS
RealSenseCameraConfig(128422271347, 60, 640, 480) # 640x480 @ 60FPS
# Advanced configurations
RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True) # With depth sensing
RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation
```
Attributes:
fps: Requested frames per second for the color stream.
width: Requested frame width in pixels for the color stream.
height: Requested frame height in pixels for the color stream.
name: Optional human-readable name to identify the camera.
serial_number: Optional unique serial number to identify the camera.
Either name or serial_number must be provided.
color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
channels: Number of color channels (currently only 3 is supported).
use_depth: Whether to enable depth stream. Defaults to False.
rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
Note:
- Either name or serial_number must be specified, but not both.
- Depth stream configuration (if enabled) will use the same FPS as the color stream.
- The actual resolution and FPS may be adjusted by the camera to the nearest supported mode.
- Only 3-channel color output (RGB/BGR) is currently supported.
"""
name: str | None = None
@@ -38,8 +60,7 @@ class RealSenseCameraConfig(CameraConfig):
color_mode: ColorMode = ColorMode.RGB
channels: int | None = 3
use_depth: bool = False
# NOTE(Steven): Check how draccus would deal with this str -> enum
rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION # NOTE(Steven): Check if draccus can parse to an enum
def __post_init__(self):
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):

View File

@@ -1,2 +1,16 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .camera_opencv import OpenCVCamera
from .configuration_opencv import OpenCVCameraConfig

View File

@@ -36,7 +36,7 @@ from ..camera import Camera
from ..utils import IndexOrPath, get_cv2_backend, get_cv2_rotation
from .configuration_opencv import ColorMode, OpenCVCameraConfig
# The maximum opencv device index depends on your operating system. For instance,
# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance,
# if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
# on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23.
# When you change the USB port or reboot the computer, the operating system might
@@ -129,10 +129,8 @@ class OpenCVCamera(Camera):
self.logs: dict = {} # NOTE(Steven): Might be removed in the future
self.rotation: int | None = get_cv2_rotation(config.rotation)
self.backend: int = get_cv2_backend() # NOTE(Steven): If I specify backend the opencv open fails
self.backend: int = get_cv2_backend() # NOTE(Steven): If we specify backend the opencv open fails
# NOTE(Steven): What happens if rotation is specified but we leave width and height to None?
# NOTE(Steven): Should we enforce these parameters if rotation is set?
if self.height and self.width:
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
self.prerotated_width, self.prerotated_height = self.height, self.width
@@ -208,7 +206,7 @@ class OpenCVCamera(Camera):
if do_warmup_read:
logger.debug(f"Reading a warm-up frame for {self.index_or_path}...")
self.read() # NOTE(Steven): For now we just read one frame, we could also loop for X secs\
self.read() # NOTE(Steven): For now we just read one frame, we could also loop for X frames/secs
logger.debug(f"Camera {self.index_or_path} connected and configured successfully.")
@@ -239,7 +237,6 @@ class OpenCVCamera(Camera):
actual_width = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_WIDTH)))
actual_height = int(round(self.videocapture_camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# NOTE(Steven): When do we constraint the possibility of only setting one?
if self.width is None or self.height is None:
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
self.width, self.height = actual_height, actual_width

View File

@@ -1,3 +1,17 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from pathlib import Path
@@ -7,15 +21,34 @@ from ..configs import CameraConfig, ColorMode, Cv2Rotation
@CameraConfig.register_subclass("opencv")
@dataclass
class OpenCVCameraConfig(CameraConfig):
"""
Example of tested options for Intel Real Sense D405:
#NOTE(Steven): update this doc
"""Configuration class for OpenCV-based camera devices or video files.
This class provides configuration options for cameras accessed through OpenCV,
supporting both physical camera devices and video files. It includes settings
for resolution, frame rate, color mode, and image rotation.
Example configurations:
```python
OpenCVCameraConfig(0, 30, 640, 480)
OpenCVCameraConfig(0, 60, 640, 480)
OpenCVCameraConfig(0, 90, 640, 480)
OpenCVCameraConfig(0, 30, 1280, 720)
# Basic configurations
OpenCVCameraConfig(0, 30, 1280, 720) # 1280x720 @ 30FPS
OpenCVCameraConfig(/dev/video4, 60, 640, 480) # 640x480 @ 60FPS
# Advanced configurations
OpenCVCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation
```
Attributes:
index_or_path: Either an integer representing the camera device index,
or a Path object pointing to a video file.
fps: Requested frames per second for the color stream.
width: Requested frame width in pixels for the color stream.
height: Requested frame height in pixels for the color stream.
color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
channels: Number of color channels (currently only 3 is supported).
rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
Note:
- Only 3-channel color output (RGB/BGR) is currently supported.
"""
index_or_path: int | Path

View File

@@ -20,7 +20,7 @@ import logging
import shutil
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List
import numpy as np
from PIL import Image
@@ -77,7 +77,7 @@ def find_all_realsense_cameras() -> List[Dict[str, Any]]:
return all_realsense_cameras_info
def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dict[str, Any]]:
def find_and_print_cameras(camera_type_filter: str | None = None) -> List[Dict[str, Any]]:
"""
Finds available cameras based on an optional filter and prints their information.
@@ -106,7 +106,7 @@ def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dic
else:
print("\n--- Detected Cameras ---")
for i, cam_info in enumerate(all_cameras_info):
print(f"Camera #{i + 1}:")
print(f"Camera #{i}:")
for key, value in cam_info.items():
if key == "default_stream_profile" and isinstance(value, dict):
print(f" {key.replace('_', ' ').capitalize()}:")
@@ -120,7 +120,7 @@ def find_and_print_cameras(camera_type_filter: Optional[str] = None) -> List[Dic
def save_image(
img_array: np.ndarray,
camera_identifier: Union[str, int],
camera_identifier: str | int,
images_dir: Path,
camera_type: str,
):
@@ -142,7 +142,7 @@ def save_image(
logger.error(f"Failed to save image for camera {camera_identifier} (type {camera_type}): {e}")
def initialize_output_directory(output_dir: Union[str, Path]) -> Path:
def initialize_output_directory(output_dir: str | Path) -> Path:
"""Initialize and clean the output directory."""
output_dir = Path(output_dir)
if output_dir.exists():
@@ -153,7 +153,7 @@ def initialize_output_directory(output_dir: Union[str, Path]) -> Path:
return output_dir
def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]:
def create_camera_instance(cam_meta: Dict[str, Any]) -> Dict[str, Any] | None:
"""Create and connect to a camera instance based on metadata."""
cam_type = cam_meta.get("type")
cam_id = cam_meta.get("id")
@@ -180,7 +180,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]
if instance:
logger.info(f"Connecting to {cam_type} camera: {cam_id}...")
instance.connect()
instance.connect(do_warmup_read=False)
return {"instance": instance, "meta": cam_meta}
except Exception as e:
logger.error(f"Failed to connect or configure {cam_type} camera {cam_id}: {e}")
@@ -191,7 +191,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Optional[Dict[str, Any]]
def process_camera_image(
cam_dict: Dict[str, Any], output_dir: Path, current_time: float
) -> Optional[concurrent.futures.Future]:
) -> concurrent.futures.Future | None:
"""Capture and process an image from a single camera."""
cam = cam_dict["instance"]
meta = cam_dict["meta"]
@@ -228,9 +228,9 @@ def cleanup_cameras(cameras_to_use: List[Dict[str, Any]]):
def save_images_from_all_cameras(
output_dir: Union[str, Path],
output_dir: str | Path,
record_time_s: float = 2.0,
camera_type_filter: Optional[str] = None,
camera_type_filter: str | None = None,
):
"""
Connects to detected cameras (optionally filtered by type) and saves images from each.
@@ -249,7 +249,6 @@ def save_images_from_all_cameras(
logger.warning("No cameras detected matching the criteria. Cannot save images.")
return
# Create and connect to all cameras
cameras_to_use = []
for cam_meta in all_camera_metadata:
camera_instance = create_camera_instance(cam_meta)
@@ -263,7 +262,6 @@ def save_images_from_all_cameras(
logger.info(f"Starting image capture for {record_time_s} seconds from {len(cameras_to_use)} cameras.")
start_time = time.perf_counter()
# NOTE(Steven): This seems like an overkill to me
with concurrent.futures.ThreadPoolExecutor(max_workers=len(cameras_to_use) * 2) as executor:
try:
while time.perf_counter() - start_time < record_time_s:
@@ -287,6 +285,10 @@ def save_images_from_all_cameras(
logger.info(f"Image capture finished. Images saved to {output_dir}")
# NOTE(Steven):
# * realsense identified as opencv -> consistent in linux, we can even capture images
# * opencv mac cams reporting different fps at init, not an issue as we don't enforce fps here
# * opencv not opening in linux if we specify a backend
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Unified camera utility script for listing cameras and capturing images."
@@ -329,8 +331,8 @@ if __name__ == "__main__":
capture_parser.add_argument(
"--record-time-s",
type=float,
default=5.0,
help="Time duration to attempt capturing frames. Default: 0.5 seconds (usually enough for one frame).",
default=6.0,
help="Time duration to attempt capturing frames. Default: 6 seconds.",
)
capture_parser.set_defaults(
func=lambda args: save_images_from_all_cameras(

View File

@@ -28,7 +28,7 @@ from lerobot.common.cameras.configs import Cv2Rotation
from lerobot.common.cameras.opencv import OpenCVCamera, OpenCVCameraConfig
from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
# NOTE(Steven): Consider improving the assert coverage
# NOTE(Steven): more tests + assertions?
TEST_ARTIFACTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "artifacts", "cameras")
DEFAULT_PNG_FILE_PATH = os.path.join(TEST_ARTIFACTS_DIR, "fakecam_sd_160x120.png")
TEST_IMAGE_PATHS = [

View File

@@ -36,7 +36,9 @@ except (ImportError, ModuleNotFoundError):
TEST_ARTIFACTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "artifacts", "cameras")
BAG_FILE_PATH = os.path.join(TEST_ARTIFACTS_DIR, "test_rs.bag")
# NOTE(Steven): Missing tests for depth
# NOTE(Steven): Takes 20sec, the patch being the biggest bottleneck
# NOTE(Steven): more tests + assertions?
if not os.path.exists(BAG_FILE_PATH):
print(f"Warning: Bag file not found at {BAG_FILE_PATH}. Some tests might fail or be skipped.")