refactor(cameras): improvements realsense cam v0.1

This commit is contained in:
Steven Palma
2025-05-02 15:09:38 +02:00
parent 35c4b01752
commit 15b5d28f45
5 changed files with 782 additions and 551 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -14,7 +14,7 @@
from dataclasses import dataclass
from ..configs import CameraConfig
from ..configs import CameraConfig, ColorMode, Cv2Rotation
@CameraConfig.register_subclass("intelrealsense")
@@ -38,26 +38,39 @@ class RealSenseCameraConfig(CameraConfig):
fps: int | None = None
width: int | None = None
height: int | None = None
color_mode: str = "rgb"
channels: int | None = None
color_mode: ColorMode = ColorMode.RGB
channels: int | None = 3
use_depth: bool = False
force_hardware_reset: bool = True
rotation: int | None = None
rotation: Cv2Rotation = (
Cv2Rotation.NO_ROTATION
) # NOTE(Steven): Check how draccus would deal with this str -> enum
def __post_init__(self):
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
)
if self.rotation not in (
Cv2Rotation.NO_ROTATION,
Cv2Rotation.ROTATE_90,
Cv2Rotation.ROTATE_180,
Cv2Rotation.ROTATE_270,
):
raise ValueError(
f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
)
if self.channels != 3:
raise NotImplementedError(f"Unsupported number of channels: {self.channels}")
# bool is stronger than is None, since it works with empty strings
if bool(self.name) and bool(self.serial_number):
raise ValueError(
f"One of them must be set: name or serial_number, but {self.name=} and {self.serial_number=} provided."
)
if self.color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)
self.channels = 3
at_least_one_is_not_none = self.fps is not None or self.width is not None or self.height is not None
at_least_one_is_none = self.fps is None or self.width is None or self.height is None
if at_least_one_is_not_none and at_least_one_is_none:
@@ -65,6 +78,3 @@ class RealSenseCameraConfig(CameraConfig):
"For `fps`, `width` and `height`, either all of them need to be set, or none of them, "
f"but {self.fps=}, {self.width=}, {self.height=} were provided."
)
if self.rotation not in [-90, None, 90, 180]:
raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")

View File

@@ -115,12 +115,15 @@ class OpenCVCamera(Camera):
self.capture_width: int | None = config.width
self.capture_height: int | None = config.height
self.width: int | None = None
self.height: int | None = None
self.fps: int | None = config.fps
self.channels: int = config.channels
self.color_mode: ColorMode = config.color_mode
self.videocapture_camera: cv2.VideoCapture | None = None
self.thread: Thread | None = None
self.stop_event: Event | None = None
self.frame_queue: queue.Queue = queue.Queue(maxsize=1)
@@ -188,18 +191,19 @@ class OpenCVCamera(Camera):
if not self.is_connected:
raise DeviceNotConnectedError(f"Cannot configure settings for {self} as it is not connected.")
self._set_fps()
self._set_capture_width()
self._set_capture_height()
self._validate_fps()
self._validate_capture_width()
self._validate_capture_height()
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
self.width, self.height = self.capture_height, self.capture_width
else:
self.width, self.height = self.capture_width, self.capture_height
logger.debug(f"Final image dimensions set to: {self.width}x{self.height} (after rotation if any)")
def connect(self):
"""
Connects to the camera device specified in the configuration.
Connects to the OpenCV camera specified in the configuration.
Initializes the OpenCV VideoCapture object, sets desired camera properties
(FPS, width, height), and performs initial checks.
@@ -231,8 +235,8 @@ class OpenCVCamera(Camera):
self._configure_capture_settings()
logger.debug(f"Camera {self.index_or_path} connected and configured successfully.")
def _set_fps(self) -> None:
"""Sets the camera's frames per second (FPS)."""
def _validate_fps(self) -> None:
"""Validates and sets the camera's frames per second (FPS)."""
if self.fps is None:
self.fps = self.videocapture_camera.get(cv2.CAP_PROP_FPS)
@@ -252,8 +256,8 @@ class OpenCVCamera(Camera):
)
logger.debug(f"FPS set to {actual_fps} for {self}.")
def _set_capture_width(self) -> None:
"""Sets the camera's frame capture width."""
def _validate_capture_width(self) -> None:
"""Validates and sets the camera's frame capture width."""
if self.capture_width is None:
self.capture_width = self.videocapture_camera.get(cv2.CAP_PROP_FRAME_WIDTH)
@@ -271,8 +275,8 @@ class OpenCVCamera(Camera):
)
logger.debug(f"Capture width set to {actual_width} for {self}.")
def _set_capture_height(self) -> None:
"""Sets the camera's frame capture height."""
def _validate_capture_height(self) -> None:
"""Validates and sets the camera's frame capture height."""
if self.capture_height is None:
self.capture_height = self.videocapture_camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
@@ -365,6 +369,7 @@ class OpenCVCamera(Camera):
start_time = time.perf_counter()
# NOTE(Steven): Are we okay with this blocking an undefined amount of time?
ret, frame = self.videocapture_camera.read()
if not ret or frame is None:
@@ -423,7 +428,7 @@ class OpenCVCamera(Camera):
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
processed_image = cv2.rotate(processed_image, self.rotation)
logger.debug(f"Rotated frame by {self.config.rotation} degrees for {self}.")
logger.debug(f"Rotated frame by {self.config.rotation} degrees for {self}.")
return processed_image
@@ -433,7 +438,7 @@ class OpenCVCamera(Camera):
Continuously reads frames from the camera using the synchronous `read()`
method and places the latest frame into the `frame_queue`. It overwrites
any previous frame in the queue to ensure only the most recent one is available.
any previous frame in the queue.
"""
logger.debug(f"Starting read loop thread for {self}.")
while not self.stop_event.is_set():
@@ -455,7 +460,6 @@ class OpenCVCamera(Camera):
def _ensure_read_thread_running(self):
"""Starts or restarts the background read thread if it's not running."""
logger.debug(f"Read thread for {self} is not running. Starting...")
if self.thread is not None:
self.thread.join(timeout=0.1)
if self.stop_event is not None:
@@ -495,11 +499,9 @@ class OpenCVCamera(Camera):
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
# Ensure the background reading thread is operational
self._ensure_read_thread_running()
try:
# Get the latest frame from the queue, waiting up to the timeout
return self.frame_queue.get(timeout=timeout_ms / 1000.0)
except queue.Empty as e:
thread_alive = self.thread is not None and self.thread.is_alive()
@@ -516,19 +518,23 @@ class OpenCVCamera(Camera):
raise RuntimeError(f"Error getting frame from queue for camera {self.index_or_path}: {e}") from e
def _shutdown_read_thread(self):
"""Cleans the background read thread if it's running."""
"""Signals the background read thread to stop and waits for it to join."""
if self.stop_event is not None:
logger.debug(f"Signaling stop event for read thread of {self}.")
self.stop_event.set()
else:
logger.warning(f"Stop event not found for thread of {self}, cannot signal.")
logger.debug(f"Waiting for read thread of {self} to join...")
self.thread.join(timeout=2.0)
if self.thread.is_alive():
logger.warning(f"Read thread for {self} did not terminate gracefully after 2 seconds.")
logger.debug(f"Waiting for read thread of {self} to join...")
self.thread.join(timeout=2.0)
if self.thread.is_alive():
logger.warning(f"Read thread for {self} did not terminate gracefully after 2 seconds.")
else:
logger.debug(f"Read thread for {self} joined successfully.")
else:
logger.debug(f"Read thread for {self} joined successfully.")
logger.debug(f"Read thread for {self} was already stopped.")
self.thread = None
self.stop_event = None
@@ -556,9 +562,5 @@ class OpenCVCamera(Camera):
logger.debug(f"Releasing OpenCV VideoCapture object for {self}.")
self.videocapture_camera.release()
self.videocapture_camera = None
else:
logger.debug(
f"No OpenCV VideoCapture object to release for {self} (was already None or failed connection)."
)
logger.debug(f"Camera {self.index_or_path} disconnected successfully.")
logger.info(f"Camera {self.index_or_path} disconnected successfully.")

View File

@@ -192,3 +192,180 @@ def save_image(img_array: np.ndarray, camera_index: int, frame_index: int, image
# )
# args = parser.parse_args()
# save_images_from_cameras(**vars(args))
### Realsense
# def find_realsense_cameras(raise_when_empty: bool = True) -> list[dict]:
# """
# Find the names and the serial numbers of the Intel RealSense cameras
# connected to the computer.
# """
# import pyrealsense2 as rs
# cameras = []
# for device in rs.context().query_devices():
# serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX)))
# name = device.get_info(rs.camera_info.name)
# cameras.append(
# {
# "serial_number": serial_number,
# "name": name,
# }
# )
# if raise_when_empty and len(cameras) == 0:
# raise OSError(
# "Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware."
# )
# return cameras
# def save_image(img_array, serial_number, frame_index, images_dir):
# try:
# img = Image.fromarray(img_array)
# path = images_dir / f"camera_{serial_number}_frame_{frame_index:06d}.png"
# path.parent.mkdir(parents=True, exist_ok=True)
# img.save(str(path), quality=100)
# logging.info(f"Saved image: {path}")
# except Exception as e:
# logging.error(f"Failed to save image for camera {serial_number} frame {frame_index}: {e}")
# def save_images_from_cameras(
# images_dir: Path,
# serial_numbers: list[int] | None = None,
# fps=None,
# width=None,
# height=None,
# record_time_s=2,
# ):
# """
# Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
# associated to a given serial number.
# """
# if serial_numbers is None or len(serial_numbers) == 0:
# camera_infos = find_realsense_cameras()
# serial_numbers = [cam["serial_number"] for cam in camera_infos]
# import cv2
# print("Connecting cameras")
# cameras = []
# for cam_sn in serial_numbers:
# print(f"{cam_sn=}")
# config = RealSenseCameraConfig(serial_number=cam_sn, fps=fps, width=width, height=height)
# camera = RealSenseCamera(config)
# camera.connect()
# print(
# f"RealSenseCamera({camera.serial_number}, fps={camera.fps}, width={camera.capture_width}, height={camera.capture_height}, color_mode={camera.color_mode})"
# )
# cameras.append(camera)
# images_dir = Path(images_dir)
# if images_dir.exists():
# shutil.rmtree(
# images_dir,
# )
# images_dir.mkdir(parents=True, exist_ok=True)
# print(f"Saving images to {images_dir}")
# frame_index = 0
# start_time = time.perf_counter()
# try:
# with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
# while True:
# now = time.perf_counter()
# for camera in cameras:
# # If we use async_read when fps is None, the loop will go full speed, and we will end up
# # saving the same images from the cameras multiple times until the RAM/disk is full.
# image = camera.read() if fps is None else camera.async_read()
# if image is None:
# print("No Frame")
# bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
# executor.submit(
# save_image,
# bgr_converted_image,
# camera.serial_number,
# frame_index,
# images_dir,
# )
# if fps is not None:
# dt_s = time.perf_counter() - now
# busy_wait(1 / fps - dt_s)
# if time.perf_counter() - start_time > record_time_s:
# break
# print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
# frame_index += 1
# finally:
# print(f"Images have been saved to {images_dir}")
# for camera in cameras:
# camera.disconnect()
# def find_serial_number_from_name(name):
# camera_infos = find_realsense_cameras()
# camera_names = [cam["name"] for cam in camera_infos]
# this_name_count = Counter(camera_names)[name]
# if this_name_count > 1:
# # TODO(aliberts): Test this with multiple identical cameras (Aloha)
# raise ValueError(
# f"Multiple {name} cameras have been detected. Please use their serial number to instantiate them."
# )
# name_to_serial_dict = {cam["name"]: cam["serial_number"] for cam in camera_infos}
# cam_sn = name_to_serial_dict[name]
# return cam_sn
# if __name__ == "__main__":
# parser = argparse.ArgumentParser(
# description="Save a few frames using `RealSenseCamera` for all cameras connected to the computer, or a selected subset."
# )
# parser.add_argument(
# "--serial-numbers",
# type=int,
# nargs="*",
# default=None,
# help="List of serial numbers used to instantiate the `RealSenseCamera`. If not provided, find and use all available camera indices.",
# )
# parser.add_argument(
# "--fps",
# type=int,
# default=30,
# help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.",
# )
# parser.add_argument(
# "--width",
# type=str,
# default=640,
# help="Set the width for all cameras. If not provided, use the default width of each camera.",
# )
# parser.add_argument(
# "--height",
# type=str,
# default=480,
# help="Set the height for all cameras. If not provided, use the default height of each camera.",
# )
# parser.add_argument(
# "--images-dir",
# type=Path,
# default="outputs/images_from_intelrealsense_cameras",
# help="Set directory to save a few frames for each camera.",
# )
# parser.add_argument(
# "--record-time-s",
# type=float,
# default=2.0,
# help="Set the number of seconds used to record the frames. By default, 2 seconds.",
# )
# args = parser.parse_args()
# save_images_from_cameras(**vars(args))

View File

@@ -1,101 +0,0 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import cache
import numpy as np
CAP_V4L2 = 200
CAP_DSHOW = 700
CAP_AVFOUNDATION = 1200
CAP_ANY = -1
CAP_PROP_FPS = 5
CAP_PROP_FRAME_WIDTH = 3
CAP_PROP_FRAME_HEIGHT = 4
COLOR_RGB2BGR = 4
COLOR_BGR2RGB = 4
ROTATE_90_COUNTERCLOCKWISE = 2
ROTATE_90_CLOCKWISE = 0
ROTATE_180 = 1
@cache
def _generate_image(width: int, height: int):
return np.random.randint(0, 256, size=(height, width, 3), dtype=np.uint8)
def cvtColor(color_image, color_conversion): # noqa: N802
if color_conversion in [COLOR_RGB2BGR, COLOR_BGR2RGB]:
return color_image[:, :, [2, 1, 0]]
else:
raise NotImplementedError(color_conversion)
def rotate(color_image, rotation):
if rotation is None:
return color_image
elif rotation == ROTATE_90_CLOCKWISE:
return np.rot90(color_image, k=1)
elif rotation == ROTATE_180:
return np.rot90(color_image, k=2)
elif rotation == ROTATE_90_COUNTERCLOCKWISE:
return np.rot90(color_image, k=3)
else:
raise NotImplementedError(rotation)
class VideoCapture:
def __init__(self, *args, **kwargs):
self._mock_dict = {
CAP_PROP_FPS: 30,
CAP_PROP_FRAME_WIDTH: 640,
CAP_PROP_FRAME_HEIGHT: 480,
}
self._is_opened = True
def isOpened(self): # noqa: N802
return self._is_opened
def set(self, propId: int, value: float) -> bool: # noqa: N803
if not self._is_opened:
raise RuntimeError("Camera is not opened")
self._mock_dict[propId] = value
return True
def get(self, propId: int) -> float: # noqa: N803
if not self._is_opened:
raise RuntimeError("Camera is not opened")
value = self._mock_dict[propId]
if value == 0:
if propId == CAP_PROP_FRAME_HEIGHT:
value = 480
elif propId == CAP_PROP_FRAME_WIDTH:
value = 640
return value
def read(self):
if not self._is_opened:
raise RuntimeError("Camera is not opened")
h = self.get(CAP_PROP_FRAME_HEIGHT)
w = self.get(CAP_PROP_FRAME_WIDTH)
ret = True
return ret, _generate_image(width=w, height=h)
def release(self):
self._is_opened = False
def __del__(self):
if self._is_opened:
self.release()