Files
lerobot/lerobot/scripts/server/crop_roi.py
Michel Aractingi 2211209be5 - Added base gym env class for the real robot environment.
- Added several wrappers around the base gym env robot class.
- Including: time limit, reward classifier, crop images, preprocess observations.
- Added an interactive script crop_roi.py where the user can interactively select the roi in the observation images and return the correct crop values that will improve the policy and reward classifier performance.

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
2025-02-03 15:07:59 +00:00

149 lines
5.2 KiB
Python

import cv2
from lerobot.common.robot_devices.cameras.opencv import OpenCVCamera
def select_square_roi(img):
    """
    Let the user draw a square ROI on the image with the mouse.

    The user must click and drag to draw the square.
      - While dragging, the square is dynamically drawn.
      - On mouse button release, the square is fixed.
      - Press 'c' to confirm the selection.
      - Press 'r' to reset the selection.
      - Press ESC to cancel.

    The square is anchored at the click position and its side is the smaller
    of the horizontal and vertical drag distances. Cursor positions are
    clamped to the image so the ROI never extends past its borders. Pressing
    the mouse button starts a new selection (discarding the previous one), and
    a click without a drag leaves no selection instead of a zero-sized ROI.

    Parameters:
        img: OpenCV image (an array exposing ``shape`` and ``copy()``).
            It is not modified; all drawing happens on copies.

    Returns:
        A tuple (top, left, height, width) representing the square ROI,
        or None if no valid ROI is selected.
    """
    window_name = "Select ROI"
    color = (0, 255, 0)
    thickness = 2
    img_height, img_width = img.shape[:2]

    # Pristine copy used to wipe previously drawn squares
    clone = img.copy()
    working_img = clone.copy()
    roi = None  # Will store the final ROI as (top, left, side, side)
    drawing = False
    ix, iy = -1, -1  # Initial click coordinates

    def clamp_to_image(x, y):
        # Some HighGUI backends keep reporting positions outside the image
        # while the button is held; keep every coordinate on a valid pixel.
        return min(max(x, 0), img_width - 1), min(max(y, 0), img_height - 1)

    def opposite_corner(x, y):
        # Corner of the square opposite to the anchor (ix, iy), plus its side.
        x, y = clamp_to_image(x, y)
        # Side length is the minimum of the horizontal/vertical drags
        side = min(abs(x - ix), abs(y - iy))
        # Keep the drag direction (the user may drag towards top/left)
        dx = side if x >= ix else -side
        dy = side if y >= iy else -side
        return (ix + dx, iy + dy), side

    def mouse_callback(event, x, y, flags, param):
        nonlocal ix, iy, drawing, roi, working_img
        if event == cv2.EVENT_LBUTTONDOWN:
            # Start a new selection: drop the old ROI and its drawn square so
            # the preview never shows two squares at once.
            drawing = True
            ix, iy = clamp_to_image(x, y)
            roi = None
            working_img = clone.copy()
            cv2.imshow(window_name, working_img)
        elif event == cv2.EVENT_MOUSEMOVE:
            if drawing:
                # Show a temporary image with the current square drawn
                corner, _ = opposite_corner(x, y)
                temp = working_img.copy()
                cv2.rectangle(temp, (ix, iy), corner, color, thickness)
                cv2.imshow(window_name, temp)
        elif event == cv2.EVENT_LBUTTONUP:
            if not drawing:
                # Release without a matching press inside the window: there is
                # no anchor point, so there is nothing to finish.
                return
            drawing = False
            corner, side = opposite_corner(x, y)
            working_img = clone.copy()
            if side > 0:
                # Normalize coordinates: (top, left) is the minimum of the two points
                left = min(ix, corner[0])
                top = min(iy, corner[1])
                roi = (top, left, side, side)  # (top, left, height, width)
                cv2.rectangle(working_img, (ix, iy), corner, color, thickness)
            # side == 0 is a plain click: roi stays None and the image stays clean
            cv2.imshow(window_name, working_img)

    # Create the window and set the callback
    cv2.namedWindow(window_name)
    cv2.setMouseCallback(window_name, mouse_callback)
    cv2.imshow(window_name, working_img)

    print("Instructions for ROI selection:")
    print("  - Click and drag to draw a square ROI.")
    print("  - Press 'c' to confirm the selection.")
    print("  - Press 'r' to reset and draw again.")
    print("  - Press ESC to cancel the selection.")

    try:
        # Wait until the user confirms with 'c', resets with 'r', or cancels with ESC
        while True:
            key = cv2.waitKey(1) & 0xFF
            # Confirm ROI if one has been drawn
            if key == ord("c") and roi is not None:
                break
            # Reset: clear the ROI and restore the original image
            elif key == ord("r"):
                working_img = clone.copy()
                roi = None
                cv2.imshow(window_name, working_img)
            # Cancel selection for this image
            elif key == 27:  # ESC key
                roi = None
                break
    finally:
        # Close the window even if the loop is interrupted (e.g. Ctrl+C)
        cv2.destroyWindow(window_name)

    return roi
def select_square_roi_for_images(images: dict) -> dict:
    """
    Ask the user for a square ROI on every image of the dictionary.

    One selection window is opened per image, in dictionary order. Entries
    whose image is None are skipped, and entries for which the user does not
    end up with a selection are left out of the result.

    Parameters:
        images (dict): Dictionary where keys are identifiers and values are OpenCV images.

    Returns:
        dict: Mapping of image keys to the selected square ROI, each a tuple
        (top, left, height, width).
    """
    rois_by_key = {}
    for name, frame in images.items():
        if frame is not None:
            print(f"\nSelect square ROI for image with key: '{name}'")
            selection = select_square_roi(frame)
            if selection is not None:
                rois_by_key[name] = selection
                print(f"ROI for '{name}': {selection}")
            else:
                print(f"No valid ROI selected for '{name}'.")
        else:
            print(f"Image for key '{name}' is None, skipping.")
    return rois_by_key
if __name__ == "__main__":
    # Example usage: grab one frame from each of two cameras and let the user
    # select a square crop region on every frame.
    fps = [5, 30]  # Requested frame rate for camera index 0 and 1 respectively
    cameras = [
        OpenCVCamera(index, fps=camera_fps, width=640, height=480, mock=False)
        for index, camera_fps in enumerate(fps)
    ]

    # Track which cameras actually connected so only those are released below
    connected = []
    try:
        for camera in cameras:
            camera.connect()
            connected.append(camera)
        images = {f"image_{index}": camera.read() for index, camera in enumerate(cameras)}
    finally:
        # Only a single frame per camera is needed; release the devices before
        # the (potentially long) interactive selection starts.
        for camera in connected:
            camera.disconnect()

    # Verify a frame was captured from every camera
    for key, img in images.items():
        if img is None:
            raise ValueError(f"Failed to read a frame for key '{key}'. Check the camera connection.")

    # Let the user select a square ROI for each image
    rois = select_square_roi_for_images(images)

    # Print the selected square ROIs
    print("\nSelected Square Regions of Interest (top, left, height, width):")
    for key, roi in rois.items():
        print(f"{key}: {roi}")