feat(visualization): replace cv2 GUI with Rerun (and solves ffmpeg versioning issues) (#903)

This commit is contained in:
Steven Palma
2025-04-09 17:33:01 +02:00
committed by GitHub
parent 2c86fea78a
commit 4041f57943
15 changed files with 175 additions and 65 deletions

View File

@@ -24,7 +24,7 @@ Designed by Physical Intelligence. Ported from Jax by Hugging Face.
Install pi0 extra dependencies:
```bash
pip install --no-binary=av -e ".[pi0]"
pip install -e ".[pi0]"
```
Example of finetuning the pi0 pretrained model (`pi0_base` in `openpi`):

View File

@@ -41,7 +41,7 @@ class TeleoperateControlConfig(ControlConfig):
fps: int | None = None
teleop_time_s: float | None = None
# Display all cameras on screen
display_cameras: bool = True
display_data: bool = False
@ControlConfig.register_subclass("record")
@@ -82,7 +82,7 @@ class RecordControlConfig(ControlConfig):
# Not enough threads might cause low camera fps.
num_image_writer_threads_per_camera: int = 4
# Display all cameras on screen
display_cameras: bool = True
display_data: bool = False
# Use vocal synthesis to read events.
play_sounds: bool = True
# Resume recording on an existing dataset.
@@ -116,6 +116,11 @@ class ReplayControlConfig(ControlConfig):
@dataclass
class RemoteRobotConfig(ControlConfig):
log_interval: int = 100
# Display all cameras on screen
display_data: bool = False
# Rerun configuration for remote robot (https://ref.rerun.io/docs/python/0.22.1/common/initialization_functions/#rerun.connect_tcp)
viewer_ip: str | None = None
viewer_port: str | None = None
@dataclass

View File

@@ -24,7 +24,7 @@ from contextlib import nullcontext
from copy import copy
from functools import cache
import cv2
import rerun as rr
import torch
from deepdiff import DeepDiff
from termcolor import colored
@@ -174,13 +174,13 @@ def warmup_record(
events,
enable_teleoperation,
warmup_time_s,
display_cameras,
display_data,
fps,
):
control_loop(
robot=robot,
control_time_s=warmup_time_s,
display_cameras=display_cameras,
display_data=display_data,
events=events,
fps=fps,
teleoperate=enable_teleoperation,
@@ -192,7 +192,7 @@ def record_episode(
dataset,
events,
episode_time_s,
display_cameras,
display_data,
policy,
fps,
single_task,
@@ -200,7 +200,7 @@ def record_episode(
control_loop(
robot=robot,
control_time_s=episode_time_s,
display_cameras=display_cameras,
display_data=display_data,
dataset=dataset,
events=events,
policy=policy,
@@ -215,7 +215,7 @@ def control_loop(
robot,
control_time_s=None,
teleoperate=False,
display_cameras=False,
display_data=False,
dataset: LeRobotDataset | None = None,
events=None,
policy: PreTrainedPolicy = None,
@@ -264,11 +264,15 @@ def control_loop(
frame = {**observation, **action, "task": single_task}
dataset.add_frame(frame)
if display_cameras and not is_headless():
# TODO(Steven): This should be more general (for RemoteRobot instead of checking the name, but anyways it will change soon)
if (display_data and not is_headless()) or (display_data and robot.robot_type.startswith("lekiwi")):
for k, v in action.items():
for i, vv in enumerate(v):
rr.log(f"sent_{k}_{i}", rr.Scalar(vv.numpy()))
image_keys = [key for key in observation if "image" in key]
for key in image_keys:
cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR))
cv2.waitKey(1)
rr.log(key, rr.Image(observation[key].numpy()), static=True)
if fps is not None:
dt_s = time.perf_counter() - start_loop_t
@@ -297,15 +301,11 @@ def reset_environment(robot, events, reset_time_s, fps):
)
def stop_recording(robot, listener, display_cameras):
def stop_recording(robot, listener, display_data):
robot.disconnect()
if not is_headless():
if listener is not None:
listener.stop()
if display_cameras:
cv2.destroyAllWindows()
if not is_headless() and listener is not None:
listener.stop()
def sanity_check_dataset_name(repo_id, policy_cfg):

View File

@@ -135,15 +135,19 @@ python lerobot/scripts/control_robot.py \
"""
import logging
import os
import time
from dataclasses import asdict
from pprint import pformat
import rerun as rr
# from safetensors.torch import load_file, save_file
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.policies.factory import make_policy
from lerobot.common.robot_devices.control_configs import (
CalibrateControlConfig,
ControlConfig,
ControlPipelineConfig,
RecordControlConfig,
RemoteRobotConfig,
@@ -153,6 +157,7 @@ from lerobot.common.robot_devices.control_configs import (
from lerobot.common.robot_devices.control_utils import (
control_loop,
init_keyboard_listener,
is_headless,
log_control_info,
record_episode,
reset_environment,
@@ -232,7 +237,7 @@ def teleoperate(robot: Robot, cfg: TeleoperateControlConfig):
control_time_s=cfg.teleop_time_s,
fps=cfg.fps,
teleoperate=True,
display_cameras=cfg.display_cameras,
display_data=cfg.display_data,
)
@@ -280,7 +285,7 @@ def record(
# 3. place the cameras windows on screen
enable_teleoperation = policy is None
log_say("Warmup record", cfg.play_sounds)
warmup_record(robot, events, enable_teleoperation, cfg.warmup_time_s, cfg.display_cameras, cfg.fps)
warmup_record(robot, events, enable_teleoperation, cfg.warmup_time_s, cfg.display_data, cfg.fps)
if has_method(robot, "teleop_safety_stop"):
robot.teleop_safety_stop()
@@ -296,7 +301,7 @@ def record(
dataset=dataset,
events=events,
episode_time_s=cfg.episode_time_s,
display_cameras=cfg.display_cameras,
display_data=cfg.display_data,
policy=policy,
fps=cfg.fps,
single_task=cfg.single_task,
@@ -326,7 +331,7 @@ def record(
break
log_say("Stop recording", cfg.play_sounds, blocking=True)
stop_recording(robot, listener, cfg.display_cameras)
stop_recording(robot, listener, cfg.display_data)
if cfg.push_to_hub:
dataset.push_to_hub(tags=cfg.tags, private=cfg.private)
@@ -363,6 +368,40 @@ def replay(
log_control_info(robot, dt_s, fps=cfg.fps)
def _init_rerun(control_config: ControlConfig, session_name: str = "lerobot_control_loop") -> None:
"""Initializes the Rerun SDK for visualizing the control loop.
Args:
control_config: Configuration determining data display and robot type.
session_name: Rerun session name. Defaults to "lerobot_control_loop".
Raises:
ValueError: If viewer IP is missing for non-remote configurations with display enabled.
"""
if (control_config.display_data and not is_headless()) or (
control_config.display_data and isinstance(control_config, RemoteRobotConfig)
):
# Configure Rerun flush batch size default to 8KB if not set
batch_size = os.getenv("RERUN_FLUSH_NUM_BYTES", "8000")
os.environ["RERUN_FLUSH_NUM_BYTES"] = batch_size
# Initialize Rerun based on configuration
rr.init(session_name)
if isinstance(control_config, RemoteRobotConfig):
viewer_ip = control_config.viewer_ip
viewer_port = control_config.viewer_port
if not viewer_ip or not viewer_port:
raise ValueError(
"Viewer IP & Port are required for remote config. Set via config file/CLI or disable control_config.display_data."
)
logging.info(f"Connecting to viewer at {viewer_ip}:{viewer_port}")
rr.connect_tcp(f"{viewer_ip}:{viewer_port}")
else:
# Get memory limit for rerun viewer parameters
memory_limit = os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "10%")
rr.spawn(memory_limit=memory_limit)
@parser.wrap()
def control_robot(cfg: ControlPipelineConfig):
init_logging()
@@ -370,17 +409,22 @@ def control_robot(cfg: ControlPipelineConfig):
robot = make_robot_from_config(cfg.robot)
# TODO(Steven): Blueprint for fixed window size
if isinstance(cfg.control, CalibrateControlConfig):
calibrate(robot, cfg.control)
elif isinstance(cfg.control, TeleoperateControlConfig):
_init_rerun(control_config=cfg.control, session_name="lerobot_control_loop_teleop")
teleoperate(robot, cfg.control)
elif isinstance(cfg.control, RecordControlConfig):
_init_rerun(control_config=cfg.control, session_name="lerobot_control_loop_record")
record(robot, cfg.control)
elif isinstance(cfg.control, ReplayControlConfig):
replay(robot, cfg.control)
elif isinstance(cfg.control, RemoteRobotConfig):
from lerobot.common.robot_devices.robots.lekiwi_remote import run_lekiwi
_init_rerun(control_config=cfg.control, session_name="lerobot_control_loop_remote")
run_lekiwi(cfg.robot)
if robot.is_connected: