# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Records a dataset. Actions for the robot can be either generated by teleoperation or by a policy.

Example:

```shell
lerobot-record \
    --robot.type=so100_follower \
    --robot.port=/dev/tty.usbmodem58760431541 \
    --robot.cameras="{laptop: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \
    --robot.id=black \
    --dataset.repo_id=${HF_USER}/record-test \
    --dataset.num_episodes=2 \
    --dataset.single_task="Grab the cube" \
    --display_data=true
    # <- Teleop optional if you want to teleoperate to record or in between episodes with a policy \
    # --teleop.type=so100_leader \
    # --teleop.port=/dev/tty.usbmodem58760431551 \
    # --teleop.id=blue \
    # <- Policy optional if you want to record with a policy \
    # --policy.path=${HF_USER}/my_policy \
```

Example recording with bimanual so100:

```shell
lerobot-record \
    --robot.type=bi_so100_follower \
    --robot.left_arm_port=/dev/tty.usbmodem5A460851411 \
    --robot.right_arm_port=/dev/tty.usbmodem5A460812391 \
    --robot.id=bimanual_follower \
    --robot.cameras='{
        left: {"type": "opencv", "index_or_path": 0, "width": 640, "height": 480, "fps": 30},
        top: {"type": "opencv", "index_or_path": 1, "width": 640, "height": 480, "fps": 30},
        right: {"type": "opencv", "index_or_path": 2, "width": 640, "height": 480, "fps": 30}
    }' \
    --teleop.type=bi_so100_leader \
    --teleop.left_arm_port=/dev/tty.usbmodem5A460828611 \
    --teleop.right_arm_port=/dev/tty.usbmodem5A460826981 \
    --teleop.id=bimanual_leader \
    --display_data=true \
    --dataset.repo_id=${HF_USER}/bimanual-so100-handover-cube \
    --dataset.num_episodes=25 \
    --dataset.single_task="Grab and handover the red cube to the other arm"
```
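
Example resuming a previous run to add more episodes to the same dataset (a sketch: reuse the same
`--dataset.repo_id` and camera config as the original run, and adjust ports/ids to your setup):

```shell
lerobot-record \
    --robot.type=so100_follower \
    --robot.port=/dev/tty.usbmodem58760431541 \
    --robot.cameras="{laptop: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \
    --robot.id=black \
    --teleop.type=so100_leader \
    --teleop.port=/dev/tty.usbmodem58760431551 \
    --teleop.id=blue \
    --dataset.repo_id=${HF_USER}/record-test \
    --dataset.single_task="Grab the cube" \
    --dataset.num_episodes=2 \
    --resume=true
```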
"""

import logging
import time
from dataclasses import asdict, dataclass, field
from pathlib import Path
from pprint import pformat
from typing import Any

from lerobot.cameras import (  # noqa: F401
    CameraConfig,  # noqa: F401
)
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig  # noqa: F401
from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig  # noqa: F401
from lerobot.configs import parser
from lerobot.configs.policies import PreTrainedConfig
from lerobot.datasets.image_writer import safe_stop_image_writer
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import build_dataset_frame, combine_feature_dicts
from lerobot.datasets.video_utils import VideoEncodingManager
from lerobot.policies.factory import make_policy, make_pre_post_processors
from lerobot.policies.pretrained import PreTrainedPolicy
from lerobot.processor import (
    PolicyAction,
    PolicyProcessorPipeline,
    RobotAction,
    RobotObservation,
    RobotProcessorPipeline,
    make_default_processors,
)
from lerobot.processor.rename_processor import rename_stats
from lerobot.robots import (  # noqa: F401
    Robot,
    RobotConfig,
    bi_so100_follower,
    hope_jr,
    koch_follower,
    make_robot_from_config,
    so100_follower,
    so101_follower,
)
from lerobot.teleoperators import (  # noqa: F401
    Teleoperator,
    TeleoperatorConfig,
    bi_so100_leader,
    homunculus,
    koch_leader,
    make_teleoperator_from_config,
    so100_leader,
    so101_leader,
)
from lerobot.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop
from lerobot.utils.constants import ACTION, OBS_STR
from lerobot.utils.control_utils import (
    init_keyboard_listener,
    is_headless,
    predict_action,
    sanity_check_dataset_name,
    sanity_check_dataset_robot_compatibility,
)
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.utils import (
    get_safe_torch_device,
    init_logging,
    log_say,
)
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
from robot_client.robots.realman import RealmanRobot, RealmanRobotConfig  # noqa: F401
from robot_client.teleoperators.xbox import Xbox, XboxConfig  # noqa: F401
from robot_client.teleoperators.flight_stick import FlightStick, FlightStickConfig  # noqa: F401


@dataclass
class DatasetRecordConfig:
    # Dataset identifier. By convention it should match '{hf_username}/{dataset_name}' (e.g. `lerobot/test`).
    repo_id: str
    # A short but accurate description of the task performed during the recording (e.g. "Pick the Lego block and drop it in the box on the right.")
    single_task: str
    # Root directory where the dataset will be stored (e.g. 'dataset/path').
    root: str | Path | None = None
    # Limit the frames per second.
    fps: int = 30
    # Number of seconds for data recording for each episode.
    episode_time_s: int | float = 60
    # Number of seconds for resetting the environment after each episode.
    reset_time_s: int | float = 60
    # Number of episodes to record.
    num_episodes: int = 50
    # Encode frames in the dataset into video.
    video: bool = True
    # Upload dataset to Hugging Face hub.
    push_to_hub: bool = True
    # Upload on private repository on the Hugging Face hub.
    private: bool = False
    # Add tags to your dataset on the hub.
    tags: list[str] | None = None
    # Number of subprocesses handling the saving of frames as PNG. Set to 0 to use threads only;
    # set to ≥1 to use subprocesses, each using threads to write images. The best number of processes
    # and threads depends on your system. We recommend 4 threads per camera with 0 processes.
    # If fps is unstable, adjust the thread count. If still unstable, try using 1 or more subprocesses.
    num_image_writer_processes: int = 0
    # Number of threads writing the frames as png images on disk, per camera.
    # Too many threads might cause unstable teleoperation fps due to main thread being blocked.
    # Not enough threads might cause low camera fps.
    num_image_writer_threads_per_camera: int = 4
    # Number of episodes to record before batch encoding videos.
    # Set to 1 for immediate encoding (default behavior), or higher for batched encoding.
    video_encoding_batch_size: int = 1
    # Rename map for the observation to override the image and state keys.
    rename_map: dict[str, str] = field(default_factory=dict)

    def __post_init__(self):
        if self.single_task is None:
            raise ValueError("You need to provide a task as argument in `single_task`.")
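

# Example (sketch): the `--dataset.*` CLI options map one-to-one onto the fields above, e.g.
#
#     DatasetRecordConfig(
#         repo_id="lerobot/test",
#         single_task="Grab the cube",
#         fps=30,
#         num_episodes=2,
#     )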


@dataclass
class RecordConfig:
    robot: RobotConfig
    dataset: DatasetRecordConfig
    # Whether to control the robot with a teleoperator
    teleop: TeleoperatorConfig | None = None
    # Whether to control the robot with a policy
    policy: PreTrainedConfig | None = None
    # Display all cameras on screen
    display_data: bool = False
    # Use vocal synthesis to read events.
    play_sounds: bool = True
    # Resume recording on an existing dataset.
    resume: bool = False

    def __post_init__(self):
        # HACK: We parse again the cli args here to get the pretrained path if there was one.
        policy_path = parser.get_path_arg("policy")
        if policy_path:
            cli_overrides = parser.get_cli_overrides("policy")
            self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides)
            self.policy.pretrained_path = policy_path

        if self.teleop is None and self.policy is None:
            raise ValueError("Choose a policy, a teleoperator or both to control the robot")

    @classmethod
    def __get_path_fields__(cls) -> list[str]:
        """This enables the parser to load config from the policy using `--policy.path=local/dir`"""
        return ["policy"]


"""
--------------- record_loop() data flow --------------------------

                               [ Robot ]
                                   V
                    [ robot.get_observation() ] ---> raw_obs
                                   V
                 [ robot_observation_processor ] ---> processed_obs
                                   V
             .------------( ACTION LOGIC )--------------.
             V                                          V
   [ From Teleoperator ]                         [ From Policy ]
             |                                          |
   [teleop.get_action] -> raw_action             [predict_action]
             |                                          |
             V                                          |
   [teleop_action_processor]                            |
             |                                          |
             '---> processed_teleop_action              '---> processed_policy_action
                          |                                          |
                          '-------------------.----------------------'
                                               V
                            [ robot_action_processor ] --> robot_action_to_send
                                               V
                                 [ robot.send_action() ] -- (Robot Executes)
                                               V
                                      ( Save to Dataset )
                                               V
                                   ( Rerun Log / Loop Wait )
"""
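
# A minimal sketch of one teleop iteration from the diagram above (assuming a connected `robot`
# and `teleop`; the three pipelines come from `make_default_processors()`, as in `record()` below):
#
#     teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors()
#     obs = robot.get_observation()
#     obs_processed = robot_observation_processor(obs)
#     act = teleop.get_action()
#     act_processed_teleop = teleop_action_processor((act, obs))
#     robot_action_to_send = robot_action_processor((act_processed_teleop, obs))
#     robot.send_action(robot_action_to_send)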


@safe_stop_image_writer
def record_loop(
    robot: Robot,
    events: dict,
    fps: int,
    teleop_action_processor: RobotProcessorPipeline[
        tuple[RobotAction, RobotObservation], RobotAction
    ],  # runs after teleop
    robot_action_processor: RobotProcessorPipeline[
        tuple[RobotAction, RobotObservation], RobotAction
    ],  # runs before robot
    robot_observation_processor: RobotProcessorPipeline[
        RobotObservation, RobotObservation
    ],  # runs after robot
    dataset: LeRobotDataset | None = None,
    teleop: Teleoperator | list[Teleoperator] | None = None,
    policy: PreTrainedPolicy | None = None,
    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]] | None = None,
    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction] | None = None,
    control_time_s: int | None = None,
    single_task: str | None = None,
    display_data: bool = False,
):
    if dataset is not None and dataset.fps != fps:
        raise ValueError(f"The dataset fps should be equal to requested fps ({dataset.fps} != {fps}).")

    teleop_arm = teleop_keyboard = None
    if isinstance(teleop, list):
        teleop_keyboard = next((t for t in teleop if isinstance(t, KeyboardTeleop)), None)
        teleop_arm = next(
            (
                t
                for t in teleop
                if isinstance(
                    t,
                    (so100_leader.SO100Leader | so101_leader.SO101Leader | koch_leader.KochLeader),
                )
            ),
            None,
        )

        if not (teleop_arm and teleop_keyboard and len(teleop) == 2 and robot.name == "lekiwi_client"):
            raise ValueError(
                "For multi-teleop, the list must contain exactly one KeyboardTeleop and one arm teleoperator. Currently only supported for LeKiwi robot."
            )

    # Reset policy and processors if they are provided
    if policy is not None and preprocessor is not None and postprocessor is not None:
        policy.reset()
        preprocessor.reset()
        postprocessor.reset()

    timestamp = 0
    start_episode_t = time.perf_counter()
    while timestamp < control_time_s:
        start_loop_t = time.perf_counter()

        if events["exit_early"]:
            events["exit_early"] = False
            break

        # Get robot observation
        obs = robot.get_observation()

        # Applies a pipeline to the raw robot observation, default is IdentityProcessor
        obs_processed = robot_observation_processor(obs)

        if policy is not None or dataset is not None:
            observation_frame = build_dataset_frame(dataset.features, obs_processed, prefix=OBS_STR)

        # Get action from either policy or teleop
        if policy is not None and preprocessor is not None and postprocessor is not None:
            action_values = predict_action(
                observation=observation_frame,
                policy=policy,
                device=get_safe_torch_device(policy.config.device),
                preprocessor=preprocessor,
                postprocessor=postprocessor,
                use_amp=policy.config.use_amp,
                task=single_task,
                robot_type=robot.robot_type,
            )
            act_processed_policy: RobotAction = make_robot_action(action_values, dataset.features)

        elif policy is None and isinstance(teleop, Teleoperator):
            act = teleop.get_action()

            # Applies a pipeline to the raw teleop action, default is IdentityProcessor
            act_processed_teleop = teleop_action_processor((act, obs))

        elif policy is None and isinstance(teleop, list):
            arm_action = teleop_arm.get_action()
            arm_action = {f"arm_{k}": v for k, v in arm_action.items()}
            keyboard_action = teleop_keyboard.get_action()
            base_action = robot._from_keyboard_to_base_action(keyboard_action)
            act = {**arm_action, **base_action} if len(base_action) > 0 else arm_action
            act_processed_teleop = teleop_action_processor((act, obs))

        else:
            logging.info(
                "No policy or teleoperator provided, skipping action generation. "
                "This is likely to happen when resetting the environment without a teleop device. "
                "The robot won't be at its rest position at the start of the next episode."
            )
            continue

        # Applies a pipeline to the action, default is IdentityProcessor
        if policy is not None and act_processed_policy is not None:
            action_values = act_processed_policy
            robot_action_to_send = robot_action_processor((act_processed_policy, obs))
        else:
            action_values = act_processed_teleop
            robot_action_to_send = robot_action_processor((act_processed_teleop, obs))

        # Send action to robot
        # Action can eventually be clipped using `max_relative_target`,
        # so action actually sent is saved in the dataset.
        # TODO(steven, pepijn, adil): we should use a pipeline step to clip the action, so the sent action is the action that we input to the robot.
        _sent_action = robot.send_action(robot_action_to_send)

        # Write to dataset
        if dataset is not None:
            action_frame = build_dataset_frame(dataset.features, action_values, prefix=ACTION)
            frame = {**observation_frame, **action_frame, "task": single_task}
            dataset.add_frame(frame)

        if display_data:
            log_rerun_data(observation=obs_processed, action=action_values)

        dt_s = time.perf_counter() - start_loop_t
        precise_sleep(1 / fps - dt_s)

        timestamp = time.perf_counter() - start_episode_t


@parser.wrap()
def record(cfg: RecordConfig) -> LeRobotDataset:
    init_logging()
    logging.info(pformat(asdict(cfg)))
    if cfg.display_data:
        init_rerun(session_name="recording")

    robot = make_robot_from_config(cfg.robot)
    teleop = make_teleoperator_from_config(cfg.teleop) if cfg.teleop is not None else None

    teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors()

    dataset_features = combine_feature_dicts(
        aggregate_pipeline_dataset_features(
            pipeline=teleop_action_processor,
            initial_features=create_initial_features(
                action=robot.action_features
            ),  # TODO(steven, pepijn): in future this should come from teleop or policy
            use_videos=cfg.dataset.video,
        ),
        aggregate_pipeline_dataset_features(
            pipeline=robot_observation_processor,
            initial_features=create_initial_features(observation=robot.observation_features),
            use_videos=cfg.dataset.video,
        ),
    )

    if cfg.resume:
        dataset = LeRobotDataset(
            cfg.dataset.repo_id,
            root=cfg.dataset.root,
            batch_encoding_size=cfg.dataset.video_encoding_batch_size,
        )

        if hasattr(robot, "cameras") and len(robot.cameras) > 0:
            dataset.start_image_writer(
                num_processes=cfg.dataset.num_image_writer_processes,
                num_threads=cfg.dataset.num_image_writer_threads_per_camera * len(robot.cameras),
            )
        sanity_check_dataset_robot_compatibility(dataset, robot, cfg.dataset.fps, dataset_features)
    else:
        # Create empty dataset or load existing saved episodes
        sanity_check_dataset_name(cfg.dataset.repo_id, cfg.policy)
        dataset = LeRobotDataset.create(
            cfg.dataset.repo_id,
            cfg.dataset.fps,
            root=cfg.dataset.root,
            robot_type=robot.name,
            features=dataset_features,
            use_videos=cfg.dataset.video,
            image_writer_processes=cfg.dataset.num_image_writer_processes,
            image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * len(robot.cameras),
            batch_encoding_size=cfg.dataset.video_encoding_batch_size,
        )

    # Load pretrained policy
    policy = None if cfg.policy is None else make_policy(cfg.policy, ds_meta=dataset.meta)

    preprocessor = None
    postprocessor = None
    if cfg.policy is not None:
        preprocessor, postprocessor = make_pre_post_processors(
            policy_cfg=cfg.policy,
            pretrained_path=cfg.policy.pretrained_path,
            dataset_stats=rename_stats(dataset.meta.stats, cfg.dataset.rename_map),
            preprocessor_overrides={
                "device_processor": {"device": cfg.policy.device},
                "rename_observations_processor": {"rename_map": cfg.dataset.rename_map},
            },
        )

    robot.connect()
    if teleop is not None:
        teleop.connect()

    listener, events = init_keyboard_listener()

    with VideoEncodingManager(dataset):
        recorded_episodes = 0
        while recorded_episodes < cfg.dataset.num_episodes and not events["stop_recording"]:
            log_say(f"Recording episode {dataset.num_episodes}", cfg.play_sounds)
            record_loop(
                robot=robot,
                events=events,
                fps=cfg.dataset.fps,
                teleop_action_processor=teleop_action_processor,
                robot_action_processor=robot_action_processor,
                robot_observation_processor=robot_observation_processor,
                teleop=teleop,
                policy=policy,
                preprocessor=preprocessor,
                postprocessor=postprocessor,
                dataset=dataset,
                control_time_s=cfg.dataset.episode_time_s,
                single_task=cfg.dataset.single_task,
                display_data=cfg.display_data,
            )

            # Execute a few seconds without recording to give time to manually reset the environment.
            # Skip the reset for the last episode to be recorded.
            if not events["stop_recording"] and (
                (recorded_episodes < cfg.dataset.num_episodes - 1) or events["rerecord_episode"]
            ):
                log_say("Reset the environment", cfg.play_sounds)
                record_loop(
                    robot=robot,
                    events=events,
                    fps=cfg.dataset.fps,
                    teleop_action_processor=teleop_action_processor,
                    robot_action_processor=robot_action_processor,
                    robot_observation_processor=robot_observation_processor,
                    teleop=teleop,
                    control_time_s=cfg.dataset.reset_time_s,
                    single_task=cfg.dataset.single_task,
                    display_data=cfg.display_data,
                )

            if events["rerecord_episode"]:
                log_say("Re-record episode", cfg.play_sounds)
                events["rerecord_episode"] = False
                events["exit_early"] = False
                dataset.clear_episode_buffer()
                continue

            dataset.save_episode()
            recorded_episodes += 1

    log_say("Stop recording", cfg.play_sounds, blocking=True)

    robot.disconnect()
    if teleop is not None:
        teleop.disconnect()

    if not is_headless() and listener is not None:
        listener.stop()

    if cfg.dataset.push_to_hub:
        dataset.push_to_hub(tags=cfg.dataset.tags, private=cfg.dataset.private)

    log_say("Exiting", cfg.play_sounds)
    return dataset


def main():
    record()


if __name__ == "__main__":
    main()