Improve control robot ; Add process to configure motor indices (#326)

Co-authored-by: Simon Alibert <alibert.sim@gmail.com>
Co-authored-by: jess-moss <jess.moss@dextrousrobotics.com>
Co-authored-by: Marina Barannikov <marina.barannikov@huggingface.co>
Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
This commit is contained in:
Remi
2024-08-15 18:11:33 +02:00
committed by GitHub
parent 8c4643687c
commit bbe9057225
35 changed files with 2085 additions and 476 deletions

View File

@@ -1,9 +1,22 @@
"""
Utilities to control a robot.
Useful to record a dataset, replay a recorded episode, run the policy on your robot
and record an evaluation dataset, and to recalibrate your robot if needed.
Examples of usage:
- Recalibrate your robot:
```bash
python lerobot/scripts/control_robot.py calibrate
```
- Unlimited teleoperation at highest frequency (~200 Hz is expected), to exit with CTRL+C:
```bash
python lerobot/scripts/control_robot.py teleoperate
# Remove the cameras from the robot definition. They are not used in 'teleoperate' anyway.
python lerobot/scripts/control_robot.py teleoperate --robot-overrides '~cameras'
```
- Unlimited teleoperation at a limited frequency of 30 Hz, to simulate data recording frequency:
@@ -14,7 +27,7 @@ python lerobot/scripts/control_robot.py teleoperate \
- Record one episode in order to test replay:
```bash
python lerobot/scripts/control_robot.py record_dataset \
python lerobot/scripts/control_robot.py record \
--fps 30 \
--root tmp/data \
--repo-id $USER/koch_test \
@@ -32,7 +45,7 @@ python lerobot/scripts/visualize_dataset.py \
- Replay this test episode:
```bash
python lerobot/scripts/control_robot.py replay_episode \
python lerobot/scripts/control_robot.py replay \
--fps 30 \
--root tmp/data \
--repo-id $USER/koch_test \
@@ -42,12 +55,11 @@ python lerobot/scripts/control_robot.py replay_episode \
- Record a full dataset in order to train a policy, with 2 seconds of warmup,
30 seconds of recording for each episode, and 10 seconds to reset the environment in between episodes:
```bash
python lerobot/scripts/control_robot.py record_dataset \
python lerobot/scripts/control_robot.py record \
--fps 30 \
--root data \
--repo-id $USER/koch_pick_place_lego \
--num-episodes 50 \
--run-compute-stats 1 \
--warmup-time-s 2 \
--episode-time-s 30 \
--reset-time-s 10
@@ -74,7 +86,14 @@ DATA_DIR=data python lerobot/scripts/train.py \
- Run the pretrained policy on the robot:
```bash
python lerobot/scripts/control_robot.py run_policy \
python lerobot/scripts/control_robot.py record \
--fps 30 \
--root data \
--repo-id $USER/eval_act_koch_real \
--num-episodes 10 \
--warmup-time-s 2 \
--episode-time-s 30 \
--reset-time-s 10
-p outputs/train/act_koch_real/checkpoints/080000/pretrained_model
```
"""
@@ -87,12 +106,14 @@ import os
import platform
import shutil
import time
import traceback
from contextlib import nullcontext
from functools import cache
from pathlib import Path
import cv2
import torch
import tqdm
from huggingface_hub import create_branch
from omegaconf import DictConfig
from PIL import Image
from termcolor import colored
@@ -102,7 +123,7 @@ from lerobot.common.datasets.compute_stats import compute_stats
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import to_hf_dataset
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, get_default_encoding
from lerobot.common.datasets.utils import calculate_episode_data_index
from lerobot.common.datasets.utils import calculate_episode_data_index, create_branch
from lerobot.common.datasets.video_utils import encode_video_frames
from lerobot.common.policies.factory import make_policy
from lerobot.common.robot_devices.robots.factory import make_robot
@@ -116,6 +137,26 @@ from lerobot.scripts.push_dataset_to_hub import push_meta_data_to_hub, push_vide
########################################################################################
def say(text, blocking=False):
# Check if mac, linux, or windows.
if platform.system() == "Darwin":
cmd = f'say "{text}"'
elif platform.system() == "Linux":
cmd = f'spd-say "{text}"'
elif platform.system() == "Windows":
cmd = (
'PowerShell -Command "Add-Type -AssemblyName System.Speech; '
f"(New-Object System.Speech.Synthesis.SpeechSynthesizer).Speak('{text}')\""
)
if not blocking and platform.system() in ["Darwin", "Linux"]:
# TODO(rcadene): Make it work for Windows
# Use the ampersand to run command in the background
cmd += " &"
os.system(cmd)
def save_image(img_tensor, key, frame_index, episode_index, videos_dir):
img = Image.fromarray(img_tensor.numpy())
path = videos_dir / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png"
@@ -160,11 +201,11 @@ def log_control_info(robot, dt_s, episode_index=None, frame_index=None, fps=None
for name in robot.follower_arms:
key = f"write_follower_{name}_goal_pos_dt_s"
if key in robot.logs:
log_dt("dtRfoll", robot.logs[key])
log_dt("dtWfoll", robot.logs[key])
key = f"read_follower_{name}_pos_dt_s"
if key in robot.logs:
log_dt("dtWfoll", robot.logs[key])
log_dt("dtRfoll", robot.logs[key])
for name in robot.cameras:
key = f"read_camera_{name}_dt_s"
@@ -179,12 +220,23 @@ def log_control_info(robot, dt_s, episode_index=None, frame_index=None, fps=None
logging.info(info_str)
def get_is_headless():
if platform.system() == "Linux":
display = os.environ.get("DISPLAY")
if display is None or display == "":
return True
return False
@cache
def is_headless():
"""Detects if python is running without a monitor."""
try:
import pynput # noqa
return False
except Exception:
print(
"Error trying to import pynput. Switching to headless mode. "
"As a result, the video stream from the cameras won't be shown, "
"and you won't be able to change the control flow with keyboards. "
"For more info, see traceback below.\n"
)
traceback.print_exc()
print()
return True
########################################################################################
@@ -192,29 +244,44 @@ def get_is_headless():
########################################################################################
def calibrate(robot: Robot):
if robot.calibration_path.exists():
print(f"Removing '{robot.calibration_path}'")
robot.calibration_path.unlink()
if robot.is_connected:
robot.disconnect()
# Calling `connect` automatically runs calibration
# when the calibration file is missing
robot.connect()
def teleoperate(robot: Robot, fps: int | None = None, teleop_time_s: float | None = None):
# TODO(rcadene): Add option to record logs
if not robot.is_connected:
robot.connect()
start_time = time.perf_counter()
start_teleop_t = time.perf_counter()
while True:
now = time.perf_counter()
start_loop_t = time.perf_counter()
robot.teleop_step()
if fps is not None:
dt_s = time.perf_counter() - now
dt_s = time.perf_counter() - start_loop_t
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - now
dt_s = time.perf_counter() - start_loop_t
log_control_info(robot, dt_s, fps=fps)
if teleop_time_s is not None and time.perf_counter() - start_time > teleop_time_s:
if teleop_time_s is not None and time.perf_counter() - start_teleop_t > teleop_time_s:
break
def record_dataset(
def record(
robot: Robot,
policy: torch.nn.Module | None = None,
hydra_cfg: DictConfig | None = None,
fps: int | None = None,
root="data",
repo_id="lerobot/debug",
@@ -229,6 +296,13 @@ def record_dataset(
force_override=False,
):
# TODO(rcadene): Add option to record logs
# TODO(rcadene): Clean this function via decomposition in higher level functions
_, dataset_name = repo_id.split("/")
if dataset_name.startswith("eval_") and policy is None:
raise ValueError(
f"Your dataset name begins by 'eval_' ({dataset_name}) but no policy is provided ({policy})."
)
if not video:
raise NotImplementedError()
@@ -255,32 +329,10 @@ def record_dataset(
else:
episode_index = 0
is_headless = get_is_headless()
# Execute a few seconds without recording data, to give times
# to the robot devices to connect and start synchronizing.
timestamp = 0
start_time = time.perf_counter()
is_warmup_print = False
while timestamp < warmup_time_s:
if not is_warmup_print:
logging.info("Warming up (no data recording)")
os.system('say "Warmup" &')
is_warmup_print = True
now = time.perf_counter()
observation, action = robot.teleop_step(record_data=True)
if not is_headless:
image_keys = [key for key in observation if "image" in key]
dt_s = time.perf_counter() - now
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - now
log_control_info(robot, dt_s, fps=fps)
timestamp = time.perf_counter() - start_time
if is_headless():
logging.info(
"Headless environment detected. On-screen cameras display and keyboard inputs will not be available."
)
# Allow to exit early while recording an episode or resetting the environment,
# by tapping the right arrow key '->'. This might require a sudo permission
@@ -290,9 +342,7 @@ def record_dataset(
stop_recording = False
# Only import pynput if not in a headless environment
if is_headless:
logging.info("Headless environment detected. Keyboard input will not be available.")
else:
if not is_headless():
from pynput import keyboard
def on_press(key):
@@ -315,6 +365,53 @@ def record_dataset(
listener = keyboard.Listener(on_press=on_press)
listener.start()
# Load policy if any
if policy is not None:
# Check device is available
device = get_safe_torch_device(hydra_cfg.device, log=True)
policy.eval()
policy.to(device)
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
set_global_seed(hydra_cfg.seed)
# override fps using policy fps
fps = hydra_cfg.env.fps
# Execute a few seconds without recording data, to give times
# to the robot devices to connect and start synchronizing.
timestamp = 0
start_warmup_t = time.perf_counter()
is_warmup_print = False
while timestamp < warmup_time_s:
if not is_warmup_print:
logging.info("Warming up (no data recording)")
say("Warming up")
is_warmup_print = True
start_loop_t = time.perf_counter()
if policy is None:
observation, action = robot.teleop_step(record_data=True)
else:
observation = robot.capture_observation()
if not is_headless():
image_keys = [key for key in observation if "image" in key]
for key in image_keys:
cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR))
cv2.waitKey(1)
dt_s = time.perf_counter() - start_loop_t
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - start_loop_t
log_control_info(robot, dt_s, fps=fps)
timestamp = time.perf_counter() - start_warmup_t
# Save images using threads to reach high fps (30 and more)
# Using `with` to exist smoothly if an execption is raised.
# Using only 4 worker threads to avoid blocking the main thread.
@@ -323,14 +420,18 @@ def record_dataset(
# Start recording all episodes
while episode_index < num_episodes:
logging.info(f"Recording episode {episode_index}")
os.system(f'say "Recording episode {episode_index}" &')
say(f"Recording episode {episode_index}")
ep_dict = {}
frame_index = 0
timestamp = 0
start_time = time.perf_counter()
start_episode_t = time.perf_counter()
while timestamp < episode_time_s:
now = time.perf_counter()
observation, action = robot.teleop_step(record_data=True)
start_loop_t = time.perf_counter()
if policy is None:
observation, action = robot.teleop_step(record_data=True)
else:
observation = robot.capture_observation()
image_keys = [key for key in observation if "image" in key]
not_image_keys = [key for key in observation if "image" not in key]
@@ -342,11 +443,46 @@ def record_dataset(
)
]
if not is_headless():
image_keys = [key for key in observation if "image" in key]
for key in image_keys:
cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR))
cv2.waitKey(1)
for key in not_image_keys:
if key not in ep_dict:
ep_dict[key] = []
ep_dict[key].append(observation[key])
if policy is not None:
with (
torch.inference_mode(),
torch.autocast(device_type=device.type)
if device.type == "cuda" and hydra_cfg.use_amp
else nullcontext(),
):
# Convert to pytorch format: channel first and float32 in [0,1] with batch dimension
for name in observation:
if "image" in name:
observation[name] = observation[name].type(torch.float32) / 255
observation[name] = observation[name].permute(2, 0, 1).contiguous()
observation[name] = observation[name].unsqueeze(0)
observation[name] = observation[name].to(device)
# Compute the next action with the policy
# based on the current observation
action = policy.select_action(observation)
# Remove batch dimension
action = action.squeeze(0)
# Move to cpu, if not already the case
action = action.to("cpu")
# Order the robot to move
robot.send_action(action)
action = {"action": action}
for key in action:
if key not in ep_dict:
ep_dict[key] = []
@@ -354,14 +490,13 @@ def record_dataset(
frame_index += 1
dt_s = time.perf_counter() - now
dt_s = time.perf_counter() - start_loop_t
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - now
dt_s = time.perf_counter() - start_loop_t
log_control_info(robot, dt_s, fps=fps)
timestamp = time.perf_counter() - start_time
timestamp = time.perf_counter() - start_episode_t
if exit_early:
exit_early = False
break
@@ -369,10 +504,10 @@ def record_dataset(
if not stop_recording:
# Start resetting env while the executor are finishing
logging.info("Reset the environment")
os.system('say "Reset the environment" &')
say("Reset the environment")
timestamp = 0
start_time = time.perf_counter()
start_vencod_t = time.perf_counter()
# During env reset we save the data and encode the videos
num_frames = frame_index
@@ -418,7 +553,7 @@ def record_dataset(
with tqdm.tqdm(total=reset_time_s, desc="Waiting") as pbar:
while timestamp < reset_time_s and not is_last_episode:
time.sleep(1)
timestamp = time.perf_counter() - start_time
timestamp = time.perf_counter() - start_vencod_t
pbar.update(1)
if exit_early:
exit_early = False
@@ -433,8 +568,8 @@ def record_dataset(
if is_last_episode:
logging.info("Done recording")
os.system('say "Done recording"')
if not is_headless:
say("Done recording", blocking=True)
if not is_headless():
listener.stop()
logging.info("Waiting for threads writing the images on disk to terminate...")
@@ -444,10 +579,14 @@ def record_dataset(
pass
break
robot.disconnect()
if not is_headless():
cv2.destroyAllWindows()
num_episodes = episode_index
logging.info("Encoding videos")
os.system('say "Encoding videos" &')
say("Encoding videos")
# Use ffmpeg to convert frames stored as png into mp4 videos
for episode_index in tqdm.tqdm(range(num_episodes)):
for key in image_keys:
@@ -455,6 +594,7 @@ def record_dataset(
fname = f"{key}_episode_{episode_index:06d}.mp4"
video_path = local_dir / "videos" / fname
if video_path.exists():
# Skip if video is already encoded. Could be the case when resuming data recording.
continue
# note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding,
# since video encoding with ffmpeg is already using multithreading.
@@ -491,11 +631,12 @@ def record_dataset(
)
if run_compute_stats:
logging.info("Computing dataset statistics")
os.system('say "Computing dataset statistics" &')
say("Computing dataset statistics")
stats = compute_stats(lerobot_dataset)
lerobot_dataset.stats = stats
else:
logging.info("Skipping computation of the dataset statistrics")
stats = {}
logging.info("Skipping computation of the dataset statistics")
hf_dataset = hf_dataset.with_format(None) # to remove transforms that cant be saved
hf_dataset.save_to_disk(str(local_dir / "train"))
@@ -511,12 +652,11 @@ def record_dataset(
create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)
logging.info("Exiting")
os.system('say "Exiting" &')
say("Exiting")
return lerobot_dataset
def replay_episode(robot: Robot, episode: int, fps: int | None = None, root="data", repo_id="lerobot/debug"):
def replay(robot: Robot, episode: int, fps: int | None = None, root="data", repo_id="lerobot/debug"):
# TODO(rcadene): Add option to record logs
local_dir = Path(root) / repo_id
if not local_dir.exists():
@@ -531,76 +671,20 @@ def replay_episode(robot: Robot, episode: int, fps: int | None = None, root="dat
robot.connect()
logging.info("Replaying episode")
os.system('say "Replaying episode"')
say("Replaying episode", blocking=True)
for idx in range(from_idx, to_idx):
now = time.perf_counter()
start_episode_t = time.perf_counter()
action = items[idx]["action"]
robot.send_action(action)
dt_s = time.perf_counter() - now
dt_s = time.perf_counter() - start_episode_t
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - now
dt_s = time.perf_counter() - start_episode_t
log_control_info(robot, dt_s, fps=fps)
def run_policy(robot: Robot, policy: torch.nn.Module, hydra_cfg: DictConfig, run_time_s: float | None = None):
# TODO(rcadene): Add option to record eval dataset and logs
# Check device is available
device = get_safe_torch_device(hydra_cfg.device, log=True)
policy.eval()
policy.to(device)
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
set_global_seed(hydra_cfg.seed)
fps = hydra_cfg.env.fps
if not robot.is_connected:
robot.connect()
start_time = time.perf_counter()
while True:
now = time.perf_counter()
observation = robot.capture_observation()
with (
torch.inference_mode(),
torch.autocast(device_type=device.type)
if device.type == "cuda" and hydra_cfg.use_amp
else nullcontext(),
):
# add batch dimension to 1
for name in observation:
observation[name] = observation[name].unsqueeze(0)
if device.type == "mps":
for name in observation:
observation[name] = observation[name].to(device)
action = policy.select_action(observation)
# remove batch dimension
action = action.squeeze(0)
robot.send_action(action.to("cpu"))
dt_s = time.perf_counter() - now
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - now
log_control_info(robot, dt_s, fps=fps)
if run_time_s is not None and time.perf_counter() - start_time > run_time_s:
break
if __name__ == "__main__":
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="mode", required=True)
@@ -608,18 +692,26 @@ if __name__ == "__main__":
# Set common options for all the subparsers
base_parser = argparse.ArgumentParser(add_help=False)
base_parser.add_argument(
"--robot",
"--robot-path",
type=str,
default="koch",
help="Name of the robot provided to the `make_robot(name)` factory function.",
default="lerobot/configs/robot/koch.yaml",
help="Path to robot yaml file used to instantiate the robot using `make_robot` factory function.",
)
base_parser.add_argument(
"--robot-overrides",
type=str,
nargs="*",
help="Any key=value arguments to override config values (use dots for.nested=overrides)",
)
parser_calib = subparsers.add_parser("calibrate", parents=[base_parser])
parser_teleop = subparsers.add_parser("teleoperate", parents=[base_parser])
parser_teleop.add_argument(
"--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
)
parser_record = subparsers.add_parser("record_dataset", parents=[base_parser])
parser_record = subparsers.add_parser("record", parents=[base_parser])
parser_record.add_argument(
"--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
)
@@ -638,19 +730,19 @@ if __name__ == "__main__":
parser_record.add_argument(
"--warmup-time-s",
type=int,
default=2,
default=10,
help="Number of seconds before starting data collection. It allows the robot devices to warmup and synchronize.",
)
parser_record.add_argument(
"--episode-time-s",
type=int,
default=10,
default=60,
help="Number of seconds for data recording for each episode.",
)
parser_record.add_argument(
"--reset-time-s",
type=int,
default=5,
default=60,
help="Number of seconds for resetting the environment after each episode.",
)
parser_record.add_argument("--num-episodes", type=int, default=50, help="Number of episodes to record.")
@@ -678,8 +770,23 @@ if __name__ == "__main__":
default=0,
help="By default, data recording is resumed. When set to 1, delete the local directory and start data recording from scratch.",
)
parser_record.add_argument(
"-p",
"--pretrained-policy-name-or-path",
type=str,
help=(
"Either the repo ID of a model hosted on the Hub or a path to a directory containing weights "
"saved using `Policy.save_pretrained`."
),
)
parser_record.add_argument(
"--policy-overrides",
type=str,
nargs="*",
help="Any key=value arguments to override config values (use dots for.nested=overrides)",
)
parser_replay = subparsers.add_parser("replay_episode", parents=[base_parser])
parser_replay = subparsers.add_parser("replay", parents=[base_parser])
parser_replay.add_argument(
"--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
)
@@ -697,41 +804,46 @@ if __name__ == "__main__":
)
parser_replay.add_argument("--episode", type=int, default=0, help="Index of the episode to replay.")
parser_policy = subparsers.add_parser("run_policy", parents=[base_parser])
parser_policy.add_argument(
"-p",
"--pretrained-policy-name-or-path",
type=str,
help=(
"Either the repo ID of a model hosted on the Hub or a path to a directory containing weights "
"saved using `Policy.save_pretrained`."
),
)
parser_policy.add_argument(
"overrides",
nargs="*",
help="Any key=value arguments to override config values (use dots for.nested=overrides)",
)
args = parser.parse_args()
init_logging()
control_mode = args.mode
robot_name = args.robot
robot_path = args.robot_path
robot_overrides = args.robot_overrides
kwargs = vars(args)
del kwargs["mode"]
del kwargs["robot"]
del kwargs["robot_path"]
del kwargs["robot_overrides"]
robot = make_robot(robot_name)
if control_mode == "teleoperate":
robot_cfg = init_hydra_config(robot_path, robot_overrides)
robot = make_robot(robot_cfg)
if control_mode == "calibrate":
calibrate(robot, **kwargs)
elif control_mode == "teleoperate":
teleoperate(robot, **kwargs)
elif control_mode == "record_dataset":
record_dataset(robot, **kwargs)
elif control_mode == "replay_episode":
replay_episode(robot, **kwargs)
elif control_mode == "run_policy":
pretrained_policy_path = get_pretrained_policy_path(args.pretrained_policy_name_or_path)
hydra_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", args.overrides)
policy = make_policy(hydra_cfg=hydra_cfg, pretrained_policy_name_or_path=pretrained_policy_path)
run_policy(robot, policy, hydra_cfg)
elif control_mode == "record":
pretrained_policy_name_or_path = args.pretrained_policy_name_or_path
policy_overrides = args.policy_overrides
del kwargs["pretrained_policy_name_or_path"]
del kwargs["policy_overrides"]
policy_cfg = None
if pretrained_policy_name_or_path is not None:
pretrained_policy_path = get_pretrained_policy_path(pretrained_policy_name_or_path)
policy_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", policy_overrides)
policy = make_policy(hydra_cfg=policy_cfg, pretrained_policy_name_or_path=pretrained_policy_path)
record(robot, policy, policy_cfg, **kwargs)
else:
record(robot, **kwargs)
elif control_mode == "replay":
replay(robot, **kwargs)
if robot.is_connected:
# Disconnect manually to avoid a "Core dump" during process
# termination due to camera threads not properly exiting.
robot.disconnect()