diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py
index 784e9fc66..4656130e9 100644
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -209,7 +209,7 @@ def eval_policy(
     policy: torch.nn.Module,
     n_episodes: int,
     max_episodes_rendered: int = 0,
-    video_dir: Path | None = None,
+    videos_dir: Path | None = None,
     return_episode_data: bool = False,
     start_seed: int | None = None,
     enable_progbar: bool = False,
@@ -221,7 +221,7 @@ def eval_policy(
         policy: The policy.
         n_episodes: The number of episodes to evaluate.
         max_episodes_rendered: Maximum number of episodes to render into videos.
-        video_dir: Where to save rendered videos.
+        videos_dir: Where to save rendered videos.
         return_episode_data: Whether to return episode data for online training. Incorporates the data into
             the "episodes" key of the returned dictionary.
         start_seed: The first seed to use for the first individual rollout. For all subsequent rollouts the
@@ -347,8 +347,8 @@ def eval_policy(
             ):
                 if n_episodes_rendered >= max_episodes_rendered:
                     break
-                video_dir.mkdir(parents=True, exist_ok=True)
-                video_path = video_dir / f"eval_episode_{n_episodes_rendered}.mp4"
+                videos_dir.mkdir(parents=True, exist_ok=True)
+                video_path = videos_dir / f"eval_episode_{n_episodes_rendered}.mp4"
                 video_paths.append(str(video_path))
                 thread = threading.Thread(
                     target=write_video,
@@ -503,9 +503,10 @@ def _compile_episode_data(
    }


-def eval(
+def main(
     pretrained_policy_path: str | None = None,
     hydra_cfg_path: str | None = None,
+    out_dir: str | None = None,
     config_overrides: list[str] | None = None,
 ):
     assert (pretrained_policy_path is None) ^ (hydra_cfg_path is None)
@@ -513,12 +514,8 @@ def eval(
         hydra_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", config_overrides)
     else:
         hydra_cfg = init_hydra_config(hydra_cfg_path, config_overrides)
-    out_dir = (
-        f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}"
-    )
-
-    if out_dir is None:
-        raise NotImplementedError()
+    if out_dir is None:
+        out_dir = f"outputs/eval/{dt.now().strftime('%Y-%m-%d/%H-%M-%S')}_{hydra_cfg.env.name}_{hydra_cfg.policy.name}"

     # Check device is available
     device = get_safe_torch_device(hydra_cfg.device, log=True)
@@ -546,7 +543,7 @@ def eval(
             policy,
             hydra_cfg.eval.n_episodes,
             max_episodes_rendered=10,
-            video_dir=Path(out_dir) / "eval",
+            videos_dir=Path(out_dir) / "videos",
             start_seed=hydra_cfg.seed,
             enable_progbar=True,
             enable_inner_progbar=True,
@@ -586,6 +583,13 @@ if __name__ == "__main__":
         ),
     )
     parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.")
+    parser.add_argument(
+        "--out-dir",
+        help=(
+            "Where to save the evaluation outputs. If not provided, outputs are saved in "
+            "outputs/eval/{timestamp}_{env_name}_{policy_name}"
+        ),
+    )
     parser.add_argument(
         "overrides",
         nargs="*",
@@ -594,7 +598,7 @@ if __name__ == "__main__":
     args = parser.parse_args()

     if args.pretrained_policy_name_or_path is None:
-        eval(hydra_cfg_path=args.config, config_overrides=args.overrides)
+        main(hydra_cfg_path=args.config, out_dir=args.out_dir, config_overrides=args.overrides)
     else:
         try:
             pretrained_policy_path = Path(
@@ -618,4 +622,8 @@ if __name__ == "__main__":
                 "repo ID, nor is it an existing local directory."
            )

-    eval(pretrained_policy_path=pretrained_policy_path, config_overrides=args.overrides)
+    main(
+        pretrained_policy_path=pretrained_policy_path,
+        out_dir=args.out_dir,
+        config_overrides=args.overrides,
+    )
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index 860412bd6..5990d18a8 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -150,6 +150,7 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_offline):
     grad_norm = info["grad_norm"]
     lr = info["lr"]
     update_s = info["update_s"]
+    dataloading_s = info["dataloading_s"]

     # A sample is an (observation,action) pair, where observation and action
     # can be on multiple timestamps. In a batch, we have `batch_size`` number of samples.
@@ -170,6 +171,7 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_offline):
         f"lr:{lr:0.1e}",
         # in seconds
         f"updt_s:{update_s:.3f}",
+        f"data_s:{dataloading_s:.3f}",  # if not ~0, you are bottlenecked by cpu or io
     ]
     logging.info(" ".join(log_items))

@@ -325,6 +327,9 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No

     # Note: this helper will be used in offline and online training loops.
     def evaluate_and_checkpoint_if_needed(step):
+        _num_digits = max(6, len(str(cfg.training.offline_steps + cfg.training.online_steps)))
+        step_identifier = f"{step:0{_num_digits}d}"
+
         if cfg.training.eval_freq > 0 and step % cfg.training.eval_freq == 0:
             logging.info(f"Eval policy at step {step}")
             with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.use_amp else nullcontext():
@@ -332,7 +337,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
                     eval_env,
                     policy,
                     cfg.eval.n_episodes,
-                    video_dir=Path(out_dir) / "eval",
+                    videos_dir=Path(out_dir) / "eval" / f"videos_step_{step_identifier}",
                     max_episodes_rendered=4,
                     start_seed=cfg.seed,
                 )
@@ -350,9 +355,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
                 policy,
                 optimizer,
                 lr_scheduler,
-                identifier=str(step).zfill(
-                    max(6, len(str(cfg.training.offline_steps + cfg.training.online_steps)))
-                ),
+                identifier=step_identifier,
             )
             logging.info("Resume training")

@@ -382,7 +385,10 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
     for _ in range(step, cfg.training.offline_steps):
         if step == 0:
             logging.info("Start offline training on a fixed dataset")
+
+        start_time = time.perf_counter()
         batch = next(dl_iter)
+        dataloading_s = time.perf_counter() - start_time

         for key in batch:
             batch[key] = batch[key].to(device, non_blocking=True)
@@ -397,6 +403,8 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
             use_amp=cfg.use_amp,
         )

+        train_info["dataloading_s"] = dataloading_s
+
         if step % cfg.training.log_freq == 0:
             log_train_info(logger, train_info, step, cfg, offline_dataset, is_offline=True)
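
For reference, a standalone sketch (not part of the patch; the step counts below are illustrative, not taken from any config) of what the zero-padded step_identifier introduced in train.py produces for checkpoint names and the new videos_step_* directories:

    # Mirrors the formatting logic added to evaluate_and_checkpoint_if_needed().
    offline_steps, online_steps = 80_000, 0  # illustrative values only
    _num_digits = max(6, len(str(offline_steps + online_steps)))
    for step in (0, 5_000, 80_000):
        print(f"{step:0{_num_digits}d}")  # -> 000000, 005000, 080000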