From aed02dc7c6159888c9fbff99001dda33e5771446 Mon Sep 17 00:00:00 2001
From: Cadene
Date: Sat, 24 Feb 2024 18:18:39 +0000
Subject: [PATCH] Add multithreading for video generation, speed up policy sampling

---
 README.md                | 29 +++++++++++++++++++++++++++++
 lerobot/common/tdmpc.py  | 12 +++++++++++-
 lerobot/scripts/eval.py  | 19 ++++++++++++++++---
 lerobot/scripts/train.py |  5 +++--
 4 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index fba7be33a..ce4f3dd43 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,35 @@ python lerobot/scripts/eval.py \
 - [ ] add diffusion
 - [ ] add aloha 2
 
+## Profile
+
+**Example**
+```python
+from torch.profiler import ProfilerActivity, profile, record_function, schedule
+
+def trace_handler(prof):
+    prof.export_chrome_trace(f"tmp/trace_schedule_{prof.step_num}.json")
+
+with profile(
+    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
+    schedule=schedule(
+        wait=2,
+        warmup=2,
+        active=3,
+    ),
+    on_trace_ready=trace_handler
+) as prof:
+    with record_function("eval_policy"):
+        for i in range(num_episodes):
+            prof.step()
+```
+
+```bash
+python lerobot/scripts/eval.py \
+pretrained_model_path=/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/final.pt \
+eval_episodes=7
+```
+
 ## Contribute
 
 **style**
diff --git a/lerobot/common/tdmpc.py b/lerobot/common/tdmpc.py
index 7ceff4563..902673aa1 100644
--- a/lerobot/common/tdmpc.py
+++ b/lerobot/common/tdmpc.py
@@ -51,6 +51,11 @@ class TOLD(nn.Module):
         """Predicts next latent state (d) and single-step reward (R)."""
         x = torch.cat([z, a], dim=-1)
         return self._dynamics(x), self._reward(x)
+
+    def next_dynamics(self, z, a):
+        """Predicts next latent state (d)."""
+        x = torch.cat([z, a], dim=-1)
+        return self._dynamics(x)
 
     def pi(self, z, std=0):
         """Samples an action from the learned policy (pi)."""
@@ -191,7 +196,7 @@ class TDMPC(nn.Module):
             _z = z.repeat(num_pi_trajs, 1)
             for t in range(horizon):
                 pi_actions[t] = self.model.pi(_z, self.cfg.min_std)
-                _z, _ = self.model.next(_z, pi_actions[t])
+                _z = self.model.next_dynamics(_z, pi_actions[t])
 
         # Initialize state and parameters
         z = z.repeat(self.cfg.num_samples + num_pi_trajs, 1)
@@ -241,6 +246,11 @@ class TDMPC(nn.Module):
             mean, std = self.cfg.momentum * mean + (1 - self.cfg.momentum) * _mean, _std
 
         # Outputs
+        # TODO(rcadene): remove numpy with
+        # # Convert score tensor to probabilities using softmax
+        # probabilities = torch.softmax(score, dim=0)
+        # # Generate a random sample index based on the probabilities
+        # sample_index = torch.multinomial(probabilities, 1).item()
         score = score.squeeze(1).cpu().numpy()
         actions = elite_actions[:, np.random.choice(np.arange(score.shape[0]), p=score)]
         self._prev_mean = mean
diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py
index 663f55e82..62d482e09 100644
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -11,7 +11,10 @@ from torchrl.envs import EnvBase
 from lerobot.common.envs.factory import make_env
 from lerobot.common.tdmpc import TDMPC
 from lerobot.common.utils import set_seed
+import threading
 
+def write_video(video_path, stacked_frames, fps):
+    imageio.mimsave(video_path, stacked_frames, fps=fps)
 
 def eval_policy(
     env: EnvBase,
@@ -29,6 +32,7 @@ def eval_policy(
     sum_rewards = []
     max_rewards = []
     successes = []
+    threads = []
 
     for i in range(num_episodes):
         ep_frames = []
@@ -63,7 +67,12 @@ def eval_policy(
         if save_video:
            video_dir.mkdir(parents=True, exist_ok=True)
            video_path = video_dir / f"eval_episode_{i}.mp4"
-           imageio.mimsave(video_path, stacked_frames, fps=fps)
+           thread = threading.Thread(
+               target=write_video,
+               args=(str(video_path), stacked_frames, fps),
+           )
+           thread.start()
+           threads.append(thread)
 
         first_episode = i == 0
         if wandb and first_episode:
@@ -72,6 +81,9 @@ def eval_policy(
             )
             wandb.log({"eval_video": eval_video}, step=env_step)
 
+    for thread in threads:
+        thread.join()
+
     metrics = {
         "avg_sum_reward": np.nanmean(sum_rewards),
         "avg_max_reward": np.nanmean(max_rewards),
@@ -90,6 +102,7 @@ def eval(cfg: dict, out_dir=None):
         raise NotImplementedError()
 
     assert torch.cuda.is_available()
+    torch.backends.cudnn.benchmark = True
     set_seed(cfg.seed)
     print(colored("Log dir:", "yellow", attrs=["bold"]), out_dir)
 
@@ -98,9 +111,9 @@ def eval(cfg: dict, out_dir=None):
     if cfg.pretrained_model_path:
         policy = TDMPC(cfg)
         if "offline" in cfg.pretrained_model_path:
-            policy.step = 25000
+            policy.step[0] = 25000
         elif "final" in cfg.pretrained_model_path:
-            policy.step = 100000
+            policy.step[0] = 100000
         else:
             raise NotImplementedError()
         policy.load(cfg.pretrained_model_path)
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index 6af8656ea..40b9d30ac 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -46,6 +46,7 @@ def train(cfg: dict, out_dir=None, job_name=None):
         raise NotImplementedError()
 
     assert torch.cuda.is_available()
+    torch.backends.cudnn.benchmark = True
     set_seed(cfg.seed)
     print(colored("Work dir:", "yellow", attrs=["bold"]), out_dir)
 
@@ -55,9 +56,9 @@ def train(cfg: dict, out_dir=None, job_name=None):
         # TODO(rcadene): hack for old pretrained models from fowm
         if "fowm" in cfg.pretrained_model_path:
             if "offline" in cfg.pretrained_model_path:
-                policy.step = 25000
+                policy.step[0] = 25000
             elif "final" in cfg.pretrained_model_path:
-                policy.step = 100000
+                policy.step[0] = 100000
             else:
                 raise NotImplementedError()
         policy.load(cfg.pretrained_model_path)
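
For reference, here is a minimal torch-only sketch of the sampling change hinted at by the `TODO(rcadene)` comment added to `lerobot/common/tdmpc.py` above. It is not part of the patch: the helper name is hypothetical, and the shapes are assumptions taken from the surrounding code (`score` is a non-negative `[num_elites, 1]` tensor passed as `p=` to `np.random.choice`, and `elite_actions` is indexed as `elite_actions[:, idx]`).

```python
import torch


def sample_elite_actions(score: torch.Tensor, elite_actions: torch.Tensor) -> torch.Tensor:
    """Sample one elite action sequence with probability proportional to its score.

    Hypothetical helper illustrating the TODO above; assumes score has shape
    [num_elites, 1] and elite_actions has shape [horizon, num_elites, action_dim].
    """
    # torch.multinomial accepts non-negative weights and normalizes them internally,
    # so the already-normalized score can be used directly without leaving the GPU.
    sample_index = torch.multinomial(score.squeeze(1), 1).item()
    # Return the action sequence of the sampled elite trajectory.
    return elite_actions[:, sample_index]
```

Compared with the commented-out snippet in the TODO, the explicit `torch.softmax` would only be needed if `score` held unnormalized values; applying it to an already-normalized `score` would flatten the sampling distribution.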