From aed02dc7c6159888c9fbff99001dda33e5771446 Mon Sep 17 00:00:00 2001
From: Cadene
Date: Sat, 24 Feb 2024 18:18:39 +0000
Subject: [PATCH] Add multithreading for video generation, speed up policy sampling

---
 README.md                | 29 +++++++++++++++++++++++++++++
 lerobot/common/tdmpc.py  | 12 +++++++++++-
 lerobot/scripts/eval.py  | 19 ++++++++++++++++---
 lerobot/scripts/train.py |  5 +++--
 4 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index fba7be33a..ce4f3dd43 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,35 @@ python lerobot/scripts/eval.py \
 - [ ] add diffusion
 - [ ] add aloha 2
 
+## Profile
+
+**Example**
+```python
+from torch.profiler import ProfilerActivity, profile, record_function, schedule
+
+def trace_handler(prof):
+    prof.export_chrome_trace(f"tmp/trace_schedule_{prof.step_num}.json")
+
+with profile(
+    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
+    schedule=schedule(
+        wait=2,
+        warmup=2,
+        active=3,
+    ),
+    on_trace_ready=trace_handler
+) as prof:
+    with record_function("eval_policy"):
+        for i in range(num_episodes):
+            prof.step()
+```
+
+```bash
+python lerobot/scripts/eval.py \
+pretrained_model_path=/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/final.pt \
+eval_episodes=7
+```
+
 ## Contribute
 
 **style**
diff --git a/lerobot/common/tdmpc.py b/lerobot/common/tdmpc.py
index 7ceff4563..902673aa1 100644
--- a/lerobot/common/tdmpc.py
+++ b/lerobot/common/tdmpc.py
@@ -51,6 +51,11 @@ class TOLD(nn.Module):
         """Predicts next latent state (d) and single-step reward (R)."""
         x = torch.cat([z, a], dim=-1)
         return self._dynamics(x), self._reward(x)
+
+    def next_dynamics(self, z, a):
+        """Predicts next latent state (d)."""
+        x = torch.cat([z, a], dim=-1)
+        return self._dynamics(x)
 
     def pi(self, z, std=0):
         """Samples an action from the learned policy (pi)."""
@@ -191,7 +196,7 @@ class TDMPC(nn.Module):
             _z = z.repeat(num_pi_trajs, 1)
             for t in range(horizon):
                 pi_actions[t] = self.model.pi(_z, self.cfg.min_std)
-                _z, _ = self.model.next(_z, pi_actions[t])
+                _z = self.model.next_dynamics(_z, pi_actions[t])
 
         # Initialize state and parameters
         z = z.repeat(self.cfg.num_samples + num_pi_trajs, 1)
@@ -241,6 +246,11 @@ class TDMPC(nn.Module):
             mean, std = self.cfg.momentum * mean + (1 - self.cfg.momentum) * _mean, _std
 
         # Outputs
+        # TODO(rcadene): remove numpy with
+        # # Convert score tensor to probabilities using softmax
+        # probabilities = torch.softmax(score, dim=0)
+        # # Generate a random sample index based on the probabilities
+        # sample_index = torch.multinomial(probabilities, 1).item()
         score = score.squeeze(1).cpu().numpy()
         actions = elite_actions[:, np.random.choice(np.arange(score.shape[0]), p=score)]
         self._prev_mean = mean
diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py
index 663f55e82..62d482e09 100644
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -11,7 +11,10 @@ from torchrl.envs import EnvBase
 from lerobot.common.envs.factory import make_env
 from lerobot.common.tdmpc import TDMPC
 from lerobot.common.utils import set_seed
+import threading
 
+def write_video(video_path, stacked_frames, fps):
+    imageio.mimsave(video_path, stacked_frames, fps=fps)
 
 def eval_policy(
     env: EnvBase,
@@ -29,6 +32,7 @@ def eval_policy(
     sum_rewards = []
     max_rewards = []
     successes = []
+    threads = []
 
     for i in range(num_episodes):
         ep_frames = []
@@ -63,7 +67,12 @@ def eval_policy(
         if save_video:
            video_dir.mkdir(parents=True, exist_ok=True)
            video_path = video_dir / f"eval_episode_{i}.mp4"
-           imageio.mimsave(video_path, stacked_frames, fps=fps)
+           thread = threading.Thread(
+               target=write_video,
+               args=(str(video_path), stacked_frames, fps),
+           )
+           thread.start()
+           threads.append(thread)
 
         first_episode = i == 0
         if wandb and first_episode:
@@ -72,6 +81,9 @@ def eval_policy(
             )
             wandb.log({"eval_video": eval_video}, step=env_step)
 
+    for thread in threads:
+        thread.join()
+
     metrics = {
         "avg_sum_reward": np.nanmean(sum_rewards),
         "avg_max_reward": np.nanmean(max_rewards),
@@ -90,6 +102,7 @@ def eval(cfg: dict, out_dir=None):
         raise NotImplementedError()
 
     assert torch.cuda.is_available()
+    torch.backends.cudnn.benchmark = True
     set_seed(cfg.seed)
     print(colored("Log dir:", "yellow", attrs=["bold"]), out_dir)
 
@@ -98,9 +111,9 @@ def eval(cfg: dict, out_dir=None):
     if cfg.pretrained_model_path:
         policy = TDMPC(cfg)
         if "offline" in cfg.pretrained_model_path:
-            policy.step = 25000
+            policy.step[0] = 25000
         elif "final" in cfg.pretrained_model_path:
-            policy.step = 100000
+            policy.step[0] = 100000
         else:
             raise NotImplementedError()
         policy.load(cfg.pretrained_model_path)
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index 6af8656ea..40b9d30ac 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -46,6 +46,7 @@ def train(cfg: dict, out_dir=None, job_name=None):
         raise NotImplementedError()
 
     assert torch.cuda.is_available()
+    torch.backends.cudnn.benchmark = True
     set_seed(cfg.seed)
     print(colored("Work dir:", "yellow", attrs=["bold"]), out_dir)
 
@@ -55,9 +56,9 @@ def train(cfg: dict, out_dir=None, job_name=None):
         # TODO(rcadene): hack for old pretrained models from fowm
         if "fowm" in cfg.pretrained_model_path:
             if "offline" in cfg.pretrained_model_path:
-                policy.step = 25000
+                policy.step[0] = 25000
             elif "final" in cfg.pretrained_model_path:
-                policy.step = 100000
+                policy.step[0] = 100000
             else:
                 raise NotImplementedError()
         policy.load(cfg.pretrained_model_path)
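
For reference, here is a minimal torch-only sketch of the sampling change hinted at by the `TODO(rcadene)` comment added to `lerobot/common/tdmpc.py` above. It is not part of the patch: the helper name is hypothetical, and the shapes are assumptions taken from the surrounding code (`score` is a non-negative `[num_elites, 1]` tensor passed as `p=` to `np.random.choice`, and `elite_actions` is indexed as `elite_actions[:, idx]`).

```python
import torch


def sample_elite_actions(score: torch.Tensor, elite_actions: torch.Tensor) -> torch.Tensor:
    """Sample one elite action sequence with probability proportional to its score.

    Hypothetical helper illustrating the TODO above; assumes score has shape
    [num_elites, 1] and elite_actions has shape [horizon, num_elites, action_dim].
    """
    # torch.multinomial accepts non-negative weights and normalizes them internally,
    # so the already-normalized score can be used directly without leaving the GPU.
    sample_index = torch.multinomial(score.squeeze(1), 1).item()
    # Return the action sequence of the sampled elite trajectory.
    return elite_actions[:, sample_index]
```

Compared with the commented-out snippet in the TODO, the explicit `torch.softmax` would only be needed if `score` held unnormalized values; applying it to an already-normalized `score` would flatten the sampling distribution.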