Merge remote-tracking branch 'Cadene/user/rcadene/2024_03_31_remove_torchrl' into refactor_act_remove_torchrl

2024-04-05 11:41:11 +01:00
parent edb125b351 5af00d0c1e
commit 9d77f5773d
21 changed files with 1303 additions and 1370 deletions
--- a/lerobot/common/policies/diffusion/diffusion_unet_image_policy.py
+++ b/lerobot/common/policies/diffusion/diffusion_unet_image_policy.py
@@ -243,10 +243,9 @@ class DiffusionUnetImagePolicy(BaseImagePolicy):
        result = {"action": action, "action_pred": action_pred}
        return result

-    def compute_loss(self, batch):
-        assert "valid_mask" not in batch
-        nobs = batch["obs"]
-        nactions = batch["action"]
+    def compute_loss(self, obs_dict, action):
+        nobs = obs_dict
+        nactions = action
        batch_size = nactions.shape[0]
        horizon = nactions.shape[1]

--- a/lerobot/common/policies/diffusion/policy.py
+++ b/lerobot/common/policies/diffusion/policy.py
@@ -1,18 +1,20 @@
 import copy
 import logging
 import time
+from collections import deque

 import hydra
 import torch
+from torch import nn

-from lerobot.common.policies.abstract import AbstractPolicy
 from lerobot.common.policies.diffusion.diffusion_unet_image_policy import DiffusionUnetImagePolicy
 from lerobot.common.policies.diffusion.model.lr_scheduler import get_scheduler
 from lerobot.common.policies.diffusion.model.multi_image_obs_encoder import MultiImageObsEncoder, RgbEncoder
+from lerobot.common.policies.utils import populate_queues
 from lerobot.common.utils import get_safe_torch_device


-class DiffusionPolicy(AbstractPolicy):
+class DiffusionPolicy(nn.Module):
    name = "diffusion"

    def __init__(
@@ -38,8 +40,12 @@ class DiffusionPolicy(AbstractPolicy):
        # parameters passed to step
        **kwargs,
    ):
-        super().__init__(n_action_steps)
+        super().__init__()
        self.cfg = cfg
+        self.n_obs_steps = n_obs_steps
+        self.n_action_steps = n_action_steps
+        # queues are populated during rollout of the policy, they contain the n latest observations and actions
+        self._queues = None

        noise_scheduler = hydra.utils.instantiate(cfg_noise_scheduler)
        rgb_model_input_shape = copy.deepcopy(shape_meta.obs.image.shape)
@@ -100,76 +106,59 @@ class DiffusionPolicy(AbstractPolicy):
            last_epoch=self.global_step - 1,
        )

+    def reset(self):
+        """
+        Clear observation and action queues. Should be called on `env.reset()`
+        """
+        self._queues = {
+            "observation.image": deque(maxlen=self.n_obs_steps),
+            "observation.state": deque(maxlen=self.n_obs_steps),
+            "action": deque(maxlen=self.n_action_steps),
+        }
+
    @torch.no_grad()
-    def select_actions(self, observation, step_count):
+    def select_action(self, batch, step):
        """
        Note: this uses the ema model weights if self.training == False, otherwise the non-ema model weights.
        """
-        # TODO(rcadene): remove unused step_count
-        del step_count
+        # TODO(rcadene): remove unused step
+        del step
+        assert "observation.image" in batch
+        assert "observation.state" in batch
+        assert len(batch) == 2

-        obs_dict = {
-            "image": observation["image"],
-            "agent_pos": observation["state"],
-        }
-        if self.training:
-            out = self.diffusion.predict_action(obs_dict)
-        else:
-            out = self.ema_diffusion.predict_action(obs_dict)
-        action = out["action"]
+        self._queues = populate_queues(self._queues, batch)
+
+        if len(self._queues["action"]) == 0:
+            # stack n latest observations from the queue
+            batch = {key: torch.stack(list(self._queues[key]), dim=1) for key in batch}
+
+            obs_dict = {
+                "image": batch["observation.image"],
+                "agent_pos": batch["observation.state"],
+            }
+            if self.training:
+                out = self.diffusion.predict_action(obs_dict)
+            else:
+                out = self.ema_diffusion.predict_action(obs_dict)
+            self._queues["action"].extend(out["action"].transpose(0, 1))
+
+        action = self._queues["action"].popleft()
        return action

-    def update(self, replay_buffer, step):
+    def forward(self, batch, step):
        start_time = time.time()

        self.diffusion.train()

-        num_slices = self.cfg.batch_size
-        batch_size = self.cfg.horizon * num_slices
-
-        assert batch_size % self.cfg.horizon == 0
-        assert batch_size % num_slices == 0
-
-        def process_batch(batch, horizon, num_slices):
-            # trajectory t = 64, horizon h = 16
-            # (t h) ... -> t h ...
-            batch = batch.reshape(num_slices, horizon)  # .transpose(1, 0).contiguous()
-
-            # |-1|0|1|2|3|4|5|6|7|8|9|10|11|12|13|14| timestamps: 16
-            #  |o|o|                                  observations: 2
-            #  | |a|a|a|a|a|a|a|a|                    actions executed: 8
-            #  |p|p|p|p|p|p|p|p|p|p|p| p| p| p| p| p| actions predicted: 16
-            # note: we predict the action needed to go from t=-1 to t=0 similarly to an inverse kinematic model
-
-            image = batch["observation", "image"]
-            state = batch["observation", "state"]
-            action = batch["action"]
-            assert image.shape[1] == horizon
-            assert state.shape[1] == horizon
-            assert action.shape[1] == horizon
-
-            if not (horizon == 16 and self.cfg.n_obs_steps == 2):
-                raise NotImplementedError()
-
-            # keep first 2 observations of the slice corresponding to t=[-1,0]
-            image = image[:, : self.cfg.n_obs_steps]
-            state = state[:, : self.cfg.n_obs_steps]
-
-            out = {
-                "obs": {
-                    "image": image.to(self.device, non_blocking=True),
-                    "agent_pos": state.to(self.device, non_blocking=True),
-                },
-                "action": action.to(self.device, non_blocking=True),
-            }
-            return out
-
-        batch = replay_buffer.sample(batch_size)
-        batch = process_batch(batch, self.cfg.horizon, num_slices)
-
        data_s = time.time() - start_time

-        loss = self.diffusion.compute_loss(batch)
+        obs_dict = {
+            "image": batch["observation.image"],
+            "agent_pos": batch["observation.state"],
+        }
+        action = batch["action"]
+        loss = self.diffusion.compute_loss(obs_dict, action)
        loss.backward()

        grad_norm = torch.nn.utils.clip_grad_norm_(