wip: still needs batch logic for act and tdmp

2024-03-14 15:22:55 +00:00
parent 8c56770318
commit ba91976944
11 changed files with 240 additions and 100 deletions
--- a/lerobot/common/envs/pusht/env.py
+++ b/lerobot/common/envs/pusht/env.py
@@ -2,7 +2,6 @@ import importlib
 from collections import deque
 from typing import Optional

-import einops
 import torch
 from tensordict import TensorDict
 from torchrl.data.tensor_specs import (
@@ -120,40 +119,30 @@ class PushtEnv(AbstractEnv):
    def _step(self, tensordict: TensorDict):
        td = tensordict
        action = td["action"].numpy()
-        # step expects shape=(4,) so we pad if necessary
+        assert action.ndim == 1
        # TODO(rcadene): add info["is_success"] and info["success"] ?
-        sum_reward = 0

-        if action.ndim == 1:
-            action = einops.repeat(action, "c -> t c", t=self.frame_skip)
-        else:
-            if self.frame_skip > 1:
-                raise NotImplementedError()
+        raw_obs, reward, done, info = self._env.step(action)

-        num_action_steps = action.shape[0]
-        for i in range(num_action_steps):
-            raw_obs, reward, done, info = self._env.step(action[i])
-            sum_reward += reward
+        obs = self._format_raw_obs(raw_obs)

-            obs = self._format_raw_obs(raw_obs)
+        if self.num_prev_obs > 0:
+            stacked_obs = {}
+            if "image" in obs:
+                self._prev_obs_image_queue.append(obs["image"])
+                stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
+            if "state" in obs:
+                self._prev_obs_state_queue.append(obs["state"])
+                stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
+            obs = stacked_obs

-            if self.num_prev_obs > 0:
-                stacked_obs = {}
-                if "image" in obs:
-                    self._prev_obs_image_queue.append(obs["image"])
-                    stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
-                if "state" in obs:
-                    self._prev_obs_state_queue.append(obs["state"])
-                    stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
-                obs = stacked_obs
-
-            self.call_rendering_hooks()
+        self.call_rendering_hooks()

        td = TensorDict(
            {
                "observation": TensorDict(obs, batch_size=[]),
-                "reward": torch.tensor([sum_reward], dtype=torch.float32),
-                # succes and done are true when coverage > self.success_threshold in env
+                "reward": torch.tensor([reward], dtype=torch.float32),
+                # success and done are true when coverage > self.success_threshold in env
                "done": torch.tensor([done], dtype=torch.bool),
                "success": torch.tensor([done], dtype=torch.bool),
            },