wip: still needs batch logic for act and tdmp

2024-03-14 15:22:55 +00:00
parent 8c56770318
commit ba91976944
11 changed files with 240 additions and 100 deletions
--- a/lerobot/common/envs/aloha/env.py
+++ b/lerobot/common/envs/aloha/env.py
@@ -168,42 +168,31 @@ class AlohaEnv(AbstractEnv):
    def _step(self, tensordict: TensorDict):
        td = tensordict
        action = td["action"].numpy()
-        # step expects shape=(4,) so we pad if necessary
+        assert action.ndim == 1
        # TODO(rcadene): add info["is_success"] and info["success"] ?
-        sum_reward = 0

-        if action.ndim == 1:
-            action = einops.repeat(action, "c -> t c", t=self.frame_skip)
-        else:
-            if self.frame_skip > 1:
-                raise NotImplementedError()
+        _, reward, _, raw_obs = self._env.step(action)

-        num_action_steps = action.shape[0]
-        for i in range(num_action_steps):
-            _, reward, discount, raw_obs = self._env.step(action[i])
-            del discount  # not used
+        # TODO(rcadene): add an enum
+        success = done = reward == 4
+        obs = self._format_raw_obs(raw_obs)

-            # TOOD(rcadene): add an enum
-            success = done = reward == 4
-            sum_reward += reward
-            obs = self._format_raw_obs(raw_obs)
+        if self.num_prev_obs > 0:
+            stacked_obs = {}
+            if "image" in obs:
+                self._prev_obs_image_queue.append(obs["image"]["top"])
+                stacked_obs["image"] = {"top": torch.stack(list(self._prev_obs_image_queue))}
+            if "state" in obs:
+                self._prev_obs_state_queue.append(obs["state"])
+                stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
+            obs = stacked_obs

-            if self.num_prev_obs > 0:
-                stacked_obs = {}
-                if "image" in obs:
-                    self._prev_obs_image_queue.append(obs["image"]["top"])
-                    stacked_obs["image"] = {"top": torch.stack(list(self._prev_obs_image_queue))}
-                if "state" in obs:
-                    self._prev_obs_state_queue.append(obs["state"])
-                    stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
-                obs = stacked_obs
-
-            self.call_rendering_hooks()
+        self.call_rendering_hooks()

        td = TensorDict(
            {
                "observation": TensorDict(obs, batch_size=[]),
-                "reward": torch.tensor([sum_reward], dtype=torch.float32),
+                "reward": torch.tensor([reward], dtype=torch.float32),
                # success and done are true when coverage > self.success_threshold in env
                "done": torch.tensor([done], dtype=torch.bool),
                "success": torch.tensor([success], dtype=torch.bool),
--- a/lerobot/common/envs/factory.py
+++ b/lerobot/common/envs/factory.py
@@ -1,15 +1,17 @@
 from torchrl.envs.transforms import Compose, StepCounter, Transform, TransformedEnv


-def make_env(cfg, transform=None):
+def make_env(cfg, seed=None, transform=None):
+    """
+    Provide seed to override the seed in the cfg (useful for batched environments).
+    """
    kwargs = {
        "frame_skip": cfg.env.action_repeat,
        "from_pixels": cfg.env.from_pixels,
        "pixels_only": cfg.env.pixels_only,
        "image_size": cfg.env.image_size,
-        # TODO(rcadene): do we want a specific eval_env_seed?
-        "seed": cfg.seed,
        "num_prev_obs": cfg.n_obs_steps - 1,
+        "seed": seed if seed is not None else cfg.seed,
    }

    if cfg.env.name == "simxarm":
--- a/lerobot/common/envs/pusht/env.py
+++ b/lerobot/common/envs/pusht/env.py
@@ -2,7 +2,6 @@ import importlib
 from collections import deque
 from typing import Optional

-import einops
 import torch
 from tensordict import TensorDict
 from torchrl.data.tensor_specs import (
@@ -120,40 +119,30 @@ class PushtEnv(AbstractEnv):
    def _step(self, tensordict: TensorDict):
        td = tensordict
        action = td["action"].numpy()
-        # step expects shape=(4,) so we pad if necessary
+        assert action.ndim == 1
        # TODO(rcadene): add info["is_success"] and info["success"] ?
-        sum_reward = 0

-        if action.ndim == 1:
-            action = einops.repeat(action, "c -> t c", t=self.frame_skip)
-        else:
-            if self.frame_skip > 1:
-                raise NotImplementedError()
+        raw_obs, reward, done, info = self._env.step(action)

-        num_action_steps = action.shape[0]
-        for i in range(num_action_steps):
-            raw_obs, reward, done, info = self._env.step(action[i])
-            sum_reward += reward
+        obs = self._format_raw_obs(raw_obs)

-            obs = self._format_raw_obs(raw_obs)
+        if self.num_prev_obs > 0:
+            stacked_obs = {}
+            if "image" in obs:
+                self._prev_obs_image_queue.append(obs["image"])
+                stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
+            if "state" in obs:
+                self._prev_obs_state_queue.append(obs["state"])
+                stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
+            obs = stacked_obs

-            if self.num_prev_obs > 0:
-                stacked_obs = {}
-                if "image" in obs:
-                    self._prev_obs_image_queue.append(obs["image"])
-                    stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
-                if "state" in obs:
-                    self._prev_obs_state_queue.append(obs["state"])
-                    stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
-                obs = stacked_obs
-
-            self.call_rendering_hooks()
+        self.call_rendering_hooks()

        td = TensorDict(
            {
                "observation": TensorDict(obs, batch_size=[]),
-                "reward": torch.tensor([sum_reward], dtype=torch.float32),
-                # succes and done are true when coverage > self.success_threshold in env
+                "reward": torch.tensor([reward], dtype=torch.float32),
+                # success and done are true when coverage > self.success_threshold in env
                "done": torch.tensor([done], dtype=torch.bool),
                "success": torch.tensor([done], dtype=torch.bool),
            },