Refactor eval.py (#127)

2024-05-03 17:33:16 +01:00
parent b7b69fcc3d
commit bccee745c3
12 changed files with 457 additions and 298 deletions
--- a/lerobot/common/envs/factory.py
+++ b/lerobot/common/envs/factory.py
@@ -1,13 +1,17 @@
 import importlib

 import gymnasium as gym
+from omegaconf import DictConfig


-def make_env(cfg, num_parallel_envs=0) -> gym.Env | gym.vector.SyncVectorEnv:
-    """
-    Note: When `num_parallel_envs > 0`, this function returns a `SyncVectorEnv` which takes batched action as input and
-    returns batched observation, reward, terminated, truncated of `num_parallel_envs` items.
+def make_env(cfg: DictConfig, n_envs: int | None = None) -> gym.vector.VectorEnv:
+    """Makes a gym vector environment according to the evaluation config.
+
+    n_envs can be used to override eval.batch_size in the configuration. Must be at least 1.
    """
+    if n_envs is not None and n_envs < 1:
+        raise ValueError("`n_envs must be at least 1")
+
    kwargs = {
        "obs_type": "pixels_agent_pos",
        "render_mode": "rgb_array",
@@ -28,16 +32,13 @@ def make_env(cfg, num_parallel_envs=0) -> gym.Env | gym.vector.SyncVectorEnv:

    gym_handle = f"{package_name}/{cfg.env.task}"

-    if num_parallel_envs == 0:
-        # non-batched version of the env that returns an observation of shape (c)
-        env = gym.make(gym_handle, disable_env_checker=True, **kwargs)
-    else:
-        # batched version of the env that returns an observation of shape (b, c)
-        env = gym.vector.SyncVectorEnv(
-            [
-                lambda: gym.make(gym_handle, disable_env_checker=True, **kwargs)
-                for _ in range(num_parallel_envs)
-            ]
-        )
+    # batched version of the env that returns an observation of shape (b, c)
+    env_cls = gym.vector.AsyncVectorEnv if cfg.eval.use_async_envs else gym.vector.SyncVectorEnv
+    env = env_cls(
+        [
+            lambda: gym.make(gym_handle, disable_env_checker=True, **kwargs)
+            for _ in range(n_envs if n_envs is not None else cfg.eval.batch_size)
+        ]
+    )

    return env
--- a/lerobot/common/envs/utils.py
+++ b/lerobot/common/envs/utils.py
@@ -1,15 +1,23 @@
 import einops
+import numpy as np
 import torch
+from torch import Tensor


-def preprocess_observation(observation):
+def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]:
+    """Convert environment observation to LeRobot format observation.
+    Args:
+        observations: Dictionary of observation batches from a Gym vector environment.
+    Returns:
+        Dictionary of observation batches with keys renamed to LeRobot format and values as tensors.
+    """
    # map to expected inputs for the policy
-    obs = {}
+    return_observations = {}

-    if isinstance(observation["pixels"], dict):
-        imgs = {f"observation.images.{key}": img for key, img in observation["pixels"].items()}
+    if isinstance(observations["pixels"], dict):
+        imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()}
    else:
-        imgs = {"observation.image": observation["pixels"]}
+        imgs = {"observation.image": observations["pixels"]}

    for imgkey, img in imgs.items():
        img = torch.from_numpy(img)
@@ -26,17 +34,10 @@ def preprocess_observation(observation):
        img = img.type(torch.float32)
        img /= 255

-        obs[imgkey] = img
+        return_observations[imgkey] = img

-    # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing requirement for "agent_pos"
-    obs["observation.state"] = torch.from_numpy(observation["agent_pos"]).float()
+    # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
+    # requirement for "agent_pos"
+    return_observations["observation.state"] = torch.from_numpy(observations["agent_pos"]).float()

-    return obs
-
-
-def postprocess_action(action):
-    action = action.to("cpu").numpy()
-    assert (
-        action.ndim == 2
-    ), "we assume dimensions are respectively the number of parallel envs, action dimensions"
-    return action
+    return return_observations
--- a/lerobot/common/logger.py
+++ b/lerobot/common/logger.py
@@ -115,7 +115,7 @@ class Logger:
            for k, v in d.items():
                self._wandb.log({f"{mode}/{k}": v}, step=step)

-    def log_video(self, video, step, mode="train"):
+    def log_video(self, video_path: str, step: int, mode: str = "train"):
        assert mode in {"train", "eval"}
-        wandb_video = self._wandb.Video(video, fps=self._cfg.fps, format="mp4")
+        wandb_video = self._wandb.Video(video_path, fps=self._cfg.fps, format="mp4")
        self._wandb.log({f"{mode}/video": wandb_video}, step=step)