Train diffusion pusht_keypoints (#307)

Co-authored-by: Remi <re.cadene@gmail.com>
2024-07-09 12:35:50 +01:00
parent a4d77b99f0
commit cc2f6e7404
4 changed files with 206 additions and 56 deletions
--- a/lerobot/common/envs/utils.py
+++ b/lerobot/common/envs/utils.py
@@ -28,31 +28,35 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten
    """
    # map to expected inputs for the policy
    return_observations = {}
+    if "pixels" in observations:
+        if isinstance(observations["pixels"], dict):
+            imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()}
+        else:
+            imgs = {"observation.image": observations["pixels"]}

-    if isinstance(observations["pixels"], dict):
-        imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()}
-    else:
-        imgs = {"observation.image": observations["pixels"]}
+        for imgkey, img in imgs.items():
+            img = torch.from_numpy(img)

-    for imgkey, img in imgs.items():
-        img = torch.from_numpy(img)
+            # sanity check that images are channel last
+            _, h, w, c = img.shape
+            assert c < h and c < w, f"expect channel first images, but instead {img.shape}"

-        # sanity check that images are channel last
-        _, h, w, c = img.shape
-        assert c < h and c < w, f"expect channel first images, but instead {img.shape}"
+            # sanity check that images are uint8
+            assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}"

-        # sanity check that images are uint8
-        assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}"
+            # convert to channel first of type float32 in range [0,1]
+            img = einops.rearrange(img, "b h w c -> b c h w").contiguous()
+            img = img.type(torch.float32)
+            img /= 255

-        # convert to channel first of type float32 in range [0,1]
-        img = einops.rearrange(img, "b h w c -> b c h w").contiguous()
-        img = img.type(torch.float32)
-        img /= 255
+            return_observations[imgkey] = img

-        return_observations[imgkey] = img
+    if "environment_state" in observations:
+        return_observations["observation.environment_state"] = torch.from_numpy(
+            observations["environment_state"]
+        ).float()

    # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
    # requirement for "agent_pos"
    return_observations["observation.state"] = torch.from_numpy(observations["agent_pos"]).float()
-
    return return_observations