Simplify configs (#550)

Co-authored-by: Remi <remi.cadene@huggingface.co> Co-authored-by: HUANG TZU-CHUN <137322177+tc-huang@users.noreply.github.com>
2025-01-31 13:57:37 +01:00
parent 1ee1acf8ad
commit 3c0a209f9f
119 changed files with 5761 additions and 5466 deletions
--- a/lerobot/common/policies/normalize.py
+++ b/lerobot/common/policies/normalize.py
@@ -16,10 +16,12 @@
 import torch
 from torch import Tensor, nn

+from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature
+

 def create_stats_buffers(
-    shapes: dict[str, list[int]],
-    modes: dict[str, str],
+    features: dict[str, PolicyFeature],
+    norm_map: dict[str, NormalizationMode],
    stats: dict[str, dict[str, Tensor]] | None = None,
 ) -> dict[str, dict[str, nn.ParameterDict]]:
    """
@@ -34,12 +36,16 @@ def create_stats_buffers(
    """
    stats_buffers = {}

-    for key, mode in modes.items():
-        assert mode in ["mean_std", "min_max"]
+    for key, ft in features.items():
+        norm_mode = norm_map.get(ft.type, NormalizationMode.IDENTITY)
+        if norm_mode is NormalizationMode.IDENTITY:
+            continue

-        shape = tuple(shapes[key])
+        assert isinstance(norm_mode, NormalizationMode)

-        if "image" in key:
+        shape = tuple(ft.shape)
+
+        if ft.type is FeatureType.VISUAL:
            # sanity checks
            assert len(shape) == 3, f"number of dimensions of {key} != 3 ({shape=}"
            c, h, w = shape
@@ -52,7 +58,7 @@ def create_stats_buffers(
        # we assert they are not infinity anymore.

        buffer = {}
-        if mode == "mean_std":
+        if norm_mode is NormalizationMode.MEAN_STD:
            mean = torch.ones(shape, dtype=torch.float32) * torch.inf
            std = torch.ones(shape, dtype=torch.float32) * torch.inf
            buffer = nn.ParameterDict(
@@ -61,7 +67,7 @@ def create_stats_buffers(
                    "std": nn.Parameter(std, requires_grad=False),
                }
            )
-        elif mode == "min_max":
+        elif norm_mode is NormalizationMode.MIN_MAX:
            min = torch.ones(shape, dtype=torch.float32) * torch.inf
            max = torch.ones(shape, dtype=torch.float32) * torch.inf
            buffer = nn.ParameterDict(
@@ -71,15 +77,15 @@ def create_stats_buffers(
                }
            )

-        if stats is not None:
+        if stats:
            # Note: The clone is needed to make sure that the logic in save_pretrained doesn't see duplicated
            # tensors anywhere (for example, when we use the same stats for normalization and
            # unnormalization). See the logic here
            # https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L97.
-            if mode == "mean_std":
+            if norm_mode is NormalizationMode.MEAN_STD:
                buffer["mean"].data = stats[key]["mean"].clone()
                buffer["std"].data = stats[key]["std"].clone()
-            elif mode == "min_max":
+            elif norm_mode is NormalizationMode.MIN_MAX:
                buffer["min"].data = stats[key]["min"].clone()
                buffer["max"].data = stats[key]["max"].clone()

@@ -99,8 +105,8 @@ class Normalize(nn.Module):

    def __init__(
        self,
-        shapes: dict[str, list[int]],
-        modes: dict[str, str],
+        features: dict[str, PolicyFeature],
+        norm_map: dict[str, NormalizationMode],
        stats: dict[str, dict[str, Tensor]] | None = None,
    ):
        """
@@ -122,10 +128,10 @@ class Normalize(nn.Module):
                dataset is not needed to get the stats, since they are already in the policy state_dict.
        """
        super().__init__()
-        self.shapes = shapes
-        self.modes = modes
+        self.features = features
+        self.norm_map = norm_map
        self.stats = stats
-        stats_buffers = create_stats_buffers(shapes, modes, stats)
+        stats_buffers = create_stats_buffers(features, norm_map, stats)
        for key, buffer in stats_buffers.items():
            setattr(self, "buffer_" + key.replace(".", "_"), buffer)

@@ -133,16 +139,20 @@ class Normalize(nn.Module):
    @torch.no_grad
    def forward(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
        batch = dict(batch)  # shallow copy avoids mutating the input batch
-        for key, mode in self.modes.items():
+        for key, ft in self.features.items():
+            norm_mode = self.norm_map.get(ft.type, NormalizationMode.IDENTITY)
+            if norm_mode is NormalizationMode.IDENTITY:
+                continue
+
            buffer = getattr(self, "buffer_" + key.replace(".", "_"))

-            if mode == "mean_std":
+            if norm_mode is NormalizationMode.MEAN_STD:
                mean = buffer["mean"]
                std = buffer["std"]
                assert not torch.isinf(mean).any(), _no_stats_error_str("mean")
                assert not torch.isinf(std).any(), _no_stats_error_str("std")
                batch[key] = (batch[key] - mean) / (std + 1e-8)
-            elif mode == "min_max":
+            elif norm_mode is NormalizationMode.MIN_MAX:
                min = buffer["min"]
                max = buffer["max"]
                assert not torch.isinf(min).any(), _no_stats_error_str("min")
@@ -152,7 +162,7 @@ class Normalize(nn.Module):
                # normalize to [-1, 1]
                batch[key] = batch[key] * 2 - 1
            else:
-                raise ValueError(mode)
+                raise ValueError(norm_mode)
        return batch


@@ -164,8 +174,8 @@ class Unnormalize(nn.Module):

    def __init__(
        self,
-        shapes: dict[str, list[int]],
-        modes: dict[str, str],
+        features: dict[str, PolicyFeature],
+        norm_map: dict[str, NormalizationMode],
        stats: dict[str, dict[str, Tensor]] | None = None,
    ):
        """
@@ -187,11 +197,11 @@ class Unnormalize(nn.Module):
                dataset is not needed to get the stats, since they are already in the policy state_dict.
        """
        super().__init__()
-        self.shapes = shapes
-        self.modes = modes
+        self.features = features
+        self.norm_map = norm_map
        self.stats = stats
        # `self.buffer_observation_state["mean"]` contains `torch.tensor(state_dim)`
-        stats_buffers = create_stats_buffers(shapes, modes, stats)
+        stats_buffers = create_stats_buffers(features, norm_map, stats)
        for key, buffer in stats_buffers.items():
            setattr(self, "buffer_" + key.replace(".", "_"), buffer)

@@ -199,16 +209,20 @@ class Unnormalize(nn.Module):
    @torch.no_grad
    def forward(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
        batch = dict(batch)  # shallow copy avoids mutating the input batch
-        for key, mode in self.modes.items():
+        for key, ft in self.features.items():
+            norm_mode = self.norm_map.get(ft.type, NormalizationMode.IDENTITY)
+            if norm_mode is NormalizationMode.IDENTITY:
+                continue
+
            buffer = getattr(self, "buffer_" + key.replace(".", "_"))

-            if mode == "mean_std":
+            if norm_mode is NormalizationMode.MEAN_STD:
                mean = buffer["mean"]
                std = buffer["std"]
                assert not torch.isinf(mean).any(), _no_stats_error_str("mean")
                assert not torch.isinf(std).any(), _no_stats_error_str("std")
                batch[key] = batch[key] * std + mean
-            elif mode == "min_max":
+            elif norm_mode is NormalizationMode.MIN_MAX:
                min = buffer["min"]
                max = buffer["max"]
                assert not torch.isinf(min).any(), _no_stats_error_str("min")
@@ -216,5 +230,5 @@ class Unnormalize(nn.Module):
                batch[key] = (batch[key] + 1) / 2
                batch[key] = batch[key] * (max - min) + min
            else:
-                raise ValueError(mode)
+                raise ValueError(norm_mode)
        return batch