Enable logging all the information returned by the forward methods of policies (#151)

2024-05-10 07:45:32 +01:00
parent b187942db4
commit 1249aee3ac
5 changed files with 12 additions and 4 deletions
--- a/lerobot/common/logger.py
+++ b/lerobot/common/logger.py
@@ -114,6 +114,11 @@ class Logger:
        assert mode in {"train", "eval"}
        if self._wandb is not None:
            for k, v in d.items():
+                if not isinstance(v, (int, float, str)):
+                    logging.warning(
+                        f'WandB logging of key "{k}" was ignored as its type is not handled by this wrapper.'
+                    )
+                    continue
                self._wandb.log({f"{mode}/{k}": v}, step=step)

    def log_video(self, video_path: str, step: int, mode: str = "train"):
--- a/lerobot/common/policies/act/modeling_act.py
+++ b/lerobot/common/policies/act/modeling_act.py
@@ -101,7 +101,7 @@ class ACTPolicy(nn.Module, PyTorchModelHubMixin):
            F.l1_loss(batch["action"], actions_hat, reduction="none") * ~batch["action_is_pad"].unsqueeze(-1)
        ).mean()

-        loss_dict = {"l1_loss": l1_loss}
+        loss_dict = {"l1_loss": l1_loss.item()}
        if self.config.use_vae:
            # Calculate Dₖₗ(latent_pdf || standard_normal). Note: After computing the KL-divergence for
            # each dimension independently, we sum over the latent dimension to get the total
@@ -110,7 +110,7 @@ class ACTPolicy(nn.Module, PyTorchModelHubMixin):
            mean_kld = (
                (-0.5 * (1 + log_sigma_x2_hat - mu_hat.pow(2) - (log_sigma_x2_hat).exp())).sum(-1).mean()
            )
-            loss_dict["kld_loss"] = mean_kld
+            loss_dict["kld_loss"] = mean_kld.item()
            loss_dict["loss"] = l1_loss + mean_kld * self.config.kl_weight
        else:
            loss_dict["loss"] = l1_loss
--- a/lerobot/common/policies/policy_protocol.py
+++ b/lerobot/common/policies/policy_protocol.py
@@ -38,7 +38,8 @@ class Policy(Protocol):
    def forward(self, batch: dict[str, Tensor]) -> dict:
        """Run the batch through the model and compute the loss for training or validation.

-        Returns a dictionary with "loss" and maybe other information.
+        Returns a dictionary with "loss" and potentially other information. Apart from "loss", which is a Tensor,
+        all other items should be logging-friendly, native Python types.
        """

    def select_action(self, batch: dict[str, Tensor]):