[WIP] Not functional yet

Add ManiSkill environment configuration and wrappers - Introduced `VideoRecordConfig` for video recording settings. - Added `ManiskillEnvConfig` to encapsulate environment-specific configurations. - Implemented various wrappers for the ManiSkill environment, including observation and action scaling. - Enhanced the `make_maniskill` function to create a wrapped ManiSkill environment with video recording and observation processing. - Updated the `actor_server` and `learner_server` to utilize the new configuration structure. - Refactored the training pipeline to accommodate the new environment and policy configurations.
2025-03-26 08:15:05 +00:00
parent 114ec644d0
commit 056f79d358
9 changed files with 667 additions and 436 deletions
--- a/lerobot/common/utils/wandb_utils.py
+++ b/lerobot/common/utils/wandb_utils.py
@@ -92,6 +92,8 @@ class WandBLogger:
            resume="must" if cfg.resume else None,
            mode=self.cfg.mode if self.cfg.mode in ["online", "offline", "disabled"] else "online",
        )
+        # Handle custom step keys for asynchronous RL training.
+        self._wandb_custom_step_key: set[str] | None = None
        print(colored("Logs will be synced with wandb.", "blue", attrs=["bold"]))
        logging.info(f"Track this run --> {colored(wandb.run.get_url(), 'yellow', attrs=['bold'])}")
        self._wandb = wandb
@@ -108,9 +110,24 @@ class WandBLogger:
        artifact.add_file(checkpoint_dir / PRETRAINED_MODEL_DIR / SAFETENSORS_SINGLE_FILE)
        self._wandb.log_artifact(artifact)

-    def log_dict(self, d: dict, step: int, mode: str = "train"):
+    def log_dict(self, d: dict, step: int, mode: str = "train", custom_step_key: str | None = None):
        if mode not in {"train", "eval"}:
            raise ValueError(mode)
+        if step is None and custom_step_key is None:
+                    raise ValueError("Either step or custom_step_key must be provided.")
+
+        # NOTE: This is not simple. The wandb step must always increase monotonically, and it
+        # increases with each wandb.log call. In asynchronous RL, however, several step counters
+        # can coexist: for example the interaction step with the environment, the training step,
+        # the evaluation step, etc. So we need to define a custom step key
+        # to log the correct step for each metric.
+        if custom_step_key is not None:
+            if self._wandb_custom_step_key is None:
+                self._wandb_custom_step_key = set()
+            new_custom_key = f"{mode}/{custom_step_key}"
+            if new_custom_key not in self._wandb_custom_step_key:
+                self._wandb_custom_step_key.add(new_custom_key)
+                self._wandb.define_metric(new_custom_key, hidden=True)

        for k, v in d.items():
            if not isinstance(v, (int, float, str)):
@@ -118,7 +135,26 @@ class WandBLogger:
                    f'WandB logging of key "{k}" was ignored as its type is not handled by this wrapper.'
                )
                continue
-            self._wandb.log({f"{mode}/{k}": v}, step=step)
+
+            # Do not log the custom step key itself.
+            if (
+                self._wandb_custom_step_key is not None
+                and k in self._wandb_custom_step_key
+            ):
+                continue
+
+            if custom_step_key is not None:
+                value_custom_step = d[custom_step_key]
+                self._wandb.log(
+                    {
+                        f"{mode}/{k}": v,
+                        f"{mode}/{custom_step_key}": value_custom_step,
+                    }
+                )
+                continue
+
+            self._wandb.log(data={f"{mode}/{k}": v}, step=step)
+

    def log_video(self, video_path: str, step: int, mode: str = "train"):
        if mode not in {"train", "eval"}: