Reward classifier and training (#528)

Co-authored-by: Daniel Ritchie <daniel@brainwavecollective.ai> Co-authored-by: resolver101757 <kelster101757@hotmail.com> Co-authored-by: Jannik Grothusen <56967823+J4nn1K@users.noreply.github.com> Co-authored-by: Remi <re.cadene@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co>
2024-12-09 10:21:50 +01:00
parent b568de35ad
commit 58cc445921
10 changed files with 1131 additions and 7 deletions
--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
@@ -318,7 +318,7 @@ class LeRobotDatasetMetadata:
        obj.root.mkdir(parents=True, exist_ok=False)

        if robot is not None:
-            features = get_features_from_robot(robot, use_videos)
+            features = {**(features or {}), **get_features_from_robot(robot)}
            robot_type = robot.robot_type
            if not all(cam.fps == fps for cam in robot.cameras.values()):
                logging.warning(
--- a/lerobot/common/logger.py
+++ b/lerobot/common/logger.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Borrowed from https://github.com/fyhMer/fowm/blob/main/src/logger.py
+
+# TODO(rcadene, alexander-soare): clean this file
+"""
+
+import logging
+import os
+import re
+from glob import glob
+from pathlib import Path
+
+import torch
+from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
+from omegaconf import DictConfig, OmegaConf
+from termcolor import colored
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import LRScheduler
+
+import wandb
+from lerobot.common.policies.policy_protocol import Policy
+from lerobot.common.utils.utils import get_global_random_state, set_global_random_state
+
+
+def log_output_dir(out_dir):
+    """Log the output directory at INFO level, prefixed with a bold yellow "Output dir:" label."""
+    label = colored("Output dir:", "yellow", attrs=["bold"])
+    logging.info(label + f" {out_dir}")
+
+
+def cfg_to_group(cfg: DictConfig, return_list: bool = False) -> list[str] | str:
+    """Build the logging group name from the policy, dataset, env and seed of `cfg`.
+
+    Args:
+        cfg: Configuration providing `policy.name`, `dataset_repo_id`, `env.name` and `seed`.
+        return_list: If True, return the individual "key:value" tags instead of the joined name.
+
+    Returns:
+        The list of "key:value" tags, or those tags joined with "-".
+    """
+    labelled_values = (
+        ("policy", cfg.policy.name),
+        ("dataset", cfg.dataset_repo_id),
+        ("env", cfg.env.name),
+        ("seed", cfg.seed),
+    )
+    tags = [f"{label}:{value}" for label, value in labelled_values]
+    if return_list:
+        return tags
+    return "-".join(tags)
+
+
+def get_wandb_run_id_from_filesystem(checkpoint_dir: Path) -> str:
+    """Recover the ID of the previous WandB run from the files WandB left on disk.
+
+    Looks for exactly one entry matching `<checkpoint_dir>/../wandb/latest-run/run-*` and extracts the
+    ID from its file name, which is expected to look like `run-<id>.wandb`.
+
+    Args:
+        checkpoint_dir: The checkpoints directory of the run being resumed.
+
+    Returns:
+        The WandB run ID.
+
+    Raises:
+        RuntimeError: If there is not exactly one candidate file, or its name does not have the
+            expected `run-<id>.wandb` form.
+    """
+    paths = glob(str(checkpoint_dir / "../wandb/latest-run/run-*"))
+    if len(paths) != 1:
+        raise RuntimeError("Couldn't get the previous WandB run ID for run resumption.")
+    # Take the file name with `Path.name` rather than splitting on "/", so Windows separators work too.
+    # The dot before "wandb" is escaped so that only a literal ".wandb" suffix is accepted.
+    match = re.search(r"run-([^.]+)\.wandb", Path(paths[0]).name)
+    if match is None:
+        raise RuntimeError("Couldn't get the previous WandB run ID for run resumption.")
+    return match.group(1)
+
+
+class Logger:
+    """Primary logger object. Logs either locally or using wandb.
+
+    The logger creates the following directory structure:
+
+    provided_log_dir
+    ├── .hydra  # hydra's configuration cache
+    ├── checkpoints
+    │   ├── specific_checkpoint_name
+    │   │   ├── pretrained_model  # Hugging Face pretrained model directory
+    │   │   │   ├── ...
+    │   │   └── training_state.pth  # optimizer, scheduler, and random states + training step
+    │   ├── another_specific_checkpoint_name
+    │   │   ├── ...
+    │   ├── ...
+    │   └── last  # a softlink to the last logged checkpoint
+    """
+
+    pretrained_model_dir_name = "pretrained_model"
+    training_state_file_name = "training_state.pth"
+
+    def __init__(self, cfg: DictConfig, log_dir: str, wandb_job_name: str | None = None):
+        """
+        Args:
+            cfg: The run configuration. It is used to build the WandB group/tags (`policy.name`,
+                `dataset_repo_id`, `env.name`, `seed`) and to read the optional `wandb` section
+                (`project`, `entity`, `enable`, `notes`). `resume` is read when WandB is enabled.
+            log_dir: The directory to save all logs and training outputs to.
+            wandb_job_name: The WandB job name.
+        """
+        self._cfg = cfg
+        self.log_dir = Path(log_dir)
+        self.log_dir.mkdir(parents=True, exist_ok=True)
+        self.checkpoints_dir = self.get_checkpoints_dir(log_dir)
+        self.last_checkpoint_dir = self.get_last_checkpoint_dir(log_dir)
+        self.last_pretrained_model_dir = self.get_last_pretrained_model_dir(log_dir)
+
+        # Set up WandB.
+        self._group = cfg_to_group(cfg)
+        wandb_cfg = cfg.get("wandb", {})
+        project = wandb_cfg.get("project")
+        entity = wandb_cfg.get("entity")
+        enable_wandb = wandb_cfg.get("enable", False)
+        # WandB is only used when it is explicitly enabled AND a project is configured.
+        run_offline = not enable_wandb or not project
+        if run_offline:
+            logging.info(colored("Logs will be saved locally.", "yellow", attrs=["bold"]))
+            self._wandb = None
+        else:
+            os.environ["WANDB_SILENT"] = "true"
+            wandb_run_id = None
+            if cfg.resume:
+                wandb_run_id = get_wandb_run_id_from_filesystem(self.checkpoints_dir)
+
+            wandb.init(
+                id=wandb_run_id,
+                project=project,
+                entity=entity,
+                name=wandb_job_name,
+                notes=wandb_cfg.get("notes"),
+                tags=cfg_to_group(cfg, return_list=True),
+                dir=log_dir,
+                config=OmegaConf.to_container(cfg, resolve=True),
+                # TODO(rcadene): try set to True
+                save_code=False,
+                # TODO(rcadene): split train and eval, and run async eval with job_type="eval"
+                job_type="train_eval",
+                resume="must" if cfg.resume else None,
+            )
+            print(colored("Logs will be synced with wandb.", "blue", attrs=["bold"]))
+            logging.info(f"Track this run --> {colored(wandb.run.get_url(), 'yellow', attrs=['bold'])}")
+            self._wandb = wandb
+
+    @classmethod
+    def get_checkpoints_dir(cls, log_dir: str | Path) -> Path:
+        """Given the log directory, get the sub-directory in which checkpoints will be saved."""
+        return Path(log_dir) / "checkpoints"
+
+    @classmethod
+    def get_last_checkpoint_dir(cls, log_dir: str | Path) -> Path:
+        """Given the log directory, get the sub-directory in which the last checkpoint will be saved."""
+        return cls.get_checkpoints_dir(log_dir) / "last"
+
+    @classmethod
+    def get_last_pretrained_model_dir(cls, log_dir: str | Path) -> Path:
+        """
+        Given the log directory, get the sub-directory in which the last checkpoint's pretrained weights will
+        be saved.
+        """
+        return cls.get_last_checkpoint_dir(log_dir) / cls.pretrained_model_dir_name
+
+    def _remove_last_checkpoint_link(self) -> None:
+        """Remove the `last` checkpoint softlink if present, including a dangling one."""
+        # `Path.exists()` follows symlinks, so it is False for a link whose target was deleted. Without
+        # the `is_symlink()` check such a link would survive and the next `os.symlink` would raise
+        # FileExistsError.
+        if self.last_checkpoint_dir.is_symlink() or self.last_checkpoint_dir.exists():
+            os.remove(self.last_checkpoint_dir)
+
+    def save_model(self, save_dir: Path, policy: Policy, wandb_artifact_name: str | None = None):
+        """Save the weights of the Policy model using PyTorchModelHubMixin.
+
+        The weights are saved in a folder called "pretrained_model" under the checkpoint directory.
+
+        Optionally also upload the model to WandB.
+
+        As a side effect, any existing `last` checkpoint softlink is removed.
+
+        Args:
+            save_dir: Directory the policy weights and `config.yaml` are written to.
+            policy: The policy to save; its `save_pretrained` method is called.
+            wandb_artifact_name: Name of the WandB model artifact. Only used when WandB is enabled and
+                `cfg.wandb.disable_artifact` is falsy.
+        """
+        self.checkpoints_dir.mkdir(parents=True, exist_ok=True)
+        policy.save_pretrained(save_dir)
+        # Also save the full Hydra config for the env configuration.
+        OmegaConf.save(self._cfg, save_dir / "config.yaml")
+        if self._wandb and not self._cfg.wandb.disable_artifact:
+            # note wandb artifact does not accept ":" or "/" in its name
+            artifact = self._wandb.Artifact(wandb_artifact_name, type="model")
+            artifact.add_file(save_dir / SAFETENSORS_SINGLE_FILE)
+            self._wandb.log_artifact(artifact)
+        self._remove_last_checkpoint_link()
+
+    def save_training_state(
+        self,
+        save_dir: Path,
+        train_step: int,
+        optimizer: Optimizer,
+        scheduler: LRScheduler | None,
+    ):
+        """Checkpoint the global training_step, optimizer state, scheduler state, and random state.
+
+        All of these are saved as "training_state.pth" under the checkpoint directory.
+
+        Args:
+            save_dir: The checkpoint directory to write "training_state.pth" into.
+            train_step: The global training step, stored under the "step" key.
+            optimizer: Optimizer whose `state_dict()` is stored under the "optimizer" key.
+            scheduler: Optional scheduler; when given, its `state_dict()` is stored under "scheduler".
+        """
+        training_state = {
+            "step": train_step,
+            "optimizer": optimizer.state_dict(),
+            **get_global_random_state(),
+        }
+        if scheduler is not None:
+            training_state["scheduler"] = scheduler.state_dict()
+        torch.save(training_state, save_dir / self.training_state_file_name)
+
+    def save_checkpoint(
+        self,
+        train_step: int,
+        policy: Policy,
+        optimizer: Optimizer,
+        scheduler: LRScheduler | None,
+        identifier: str,
+    ):
+        """Checkpoint the model weights and the training state.
+
+        The checkpoint is written to `checkpoints/<identifier>` and the `last` softlink is re-pointed at
+        it.
+
+        Args:
+            train_step: The global training step to record.
+            policy: The policy whose weights are saved.
+            optimizer: The optimizer whose state is saved.
+            scheduler: Optional scheduler whose state is saved.
+            identifier: Name of the checkpoint sub-directory (also part of the WandB artifact name).
+        """
+        checkpoint_dir = self.checkpoints_dir / str(identifier)
+        wandb_artifact_name = (
+            None
+            if self._wandb is None
+            else f"{self._group.replace(':', '_').replace('/', '_')}-{self._cfg.seed}-{identifier}"
+        )
+        self.save_model(
+            checkpoint_dir / self.pretrained_model_dir_name, policy, wandb_artifact_name=wandb_artifact_name
+        )
+        self.save_training_state(checkpoint_dir, train_step, optimizer, scheduler)
+        # `save_model` already removed the link; repeating it here (a no-op in that case) keeps the
+        # symlink creation safe without relying on that side effect.
+        self._remove_last_checkpoint_link()
+        os.symlink(checkpoint_dir.absolute(), self.last_checkpoint_dir)
+
+    def load_last_training_state(self, optimizer: Optimizer, scheduler: LRScheduler | None) -> int:
+        """
+        Given the last checkpoint in the logging directory, load the optimizer state, scheduler state, and
+        random state, and return the global training step.
+
+        Raises:
+            ValueError: If the checkpoint holds a scheduler state but `scheduler` is None.
+        """
+        training_state = torch.load(self.last_checkpoint_dir / self.training_state_file_name)
+        optimizer.load_state_dict(training_state["optimizer"])
+        if scheduler is not None:
+            scheduler.load_state_dict(training_state["scheduler"])
+        elif "scheduler" in training_state:
+            raise ValueError(
+                "The checkpoint contains a scheduler state_dict, but no LRScheduler was provided."
+            )
+        # Small hack to get the expected keys: use `get_global_random_state`.
+        set_global_random_state({k: training_state[k] for k in get_global_random_state()})
+        return training_state["step"]
+
+    def log_dict(self, d, step, mode="train"):
+        """Log the scalar/str/`wandb.Table` entries of `d` to WandB under the `<mode>/` prefix.
+
+        Does nothing when WandB is disabled. Values of any other type are skipped with a warning.
+
+        Args:
+            d: Mapping from metric name to value.
+            step: The step passed to `wandb.log`.
+            mode: Either "train" or "eval".
+        """
+        assert mode in {"train", "eval"}
+        # TODO(alexander-soare): Add local text log.
+        if self._wandb is not None:
+            for k, v in d.items():
+                if not isinstance(v, (int, float, str, wandb.Table)):
+                    logging.warning(
+                        f'WandB logging of key "{k}" was ignored as its type is not handled by this wrapper.'
+                    )
+                    continue
+                self._wandb.log({f"{mode}/{k}": v}, step=step)
+
+    def log_video(self, video_path: str, step: int, mode: str = "train"):
+        """Log the mp4 video at `video_path` to WandB as `<mode>/video`, using `cfg.fps`.
+
+        WandB must be enabled; this is enforced with an assertion.
+
+        Args:
+            video_path: Path to the video file.
+            step: The step passed to `wandb.log`.
+            mode: Either "train" or "eval".
+        """
+        assert mode in {"train", "eval"}
+        assert self._wandb is not None
+        wandb_video = self._wandb.Video(video_path, fps=self._cfg.fps, format="mp4")
+        self._wandb.log({f"{mode}/video": wandb_video}, step=step)
--- a/lerobot/common/policies/hilserl/classifier/configuration_classifier.py
+++ b/lerobot/common/policies/hilserl/classifier/configuration_classifier.py
@@ -0,0 +1,36 @@
+import json
+import os
+from dataclasses import asdict, dataclass
+
+import torch
+
+
+@dataclass
+class ClassifierConfig:
+    """Configuration for the Classifier model.
+
+    The configuration round-trips through a `config.json` file via `save_pretrained` and
+    `from_pretrained`.
+    """
+
+    num_classes: int = 2
+    hidden_dim: int = 256
+    dropout_rate: float = 0.1
+    model_name: str = "microsoft/resnet-50"
+    # Evaluated once, when the class is defined. Prefer CUDA, then MPS, and fall back to CPU so the
+    # default is always a device that exists on the current machine.
+    device: str = (
+        "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+    )
+    model_type: str = "cnn"  # "transformer" or "cnn"
+
+    def save_pretrained(self, save_dir):
+        """Save config to json file.
+
+        Args:
+            save_dir: Directory to write `config.json` into; created if it does not exist.
+        """
+        os.makedirs(save_dir, exist_ok=True)
+
+        # Convert to dict and save as JSON
+        config_dict = asdict(self)
+        with open(os.path.join(save_dir, "config.json"), "w", encoding="utf-8") as f:
+            json.dump(config_dict, f, indent=2)
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path):
+        """Load config from json file.
+
+        Args:
+            pretrained_model_name_or_path: Local directory containing a `config.json` file.
+
+        Returns:
+            A `ClassifierConfig` built from the keys of the JSON file.
+        """
+        config_file = os.path.join(pretrained_model_name_or_path, "config.json")
+
+        with open(config_file, encoding="utf-8") as f:
+            config_dict = json.load(f)
+
+        return cls(**config_dict)
--- a/lerobot/common/policies/hilserl/classifier/modeling_classifier.py
+++ b/lerobot/common/policies/hilserl/classifier/modeling_classifier.py
@@ -0,0 +1,134 @@
+import logging
+from typing import Optional
+
+import torch
+from huggingface_hub import PyTorchModelHubMixin
+from torch import Tensor, nn
+from transformers import AutoImageProcessor, AutoModel
+
+from .configuration_classifier import ClassifierConfig
+
+# NOTE(review): `basicConfig` configures the root logger as an import-time side effect of this module;
+# it does nothing if the root logger already has handlers.
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+class ClassifierOutput:
+    """Plain container bundling a classifier's logits with optional probabilities and hidden states."""
+
+    def __init__(
+        self,
+        logits: Tensor,
+        probabilities: Optional[Tensor] = None,
+        hidden_states: Optional[Tensor] = None,
+    ):
+        # The values are stored as given; nothing is copied or validated.
+        self.hidden_states = hidden_states
+        self.probabilities = probabilities
+        self.logits = logits
+
+
+class Classifier(
+    nn.Module,
+    PyTorchModelHubMixin,
+    # Add Hub metadata
+    library_name="lerobot",
+    repo_url="https://github.com/huggingface/lerobot",
+    tags=["robotics", "vision-classifier"],
+):
+    """Image classifier built on top of a pre-trained encoder.
+
+    The encoder named by `config.model_name` is loaded, its parameters are frozen, and a small MLP head
+    is trained on top of its pooled features. With `config.num_classes == 2` the head outputs a single
+    logit (sigmoid probabilities); otherwise it outputs one logit per class (softmax probabilities).
+    """
+
+    # Add name attribute for factory
+    name = "classifier"
+
+    def __init__(self, config: ClassifierConfig):
+        """Load the processor and encoder named in `config`, freeze the encoder and build the head."""
+        super().__init__()
+        self.config = config
+        self.processor = AutoImageProcessor.from_pretrained(self.config.model_name, trust_remote_code=True)
+        encoder = AutoModel.from_pretrained(self.config.model_name, trust_remote_code=True)
+        # Extract vision model if we're given a multimodal model
+        if hasattr(encoder, "vision_model"):
+            # Use the module logger rather than the root logger so records carry this module's name.
+            logger.info("Multimodal model detected - using vision encoder only")
+            self.encoder = encoder.vision_model
+            self.vision_config = encoder.config.vision_config
+        else:
+            self.encoder = encoder
+            self.vision_config = getattr(encoder, "config", None)
+
+        # Model type from config
+        self.is_cnn = self.config.model_type == "cnn"
+
+        # For CNNs, initialize backbone
+        if self.is_cnn:
+            self._setup_cnn_backbone()
+
+        self._freeze_encoder()
+        self._build_classifier_head()
+
+    def _setup_cnn_backbone(self):
+        """Set up CNN encoder and record its output feature dimension in `self.feature_dim`.
+
+        Raises:
+            ValueError: If the encoder has neither an `fc` layer nor a config with `hidden_sizes`.
+        """
+        if hasattr(self.encoder, "fc"):
+            self.feature_dim = self.encoder.fc.in_features
+            # Drop the last child module. From here on the encoder is a plain `nn.Sequential`.
+            self.encoder = nn.Sequential(*list(self.encoder.children())[:-1])
+        elif hasattr(getattr(self.encoder, "config", None), "hidden_sizes"):
+            self.feature_dim = self.encoder.config.hidden_sizes[-1]  # Last channel dimension
+        else:
+            raise ValueError("Unsupported CNN architecture")
+
+    def _freeze_encoder(self) -> None:
+        """Freeze the encoder parameters."""
+        # NOTE(review): this only disables gradients. If the encoder contains BatchNorm layers, their
+        # running statistics still update while the module is in train mode - confirm this is intended.
+        for param in self.encoder.parameters():
+            param.requires_grad = False
+
+    def _build_classifier_head(self) -> None:
+        """Initialize the classifier head architecture.
+
+        Raises:
+            ValueError: For transformer encoders whose config has no `hidden_size`.
+        """
+        # Get input dimension based on model type
+        if self.is_cnn:
+            input_dim = self.feature_dim
+        else:  # Transformer models
+            if hasattr(self.encoder.config, "hidden_size"):
+                input_dim = self.encoder.config.hidden_size
+            else:
+                raise ValueError("Unsupported transformer architecture since hidden_size is not found")
+
+        self.classifier_head = nn.Sequential(
+            nn.Linear(input_dim, self.config.hidden_dim),
+            nn.Dropout(self.config.dropout_rate),
+            nn.LayerNorm(self.config.hidden_dim),
+            nn.ReLU(),
+            # Binary classification uses a single logit; otherwise one logit per class.
+            nn.Linear(self.config.hidden_dim, 1 if self.config.num_classes == 2 else self.config.num_classes),
+        )
+
+    def _get_encoder_output(self, x: torch.Tensor) -> torch.Tensor:
+        """Extract the appropriate output from the encoder.
+
+        The encoder is always run under `torch.no_grad()`.
+        """
+        # Process images with the processor (handles resizing and normalization)
+        processed = self.processor(
+            images=x,  # LeRobotDataset already provides proper tensor format
+            return_tensors="pt",
+        )
+        processed = processed["pixel_values"].to(x.device)
+
+        with torch.no_grad():
+            outputs = self.encoder(processed)
+            if self.is_cnn:
+                # An encoder rebuilt as `nn.Sequential` in `_setup_cnn_backbone` returns a plain tensor;
+                # otherwise take the pooled output from the model's output object.
+                features = outputs if isinstance(outputs, torch.Tensor) else outputs.pooler_output
+
+                # Drop trailing singleton dimensions, if any.
+                if features.dim() > 2:
+                    features = features.squeeze(-1).squeeze(-1)
+                return features
+
+            # Transformer models: prefer the pooled output, else the first token's hidden state.
+            if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
+                return outputs.pooler_output
+            return outputs.last_hidden_state[:, 0, :]
+
+    def forward(self, x: torch.Tensor) -> ClassifierOutput:
+        """Forward pass of the classifier.
+
+        Returns:
+            A `ClassifierOutput` holding the logits, the probabilities (sigmoid for the binary case,
+            softmax otherwise) and the encoder features as `hidden_states`.
+        """
+        # For training, we expect input to be a tensor directly from LeRobotDataset
+        encoder_output = self._get_encoder_output(x)
+        logits = self.classifier_head(encoder_output)
+
+        if self.config.num_classes == 2:
+            logits = logits.squeeze(-1)
+            probabilities = torch.sigmoid(logits)
+        else:
+            probabilities = torch.softmax(logits, dim=-1)
+
+        return ClassifierOutput(logits=logits, probabilities=probabilities, hidden_states=encoder_output)
--- a/lerobot/common/robot_devices/control_utils.py
+++ b/lerobot/common/robot_devices/control_utils.py
@@ -128,14 +128,22 @@ def predict_action(observation, policy, device, use_amp):
    return action


-def init_keyboard_listener():
-    # Allow to exit early while recording an episode or resetting the environment,
-    # by tapping the right arrow key '->'. This might require a sudo permission
-    # to allow your terminal to monitor keyboard events.
+def init_keyboard_listener(assign_rewards=False):
+    """
+    Initializes a keyboard listener to enable early termination of an episode 
+    or environment reset by pressing the right arrow key ('->'). This may require 
+    sudo permissions to allow the terminal to monitor keyboard events.
+
+    Args:
+        assign_rewards (bool): If True, allows annotating the collected trajectory 
+        with a binary reward at the end of the episode to indicate success.
+    """
    events = {}
    events["exit_early"] = False
    events["rerecord_episode"] = False
    events["stop_recording"] = False
+    if assign_rewards:
+        events["next.reward"] = 0

    if is_headless():
        logging.warning(
@@ -160,6 +168,13 @@ def init_keyboard_listener():
                print("Escape key pressed. Stopping data recording...")
                events["stop_recording"] = True
                events["exit_early"] = True
+            elif assign_rewards and key == keyboard.Key.space:
+                events["next.reward"] = 1 if events["next.reward"] == 0 else 0
+                print(
+                    "Space key pressed. Assigning new reward to the subsequent frames. New reward:",
+                    events["next.reward"],
+                )
+
        except Exception as e:
            print(f"Error handling key press: {e}")