- Added `lerobot/scripts/server/gym_manipulator.py`, which contains all the wrappers needed to run a gym-style environment around the real robot.

- Added `lerobot/scripts/server/find_joint_limits.py` to find the minimum and maximum joint angles of the motion you want the robot to explore during RL training.
- Added logic in `manipulator.py` to limit the joint angles so that motion stays within a predefined joint position range. The limits are specified in the YAML config for each robot; see `so100.yaml` for an example.

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
2025-02-06 16:29:37 +01:00
parent 163bcbcad4
commit 729b4ed697
8 changed files with 812 additions and 29 deletions
--- a/lerobot/common/policies/hilserl/classifier/modeling_classifier.py
+++ b/lerobot/common/policies/hilserl/classifier/modeling_classifier.py
@@ -47,7 +47,7 @@ class Classifier(

        super().__init__()
        self.config = config
-        self.processor = AutoImageProcessor.from_pretrained(self.config.model_name, trust_remote_code=True)
+        # self.processor = AutoImageProcessor.from_pretrained(self.config.model_name, trust_remote_code=True)
        encoder = AutoModel.from_pretrained(self.config.model_name, trust_remote_code=True)
        # Extract vision model if we're given a multimodal model
        if hasattr(encoder, "vision_model"):
@@ -108,11 +108,12 @@ class Classifier(
    def _get_encoder_output(self, x: torch.Tensor) -> torch.Tensor:
        """Extract the appropriate output from the encoder."""
        # Process images with the processor (handles resizing and normalization)
-        processed = self.processor(
-            images=x,  # LeRobotDataset already provides proper tensor format
-            return_tensors="pt",
-        )
-        processed = processed["pixel_values"].to(x.device)
+        # processed = self.processor(
+        #     images=x,  # LeRobotDataset already provides proper tensor format
+        #     return_tensors="pt",
+        # )
+        # processed = processed["pixel_values"].to(x.device)
+        processed = x

        with torch.no_grad():
            if self.is_cnn:
@@ -146,6 +147,6 @@ class Classifier(

    def predict_reward(self, x):
        if self.config.num_classes == 2:
-            return (self.forward(x).probabilities > 0.5).float()
+            return (self.forward(x).probabilities > 0.6).float()
        else:
            return torch.argmax(self.forward(x).probabilities, dim=1)