- Fixed big issue in the loading of the policy parameters sent by the learner to the actor -- pass only the actor to the update_policy_parameters and remove strict=False

- Fixed big issue in the normalization of the actions in the `forward` function of the critic -- remove the `torch.no_grad` decorator in `normalize.py` in the normalization function - Fixed performance issue to boost the optimization frequency by setting the storage device to be the same as the device of learning. Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
2025-02-19 16:22:51 +00:00
parent d9cd85d976
commit 795063aa1b
7 changed files with 68 additions and 57 deletions
--- a/lerobot/configs/policy/sac_maniskill.yaml
+++ b/lerobot/configs/policy/sac_maniskill.yaml
@@ -64,13 +64,29 @@ policy:
    action: [7]

  # Normalization / Unnormalization
-  input_normalization_modes: null
+  input_normalization_modes:
+    observation.state: min_max
+  input_normalization_params: 
+    observation.state:
+      min: [-1.9361e+00, -7.7640e-01, -7.7094e-01, -2.9709e+00, -8.5656e-01,
+          1.0764e+00, -1.2680e+00,  0.0000e+00,  0.0000e+00, -9.3448e+00,
+         -3.3828e+00, -3.8420e+00, -5.2553e+00, -3.4154e+00, -6.5082e+00,
+         -6.0500e+00, -8.7193e+00, -8.2337e+00, -3.4650e-01, -4.9441e-01,
+          8.3516e-03, -3.1114e-01, -9.9700e-01, -2.3471e-01, -2.7137e-01] 
+
+      max: [ 0.8644,  1.4306,  1.8520, -0.7578,  0.9508,  3.4901,  1.9381,  0.0400,
+          0.0400,  5.0885,  4.7156,  7.9393,  7.9100,  2.9796,  5.7720,  4.7163,
+          7.8145,  9.7415,  0.2422,  0.4505,  0.6306,  0.2622,  1.0000,  0.5135,
+          0.4001]
+
  output_normalization_modes:
    action: min_max
  output_normalization_params:
    action:
      min: [-10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0]
      max: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
+  output_normalization_shapes:
+    action: [7]

  # Architecture / modeling.
  # Neural networks.