[Port HIL-SERL] Adjust Actor-Learner architecture & clean up dependency management for HIL-SERL (#722)

2025-02-21 16:29:00 +07:00
parent 150def839c
commit d48161da1b
17 changed files with 1949 additions and 475 deletions
--- a/lerobot/configs/policy/sac_maniskill.yaml
+++ b/lerobot/configs/policy/sac_maniskill.yaml
@@ -108,5 +108,6 @@ policy:
  utd_ratio: 2 # 10

 actor_learner_config:
-  actor_ip: "127.0.0.1"
-  port: 50051
+  learner_host: "127.0.0.1"
+  learner_port: 50051
+  policy_parameters_push_frequency: 15
--- a/lerobot/configs/policy/sac_real.yaml
+++ b/lerobot/configs/policy/sac_real.yaml
@@ -65,7 +65,7 @@ policy:
    action: [4] # ["${env.action_dim}"]

  # Normalization / Unnormalization
-  input_normalization_modes: 
+  input_normalization_modes:
    observation.images.front: mean_std
    observation.images.side: mean_std
    observation.state: min_max
@@ -80,7 +80,7 @@ policy:
      min: [-77.08008,     56.25,        60.55664,     19.511719,   0., -0.63829786]
      max: [ 7.215820e+01,  1.5398438e+02,  1.6075195e+02,  9.3251953e+01, 0., -1.4184397e-01]

-      # min: [-87.09961,     62.402344,    67.23633,     36.035156,    77.34375,0.53691274] 
+      # min: [-87.09961,     62.402344,    67.23633,     36.035156,    77.34375,0.53691274]
      # max: [58.183594,   131.83594,    145.98633,     82.08984,     78.22266, 0.60402685]
      # min: [-88.50586,  23.81836, 0.87890625, -32.16797, 78.66211,   0.53691274]
      # max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156,  88.18792]
@@ -112,8 +112,9 @@ policy:
  utd_ratio: 2 # 10

 actor_learner_config:
-  actor_ip: "127.0.0.1"
-  port: 50051
+  learner_host: "127.0.0.1"
+  learner_port: 50051
+  policy_parameters_push_frequency: 15

  # # Loss coefficients.
  # reward_coeff: 0.5