- Added a JointMaskingActionSpace wrapper in gym_manipulator to select which joints are controlled. For example, the gripper actions can be disabled for some tasks.

- Added NaN detection mechanisms in the actor, learner and gym_manipulator to handle NaNs encountered in the loop.
- Changed the non-blocking option in the `.to(device)` calls so that it is only enabled on CUDA, because it was causing NaNs when running the policy on MPS.
- Added some joint clipping and limits in the env, robot and policy configs. TODO: clean this part up and define the limits in a single config file.

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
2025-02-11 11:34:46 +01:00
parent f2266101df
commit f1af97dc9c
9 changed files with 161 additions and 31 deletions
--- a/lerobot/configs/env/so100_real.yaml
+++ b/lerobot/configs/env/so100_real.yaml
@@ -18,10 +18,12 @@ env:
    control_time_s: 20
    reset_follower_pos: true
    use_relative_joint_positions: true
-    reset_time_s: 10
+    reset_time_s: 5
    display_cameras: false
    delta_action: 0.1
+    joint_masking_action_space: [1, 1, 1, 1, 0, 0] # disable wrist and gripper

  reward_classifier:
    pretrained_path: outputs/classifier/checkpoints/best/pretrained_model
    config_path: lerobot/configs/policy/hilserl_classifier.yaml
+    
--- a/lerobot/configs/policy/sac_real.yaml
+++ b/lerobot/configs/policy/sac_real.yaml
@@ -20,7 +20,7 @@ training:
  lr: 3e-4

  eval_freq: 2500
-  log_freq: 500
+  log_freq: 1
  save_freq: 2000000

  online_steps: 1000000
@@ -31,7 +31,7 @@ training:
  online_env_seed: 10000
  online_buffer_capacity: 1000000
  online_buffer_seed_size: 0
-  online_step_before_learning: 100 #5000
+  online_step_before_learning: 1000 #5000
  do_online_rollout_async: false
  policy_update_freq: 1

@@ -76,8 +76,10 @@ policy:
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    observation.state:
-      min: [-88.50586,  23.81836, 0.87890625, -32.16797, 78.66211,   0.53691274]
-      max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156,  88.18792]
+      min: [-87.09961,     62.402344,    67.23633,     36.035156,    77.34375,0.53691274] 
+      max: [58.183594,   131.83594,    145.98633,     82.08984,     78.22266, 0.60402685]
+      # min: [-88.50586,  23.81836, 0.87890625, -32.16797, 78.66211,   0.53691274]
+      # max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156,  88.18792]

  output_normalization_modes:
    action: min_max
--- a/lerobot/configs/robot/so100.yaml
+++ b/lerobot/configs/robot/so100.yaml
@@ -15,8 +15,13 @@ calibration_dir: .cache/calibration/so100
 # the number of motors in your follower arms.
 max_relative_target: null
 joint_position_relative_bounds: 
-  min: [-88.50586,  23.81836, 0.87890625, -32.16797, 78.66211,   0.53691274]
-  max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156,  88.18792]
+   min: [-87.09961,     62.402344,    67.23633,     36.035156,    77.34375,
+   0.53691274] 
+   max: [58.183594,   131.83594,    145.98633,     82.08984,     78.22266,
+   0.60402685]
+   
+  # min: [-88.50586,  23.81836, 0.87890625, -32.16797, 78.66211,   0.53691274]
+  # max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156,  88.18792]

 leader_arms:
  main: