- Added JointMaskingActionSpace wrapper in gym_manipulator in order to select which joints will be controlled. For example, we can disable the gripper actions for some tasks.

- Added NaN detection mechanisms in the actor, learner and gym_manipulator for the case where we encounter NaNs in the loop.
- Changed the non-blocking option in the `.to(device)` calls so that it is only enabled on CUDA, because it was causing NaNs when running the policy on MPS.
- Added some joint clipping and limits in the env, robot and policy configs. TODO: clean this part up and define the limits in a single config file.

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
This commit is contained in:
Michel Aractingi
2025-02-11 11:34:46 +01:00
parent f2266101df
commit f1af97dc9c
9 changed files with 161 additions and 31 deletions

View File

@@ -18,10 +18,12 @@ env:
control_time_s: 20
reset_follower_pos: true
use_relative_joint_positions: true
reset_time_s: 10
reset_time_s: 5
display_cameras: false
delta_action: 0.1
joint_masking_action_space: [1, 1, 1, 1, 0, 0] # disable wrist and gripper
reward_classifier:
pretrained_path: outputs/classifier/checkpoints/best/pretrained_model
config_path: lerobot/configs/policy/hilserl_classifier.yaml

View File

@@ -20,7 +20,7 @@ training:
lr: 3e-4
eval_freq: 2500
log_freq: 500
log_freq: 1
save_freq: 2000000
online_steps: 1000000
@@ -31,7 +31,7 @@ training:
online_env_seed: 10000
online_buffer_capacity: 1000000
online_buffer_seed_size: 0
online_step_before_learning: 100 #5000
online_step_before_learning: 1000 #5000
do_online_rollout_async: false
policy_update_freq: 1
@@ -76,8 +76,10 @@ policy:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
observation.state:
min: [-88.50586, 23.81836, 0.87890625, -32.16797, 78.66211, 0.53691274]
max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156, 88.18792]
min: [-87.09961, 62.402344, 67.23633, 36.035156, 77.34375,0.53691274]
max: [58.183594, 131.83594, 145.98633, 82.08984, 78.22266, 0.60402685]
# min: [-88.50586, 23.81836, 0.87890625, -32.16797, 78.66211, 0.53691274]
# max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156, 88.18792]
output_normalization_modes:
action: min_max

View File

@@ -15,8 +15,13 @@ calibration_dir: .cache/calibration/so100
# the number of motors in your follower arms.
max_relative_target: null
joint_position_relative_bounds:
min: [-88.50586, 23.81836, 0.87890625, -32.16797, 78.66211, 0.53691274]
max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156, 88.18792]
min: [-87.09961, 62.402344, 67.23633, 36.035156, 77.34375,
0.53691274]
max: [58.183594, 131.83594, 145.98633, 82.08984, 78.22266,
0.60402685]
# min: [-88.50586, 23.81836, 0.87890625, -32.16797, 78.66211, 0.53691274]
# max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156, 88.18792]
leader_arms:
main: