forked from tangger/lerobot
- Added JointMaskingActionSpace wrapper in gym_manipulator in order to select which joints will be controlled. For example, we can disable the gripper actions for some tasks.
- Added NaN detection mechanisms in the actor, learner and gym_manipulator for the case where we encounter NaNs in the loop.
- Changed the non-blocking transfers in the `.to(device)` calls to be enabled only on CUDA, because they were causing NaNs when running the policy on MPS.
- Added some joint clipping and limits in the env, robot and policy configs. TODO: clean up this part and define the limits in a single config file.

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
This commit is contained in:
4
lerobot/configs/env/so100_real.yaml
vendored
4
lerobot/configs/env/so100_real.yaml
vendored
@@ -18,10 +18,12 @@ env:
|
||||
control_time_s: 20
|
||||
reset_follower_pos: true
|
||||
use_relative_joint_positions: true
|
||||
reset_time_s: 10
|
||||
reset_time_s: 5
|
||||
display_cameras: false
|
||||
delta_action: 0.1
|
||||
joint_masking_action_space: [1, 1, 1, 1, 0, 0] # per-joint mask: 1 = controlled, 0 = disabled (here the wrist and gripper are disabled)
|
||||
|
||||
reward_classifier:
|
||||
pretrained_path: outputs/classifier/checkpoints/best/pretrained_model
|
||||
config_path: lerobot/configs/policy/hilserl_classifier.yaml
|
||||
|
||||
@@ -20,7 +20,7 @@ training:
|
||||
lr: 3e-4
|
||||
|
||||
eval_freq: 2500
|
||||
log_freq: 500
|
||||
log_freq: 1
|
||||
save_freq: 2000000
|
||||
|
||||
online_steps: 1000000
|
||||
@@ -31,7 +31,7 @@ training:
|
||||
online_env_seed: 10000
|
||||
online_buffer_capacity: 1000000
|
||||
online_buffer_seed_size: 0
|
||||
online_step_before_learning: 100 #5000
|
||||
online_step_before_learning: 1000 #5000
|
||||
do_online_rollout_async: false
|
||||
policy_update_freq: 1
|
||||
|
||||
@@ -76,8 +76,10 @@ policy:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
observation.state:
|
||||
min: [-88.50586, 23.81836, 0.87890625, -32.16797, 78.66211, 0.53691274]
|
||||
max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156, 88.18792]
|
||||
min: [-87.09961, 62.402344, 67.23633, 36.035156, 77.34375,0.53691274]
|
||||
max: [58.183594, 131.83594, 145.98633, 82.08984, 78.22266, 0.60402685]
|
||||
# min: [-88.50586, 23.81836, 0.87890625, -32.16797, 78.66211, 0.53691274]
|
||||
# max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156, 88.18792]
|
||||
|
||||
output_normalization_modes:
|
||||
action: min_max
|
||||
|
||||
@@ -15,8 +15,13 @@ calibration_dir: .cache/calibration/so100
|
||||
# the number of motors in your follower arms.
|
||||
max_relative_target: null
|
||||
joint_position_relative_bounds:
|
||||
min: [-88.50586, 23.81836, 0.87890625, -32.16797, 78.66211, 0.53691274]
|
||||
max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156, 88.18792]
|
||||
min: [-87.09961, 62.402344, 67.23633, 36.035156, 77.34375,
|
||||
0.53691274]
|
||||
max: [58.183594, 131.83594, 145.98633, 82.08984, 78.22266,
|
||||
0.60402685]
|
||||
|
||||
# min: [-88.50586, 23.81836, 0.87890625, -32.16797, 78.66211, 0.53691274]
|
||||
# max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156, 88.18792]
|
||||
|
||||
leader_arms:
|
||||
main:
|
||||
|
||||
Reference in New Issue
Block a user