Add end effector action space to hil-serl (#861)

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Michel Aractingi
2025-03-17 14:22:33 +01:00
committed by AdilZouitine
parent 7960f2c3c1
commit b82faf7d8c
13 changed files with 2138 additions and 139 deletions

View File

@@ -5,26 +5,46 @@ fps: 10
env:
name: real_world
task: null
state_dim: 6
action_dim: 6
state_dim: 15
action_dim: 3
fps: ${fps}
device: mps
wrapper:
crop_params_dict:
observation.images.front: [102, 43, 358, 523]
observation.images.side: [92, 123, 379, 349]
# observation.images.front: [109, 37, 361, 557]
# observation.images.side: [94, 161, 372, 315]
observation.images.front: [171, 207, 116, 251]
observation.images.side: [232, 200, 142, 204]
resize_size: [128, 128]
control_time_s: 20
reset_follower_pos: true
control_time_s: 10
reset_follower_pos: false
use_relative_joint_positions: true
reset_time_s: 5
display_cameras: false
delta_action: 0.1
joint_masking_action_space: [1, 1, 1, 1, 0, 0] # disable wrist and gripper
delta_action: null #0.3
joint_masking_action_space: null #[1, 1, 1, 1, 0, 0] # disable wrist and gripper
add_joint_velocity_to_observation: true
add_ee_pose_to_observation: true
# If fixed_reset_joint_positions is null, teleoperation is used to reset the robot
# Bounds for pushcube_gamepad_lerobot15 dataset and experiments
# fixed_reset_joint_positions: [-19.86, 103.19, 117.33, 42.7, 13.89, 0.297]
# ee_action_space_params: # If null then ee_action_space is not used
# bounds:
# max: [0.291, 0.147, 0.074]
# min: [0.139, -0.143, 0.03]
# Bounds for insertcube_gamepad dataset and experiments
fixed_reset_joint_positions: [20.0, 90., 90., 75., -0.7910156, -0.5673759]
ee_action_space_params:
bounds:
max: [0.25295413, 0.07498981, 0.06862044]
min: [0.2010096, -0.12, 0.0433196]
use_gamepad: true
x_step_size: 0.03
y_step_size: 0.03
z_step_size: 0.03
reward_classifier:
pretrained_path: outputs/classifier/13-02-random-sample-resnet10-frozen/checkpoints/best/pretrained_model
config_path: lerobot/configs/policy/hilserl_classifier.yaml
pretrained_path: null # outputs/classifier/13-02-random-sample-resnet10-frozen/checkpoints/best/pretrained_model
config_path: null # lerobot/configs/policy/hilserl_classifier.yaml

View File

@@ -8,8 +8,7 @@
# env.gym.obs_type=environment_state_agent_pos \
seed: 1
dataset_repo_id: aractingi/push_cube_overfit_cropped_resized
#aractingi/push_cube_square_offline_demo_cropped_resized
dataset_repo_id: aractingi/insertcube_simple
training:
# Offline training dataloader
@@ -30,7 +29,7 @@ training:
online_steps_between_rollouts: 1000
online_sampling_ratio: 1.0
online_env_seed: 10000
online_buffer_capacity: 1000000
online_buffer_capacity: 10000
online_buffer_seed_size: 0
online_step_before_learning: 100 #5000
do_online_rollout_async: false
@@ -62,7 +61,7 @@ policy:
observation.images.side: [3, 128, 128]
# observation.image: [3, 128, 128]
output_shapes:
action: [4] # ["${env.action_dim}"]
action: ["${env.action_dim}"]
# Normalization / Unnormalization
input_normalization_modes:
@@ -77,23 +76,16 @@ policy:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
observation.state:
min: [-77.08008, 56.25, 60.55664, 19.511719, 0., -0.63829786]
max: [ 7.215820e+01, 1.5398438e+02, 1.6075195e+02, 9.3251953e+01, 0., -1.4184397e-01]
# min: [-87.09961, 62.402344, 67.23633, 36.035156, 77.34375,0.53691274]
# max: [58.183594, 131.83594, 145.98633, 82.08984, 78.22266, 0.60402685]
# min: [-88.50586, 23.81836, 0.87890625, -32.16797, 78.66211, 0.53691274]
# max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156, 88.18792]
# State layout (15 values): 6 joint positions, 6 joint velocities, 3 end-effector (ee) position coordinates
max: [ 52.822266, 136.14258, 142.03125, 72.1582, 22.675781, -0.5673759, 100., 100., 100., 100., 100., 100., 0.25295413, 0.07498981, 0.06862044]
min: [-2.6367188, 86.572266, 89.82422, 12.392578, -26.015625, -0.5673759, -100., -100., -100., -100., -100., -100., 0.2010096, -0.12, 0.0433196]
output_normalization_modes:
action: min_max
output_normalization_params:
# action:
# min: [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0]
# max: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
action:
min: [-149.23828125, -97.734375, -100.1953125, -73.740234375]
max: [149.23828125, 97.734375, 100.1953125, 73.740234375]
min: [-0.03, -0.03, -0.01]
max: [0.03, 0.03, 0.03]
# Architecture / modeling.
# Neural networks.

View File

@@ -14,9 +14,13 @@ calibration_dir: .cache/calibration/so100
# Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
# the number of motors in your follower arms.
max_relative_target: null
joint_position_relative_bounds:
max: [ 7.2158203e+01, 1.5398438e+02, 1.6075195e+02, 9.3251953e+01, 0., -1.4184397e-01]
min: [-77.08008, 56.25, 60.55664, 19.511719, 0., -0.63829786]
joint_position_relative_bounds: null
# max: [100, 100, 100, 100, 100, 100]
# min: [-100, -100, -100, -100, -100, -100]
# max: [ 7.2158203e+01, 1.5398438e+02, 1.6075195e+02, 9.3251953e+01, 0., -1.4184397e-01]
# min: [-77.08008, 56.25, 60.55664, 19.511719, 0., -0.63829786]
# max: [ 35.06836 , 103.18359 , 127.61719 , 75.58594 , 0., 0.]
# min: [ -8.876953 , 63.808594 , 90.49805 , 49.48242 , 0., 0.]
leader_arms:
main:
@@ -47,13 +51,13 @@ follower_arms:
cameras:
front:
_target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
camera_index: 0
camera_index: 1
fps: 30
width: 640
height: 480
side:
_target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
camera_index: 1
camera_index: 0
fps: 30
width: 640
height: 480