Add end effector action space to hil-serl (#861)

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-03-17 14:22:33 +01:00
parent 7960f2c3c1
commit b82faf7d8c
13 changed files with 2138 additions and 139 deletions
--- a/lerobot/configs/env/so100_real.yaml
+++ b/lerobot/configs/env/so100_real.yaml
@@ -5,26 +5,46 @@ fps: 10
 env:
  name: real_world
  task: null
-  state_dim: 6
-  action_dim: 6
+  state_dim: 15
+  action_dim: 3
  fps: ${fps}
  device: mps

  wrapper:
    crop_params_dict:
-      observation.images.front: [102, 43, 358, 523]
-      observation.images.side: [92, 123, 379, 349]
-      # observation.images.front: [109, 37, 361, 557]
-      # observation.images.side: [94, 161, 372, 315]
+      observation.images.front: [171, 207, 116, 251]
+      observation.images.side: [232, 200, 142, 204]
    resize_size: [128, 128]
-    control_time_s: 20
-    reset_follower_pos: true
+    control_time_s: 10
+    reset_follower_pos: false
    use_relative_joint_positions: true
    reset_time_s: 5
    display_cameras: false
-    delta_action: 0.1
-    joint_masking_action_space: [1, 1, 1, 1, 0, 0] # disable wrist and gripper
+    delta_action: null # e.g. 0.3
+    joint_masking_action_space: null # e.g. [1, 1, 1, 1, 0, 0] to disable wrist and gripper
+    add_joint_velocity_to_observation: true
+    add_ee_pose_to_observation: true
+
+    # If fixed_reset_joint_positions is null, teleoperation is used to reset the robot
+    # Bounds for pushcube_gamepad_lerobot15 dataset and experiments
+    # fixed_reset_joint_positions: [-19.86, 103.19, 117.33, 42.7, 13.89, 0.297]
+    # ee_action_space_params: # If null then ee_action_space is not used
+    #   bounds:
+    #     max: [0.291, 0.147, 0.074]
+    #     min: [0.139, -0.143, 0.03]
+
+    # Bounds for insertcube_gamepad dataset and experiments
+    fixed_reset_joint_positions: [20.0,  90.,   90.,   75.,  -0.7910156, -0.5673759]
+    ee_action_space_params:
+      bounds:
+        max: [0.25295413, 0.07498981, 0.06862044]
+        min: [0.2010096,  -0.12, 0.0433196]
+
+      use_gamepad: true
+      x_step_size: 0.03
+      y_step_size: 0.03
+      z_step_size: 0.03

  reward_classifier:
-    pretrained_path:  outputs/classifier/13-02-random-sample-resnet10-frozen/checkpoints/best/pretrained_model
-    config_path: lerobot/configs/policy/hilserl_classifier.yaml
+    pretrained_path: null # outputs/classifier/13-02-random-sample-resnet10-frozen/checkpoints/best/pretrained_model
+    config_path: null # lerobot/configs/policy/hilserl_classifier.yaml
--- a/lerobot/configs/policy/sac_real.yaml
+++ b/lerobot/configs/policy/sac_real.yaml
@@ -8,8 +8,7 @@
 #   env.gym.obs_type=environment_state_agent_pos \

 seed: 1
-dataset_repo_id: aractingi/push_cube_overfit_cropped_resized
-#aractingi/push_cube_square_offline_demo_cropped_resized
+dataset_repo_id:  aractingi/insertcube_simple

 training:
  # Offline training dataloader
@@ -30,7 +29,7 @@ training:
  online_steps_between_rollouts: 1000
  online_sampling_ratio: 1.0
  online_env_seed: 10000
-  online_buffer_capacity: 1000000
+  online_buffer_capacity: 10000
  online_buffer_seed_size: 0
  online_step_before_learning: 100 #5000
  do_online_rollout_async: false
@@ -62,7 +61,7 @@ policy:
    observation.images.side: [3, 128, 128]
    # observation.image: [3, 128, 128]
  output_shapes:
-    action: [4] # ["${env.action_dim}"]
+    action: ["${env.action_dim}"]

  # Normalization / Unnormalization
  input_normalization_modes:
@@ -77,23 +76,16 @@ policy:
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    observation.state:
-      min: [-77.08008,     56.25,        60.55664,     19.511719,   0., -0.63829786]
-      max: [ 7.215820e+01,  1.5398438e+02,  1.6075195e+02,  9.3251953e+01, 0., -1.4184397e-01]
-
-      # min: [-87.09961,     62.402344,    67.23633,     36.035156,    77.34375,0.53691274]
-      # max: [58.183594,   131.83594,    145.98633,     82.08984,     78.22266, 0.60402685]
-      # min: [-88.50586,  23.81836, 0.87890625, -32.16797, 78.66211,   0.53691274]
-      # max: [84.55078, 187.11914, 145.98633, 101.60156, 146.60156,  88.18792]
+      # Layout (15 values): 6 joint positions, 6 joint velocities, 3 end-effector (ee) position coordinates
+      max: [ 52.822266,  136.14258,   142.03125,   72.1582,     22.675781,   -0.5673759, 100., 100., 100., 100., 100., 100., 0.25295413, 0.07498981, 0.06862044]
+      min: [-2.6367188,  86.572266,   89.82422,    12.392578,    -26.015625,   -0.5673759, -100., -100., -100., -100., -100., -100., 0.2010096,  -0.12, 0.0433196]

  output_normalization_modes:
    action: min_max
  output_normalization_params:
-    # action:
-    #   min: [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0]
-    #   max: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    action:
-      min: [-149.23828125, -97.734375, -100.1953125, -73.740234375]
-      max: [149.23828125, 97.734375, 100.1953125, 73.740234375]
+      min: [-0.03, -0.03, -0.01]
+      max: [0.03, 0.03, 0.03]

  # Architecture / modeling.
  # Neural networks.
--- a/lerobot/configs/robot/so100.yaml
+++ b/lerobot/configs/robot/so100.yaml
@@ -14,9 +14,13 @@ calibration_dir: .cache/calibration/so100
 # Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
 # the number of motors in your follower arms.
 max_relative_target: null
-joint_position_relative_bounds:
-  max: [ 7.2158203e+01,  1.5398438e+02,  1.6075195e+02,  9.3251953e+01, 0., -1.4184397e-01]
-  min: [-77.08008,     56.25,        60.55664,     19.511719,   0., -0.63829786]
+joint_position_relative_bounds: null
+  # max: [100, 100, 100, 100, 100, 100]
+  # min: [-100, -100, -100, -100, -100, -100]
+  # max: [ 7.2158203e+01,  1.5398438e+02,  1.6075195e+02,  9.3251953e+01, 0., -1.4184397e-01]
+  # min: [-77.08008,     56.25,        60.55664,     19.511719,   0., -0.63829786]
+  # max: [ 35.06836 ,  103.18359 ,  127.61719 ,  75.58594 , 0., 0.]
+  # min: [ -8.876953 ,  63.808594 ,  90.49805 ,  49.48242 , 0., 0.]

 leader_arms:
  main:
@@ -47,13 +51,13 @@ follower_arms:
 cameras:
  front:
    _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
-    camera_index: 0
+    camera_index: 1
    fps: 30
    width: 640
    height: 480
  side:
    _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
-    camera_index: 1
+    camera_index: 0
    fps: 30
    width: 640
    height: 480