Fix real-world configs by adding latency

Remi Cadene
2024-06-03 14:47:06 +00:00
parent 1eb4bfe2e4
commit 6b5c5b6dc4
11 changed files with 34 additions and 6 deletions


@@ -28,7 +28,7 @@ training:
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50
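
For concreteness, here is how the updated `action` expression resolves once Hydra has interpolated `${fps}`, `${latency}` and `${policy.chunk_size}`. The values below (fps=50, chunk_size=100, latency=1/50) are illustrative only, not taken from this diff:

fps = 50
chunk_size = 100
latency = 1 / fps  # assumed real-world setting of one control period

before = [i / fps for i in range(chunk_size)]            # old expression
after = [i / fps + latency for i in range(chunk_size)]   # new expression

# The old timestamps start at the observation time (t=0.0); the new ones are
# shifted forward by the latency, i.e. by one control period here.
print([round(t, 3) for t in before[:3]])  # [0.0, 0.02, 0.04]
print([round(t, 3) for t in after[:3]])   # [0.02, 0.04, 0.06]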


@@ -57,3 +57,7 @@ wandb:
disable_artifact: false
project: lerobot
notes: ""
+# These configs need to be defined in environment yaml files (e.g. `env/aloha.yaml`)
+fps: ???
+latency: ???
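
The `???` markers are OmegaConf mandatory values: the defaults file now declares `fps` and `latency`, but they stay unset until an environment config provides them, and resolving them earlier raises an error. A minimal standalone sketch of that behavior (plain OmegaConf, not the full lerobot Hydra stack):

from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

# Mirrors the defaults above: fps and latency declared but left mandatory.
defaults = OmegaConf.create("fps: ???\nlatency: ???")

try:
    _ = defaults.latency  # accessed before any env config fills it in
except MissingMandatoryValue:
    print("latency must be provided by an env config, e.g. env/aloha.yaml")

# Merging an environment config (as Hydra would) supplies the values.
cfg = OmegaConf.merge(defaults, OmegaConf.create("fps: 50\nlatency: 0"))
print(cfg.fps, cfg.latency)  # 50 0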


@@ -1,6 +1,12 @@
# @package _global_
fps: 50
+# The latency is the delay in seconds between the time at which the observation is fed to the policy
+# to compute the next action, and the time at which that action is sent to the controller.
+# Note: simulation environments can run with zero latency, but in the real world the latency is usually
+# set to `1 / fps`, accounting for the policy's runtime (often shorter than `1 / fps`) followed by a
+# forced sleep so that the action is sent `1 / fps` after the observation time.
+latency: 0
env:
name: aloha
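
The comment describes a real-world control loop in which policy inference plus a padding sleep add up to exactly one control period, so the action computed from an observation is applied `1 / fps` later. A rough sketch of that loop; `get_observation`, `policy` and `send_action` are hypothetical stand-ins, not lerobot's actual robot interface:

import time

def control_loop(get_observation, policy, send_action, fps=50):
    period = 1 / fps  # with latency = 1 / fps, actions land one period after their observation
    while True:
        t_obs = time.perf_counter()
        obs = get_observation()      # observation taken at t_obs
        action = policy(obs)         # inference, usually faster than one period
        # Forced sleep so the action goes out 1 / fps after the observation time.
        time.sleep(max(0.0, period - (time.perf_counter() - t_obs)))
        send_action(action)          # sent at roughly t_obs + 1 / fps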


@@ -1,6 +1,12 @@
# @package _global_
fps: 30
+# The latency is the delay in seconds between the time at which the observation is fed to the policy
+# to compute the next action, and the time at which that action is sent to the controller.
+# Note: simulation environments can run with zero latency, but in the real world the latency is usually
+# set to `1 / fps`, accounting for the policy's runtime (often shorter than `1 / fps`) followed by a
+# forced sleep so that the action is sent `1 / fps` after the observation time.
+latency: 1 / ${fps}
env:
name: dora
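
Note that `1 / ${fps}` is not evaluated arithmetically by OmegaConf: after interpolation, `latency` is just the string `1 / 30`, which is substituted into the `delta_timestamps` expression and only becomes a number when that expression is evaluated as Python (lerobot evaluates these strings; the bare `eval` below is a simplification of that step). A sketch of the two-stage resolution with an assumed `chunk_size` of 100:

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "fps": 30,
        "latency": "1 / ${fps}",
        "policy": {"chunk_size": 100},
        "action": "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]",
    }
)

resolved = cfg.action
print(resolved)  # [i / 30 + 1 / 30 for i in range(100)]

# The resolved string becomes an actual list of timestamps once evaluated as Python
# (simplified here as a bare eval).
timestamps = eval(resolved)
print(round(timestamps[0], 4))  # 0.0333 -> first action lands one period after the observation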


@@ -1,6 +1,12 @@
# @package _global_
fps: 10
+# The latency is the delay in seconds between the time at which the observation is fed to the policy
+# to compute the next action, and the time at which that action is sent to the controller.
+# Note: simulation environments can run with zero latency, but in the real world the latency is usually
+# set to `1 / fps`, accounting for the policy's runtime (often shorter than `1 / fps`) followed by a
+# forced sleep so that the action is sent `1 / fps` after the observation time.
+latency: 0
env:
name: pusht


@@ -1,6 +1,12 @@
# @package _global_
fps: 15
+# The latency is the delay in seconds between the time at which the observation is fed to the policy
+# to compute the next action, and the time at which that action is sent to the controller.
+# Note: simulation environments can run with zero latency, but in the real world the latency is usually
+# set to `1 / fps`, accounting for the policy's runtime (often shorter than `1 / fps`) followed by a
+# forced sleep so that the action is sent `1 / fps` after the observation time.
+latency: 0
env:
name: xarm


@@ -25,7 +25,7 @@ training:
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50


@@ -51,7 +51,7 @@ training:
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50


@@ -49,7 +49,7 @@ training:
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50


@@ -42,7 +42,7 @@ training:
delta_timestamps:
observation.image: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
observation.state: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
action: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1 - ${policy.n_obs_steps} + ${policy.horizon})]"
action: "[i / ${fps} + ${latency} for i in range(1 - ${policy.n_obs_steps}, 1 - ${policy.n_obs_steps} + ${policy.horizon})]"
# The original implementation doesn't sample frames for the last 7 steps,
# which avoids excessive padding and leads to improved training results.
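
With this change the observation timestamps still reach into the past while only the action timestamps are shifted by the latency. A quick illustration; all values below (fps=10, n_obs_steps=2, horizon=16, latency=1/10) are assumed for the example, not taken from the diff:

fps, n_obs_steps, horizon, latency = 10, 2, 16, 1 / 10

observation = [i / fps for i in range(1 - n_obs_steps, 1)]
action = [i / fps + latency for i in range(1 - n_obs_steps, 1 - n_obs_steps + horizon)]

print([round(t, 3) for t in observation])  # [-0.1, 0.0]: one past frame plus the current one
print([round(t, 3) for t in action[:4]])   # [0.0, 0.1, 0.2, 0.3]: shifted forward by the latency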


@@ -20,7 +20,7 @@ training:
observation.image: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
observation.state: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
action: "[i / ${fps} for i in range(${policy.horizon})]"
next.reward: "[i / ${fps} for i in range(${policy.horizon})]"
next.reward: "[i / ${fps} + ${latency} for i in range(${policy.horizon})]"
policy:
name: tdmpc
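
In this tdmpc hunk only `next.reward` is shifted by the latency; the `action` timestamps are left unchanged. Resolved with assumed values (fps=15, horizon=5, latency=1/15; illustrative only):

fps, horizon, latency = 15, 5, 1 / 15

observation = [i / fps for i in range(horizon + 1)]        # horizon + 1 frames from t=0
action = [i / fps for i in range(horizon)]                 # unchanged by this commit
next_reward = [i / fps + latency for i in range(horizon)]  # shifted by the latency

print([round(t, 3) for t in action])       # [0.0, 0.067, 0.133, 0.2, 0.267]
print([round(t, 3) for t in next_reward])  # [0.067, 0.133, 0.2, 0.267, 0.333]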