diff --git a/examples/advanced/1_train_act_pusht/act_pusht.yaml b/examples/advanced/1_train_act_pusht/act_pusht.yaml
index 38e542fb4..473f7d01f 100644
--- a/examples/advanced/1_train_act_pusht/act_pusht.yaml
+++ b/examples/advanced/1_train_act_pusht/act_pusht.yaml
@@ -28,7 +28,7 @@ training:
   online_steps_between_rollouts: 1
 
   delta_timestamps:
-    action: "[i / ${fps} for i in range(${policy.chunk_size})]"
+    action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
 
 eval:
   n_episodes: 50
diff --git a/lerobot/configs/default.yaml b/lerobot/configs/default.yaml
index 85b9ceea0..0a500bb95 100644
--- a/lerobot/configs/default.yaml
+++ b/lerobot/configs/default.yaml
@@ -57,3 +57,7 @@ wandb:
   disable_artifact: false
   project: lerobot
   notes: ""
+
+# These configs need to be defined in environment yaml files (e.g. `env/aloha.yaml`)
+fps: ???
+latency: ???
diff --git a/lerobot/configs/env/aloha.yaml b/lerobot/configs/env/aloha.yaml
index 296a4481c..129aff177 100644
--- a/lerobot/configs/env/aloha.yaml
+++ b/lerobot/configs/env/aloha.yaml
@@ -1,6 +1,12 @@
 # @package _global_
 
 fps: 50
+# The latency corresponds to the delay in seconds between the current time at which the observation
+# is fed to the policy to compute the next action, and the next action being sent to the controller.
+# Note: For simulation environments, there could be no latency, but for real-world, latency is usually
+# set to `1 / fps` to account for the runtime of the policy (which is often lower than `1 / fps`) and then
+# a forced sleep to reach `+ 1 / fps` from current observation time.
+latency: 0
 
 env:
   name: aloha
diff --git a/lerobot/configs/env/dora_aloha_real.yaml b/lerobot/configs/env/dora_aloha_real.yaml
index 088781d4e..d5d7438a3 100644
--- a/lerobot/configs/env/dora_aloha_real.yaml
+++ b/lerobot/configs/env/dora_aloha_real.yaml
@@ -1,6 +1,12 @@
 # @package _global_
 
 fps: 30
+# The latency corresponds to the delay in seconds between the current time at which the observation
+# is fed to the policy to compute the next action, and the next action being sent to the controller.
+# Note: For simulation environments, there could be no latency, but for real-world, latency is usually
+# set to `1 / fps` to account for the runtime of the policy (which is often lower than `1 / fps`) and then
+# a forced sleep to reach `+ 1 / fps` from current observation time.
+latency: 1 / ${fps}
 
 env:
   name: dora
diff --git a/lerobot/configs/env/pusht.yaml b/lerobot/configs/env/pusht.yaml
index 771fbbf4d..08f3366f0 100644
--- a/lerobot/configs/env/pusht.yaml
+++ b/lerobot/configs/env/pusht.yaml
@@ -1,6 +1,12 @@
 # @package _global_
 
 fps: 10
+# The latency corresponds to the delay in seconds between the current time at which the observation
+# is fed to the policy to compute the next action, and the next action being sent to the controller.
+# Note: For simulation environments, there could be no latency, but for real-world, latency is usually
+# set to `1 / fps` to account for the runtime of the policy (which is often lower than `1 / fps`) and then
+# a forced sleep to reach `+ 1 / fps` from current observation time.
+latency: 0
 
 env:
   name: pusht
diff --git a/lerobot/configs/env/xarm.yaml b/lerobot/configs/env/xarm.yaml
index 9dbb96f56..6c143b967 100644
--- a/lerobot/configs/env/xarm.yaml
+++ b/lerobot/configs/env/xarm.yaml
@@ -1,6 +1,12 @@
 # @package _global_
 
 fps: 15
+# The latency corresponds to the delay in seconds between the current time at which the observation
+# is fed to the policy to compute the next action, and the next action being sent to the controller.
+# Note: For simulation environments, there could be no latency, but for real-world, latency is usually
+# set to `1 / fps` to account for the runtime of the policy (which is often lower than `1 / fps`) and then
+# a forced sleep to reach `+ 1 / fps` from current observation time.
+latency: 0
 
 env:
   name: xarm
diff --git a/lerobot/configs/policy/act.yaml b/lerobot/configs/policy/act.yaml
index bba2e5638..9ee1833ca 100644
--- a/lerobot/configs/policy/act.yaml
+++ b/lerobot/configs/policy/act.yaml
@@ -25,7 +25,7 @@ training:
   online_steps_between_rollouts: 1
 
   delta_timestamps:
-    action: "[i / ${fps} for i in range(${policy.chunk_size})]"
+    action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
 
 eval:
   n_episodes: 50
diff --git a/lerobot/configs/policy/act_real.yaml b/lerobot/configs/policy/act_real.yaml
index b49426152..3f479a273 100644
--- a/lerobot/configs/policy/act_real.yaml
+++ b/lerobot/configs/policy/act_real.yaml
@@ -51,7 +51,7 @@ training:
   online_steps_between_rollouts: 1
 
   delta_timestamps:
-    action: "[i / ${fps} for i in range(${policy.chunk_size})]"
+    action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
 
 eval:
   n_episodes: 50
diff --git a/lerobot/configs/policy/act_real_no_state.yaml b/lerobot/configs/policy/act_real_no_state.yaml
index a8b1c9b6c..12e31b6de 100644
--- a/lerobot/configs/policy/act_real_no_state.yaml
+++ b/lerobot/configs/policy/act_real_no_state.yaml
@@ -49,7 +49,7 @@ training:
   online_steps_between_rollouts: 1
 
   delta_timestamps:
-    action: "[i / ${fps} for i in range(${policy.chunk_size})]"
+    action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
 
 eval:
   n_episodes: 50
diff --git a/lerobot/configs/policy/diffusion.yaml b/lerobot/configs/policy/diffusion.yaml
index b04ecf1bd..2b4e30144 100644
--- a/lerobot/configs/policy/diffusion.yaml
+++ b/lerobot/configs/policy/diffusion.yaml
@@ -42,7 +42,7 @@ training:
   delta_timestamps:
     observation.image: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
     observation.state: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
-    action: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1 - ${policy.n_obs_steps} + ${policy.horizon})]"
+    action: "[i / ${fps} + ${latency} for i in range(1 - ${policy.n_obs_steps}, 1 - ${policy.n_obs_steps} + ${policy.horizon})]"
 
   # The original implementation doesn't sample frames for the last 7 steps,
   # which avoids excessive padding and leads to improved training results.
diff --git a/lerobot/configs/policy/tdmpc.yaml b/lerobot/configs/policy/tdmpc.yaml
index 09326ab42..bca857e7a 100644
--- a/lerobot/configs/policy/tdmpc.yaml
+++ b/lerobot/configs/policy/tdmpc.yaml
@@ -20,7 +20,7 @@ training:
     observation.image: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
     observation.state: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
-    action: "[i / ${fps} for i in range(${policy.horizon})]"
-    next.reward: "[i / ${fps} for i in range(${policy.horizon})]"
+    action: "[i / ${fps} + ${latency} for i in range(${policy.horizon})]"
+    next.reward: "[i / ${fps} + ${latency} for i in range(${policy.horizon})]"
 
 policy:
   name: tdmpc