Fix real-world configs by adding latency

Remi Cadene
2024-06-03 14:47:06 +00:00
parent 1eb4bfe2e4
commit 6b5c5b6dc4
11 changed files with 34 additions and 6 deletions


@@ -28,7 +28,7 @@ training:
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50
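
For concreteness, here is how the updated `action` expression resolves once Hydra has interpolated `${fps}`, `${latency}` and `${policy.chunk_size}`. The values below (fps=50, chunk_size=100, latency=1/50) are illustrative only, not taken from this diff:

fps = 50
chunk_size = 100
latency = 1 / fps  # assumed real-world setting of one control period

before = [i / fps for i in range(chunk_size)]            # old expression
after = [i / fps + latency for i in range(chunk_size)]   # new expression

# The old timestamps start at the observation time (t=0.0); the new ones are
# shifted forward by the latency, i.e. by one control period here.
print([round(t, 3) for t in before[:3]])  # [0.0, 0.02, 0.04]
print([round(t, 3) for t in after[:3]])   # [0.02, 0.04, 0.06]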


@@ -57,3 +57,7 @@ wandb:
disable_artifact: false
project: lerobot
notes: ""
+# These configs need to be defined in environment yaml files (e.g. `env/aloha.yaml`)
+fps: ???
+latency: ???
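
The `???` markers are OmegaConf mandatory values: the defaults file now declares `fps` and `latency`, but they stay unset until an environment config provides them, and resolving them earlier raises an error. A minimal standalone sketch of that behavior (plain OmegaConf, not the full lerobot Hydra stack):

from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

# Mirrors the defaults above: fps and latency declared but left mandatory.
defaults = OmegaConf.create("fps: ???\nlatency: ???")

try:
    _ = defaults.latency  # accessed before any env config fills it in
except MissingMandatoryValue:
    print("latency must be provided by an env config, e.g. env/aloha.yaml")

# Merging an environment config (as Hydra would) supplies the values.
cfg = OmegaConf.merge(defaults, OmegaConf.create("fps: 50\nlatency: 0"))
print(cfg.fps, cfg.latency)  # 50 0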


@@ -1,6 +1,12 @@
# @package _global_
fps: 50
+# The latency is the delay in seconds between the time at which the observation is fed to the policy
+# to compute the next action, and the time at which that action is sent to the controller.
+# Note: simulation environments can run with zero latency, but in the real world the latency is usually
+# set to `1 / fps`, accounting for the policy's runtime (often shorter than `1 / fps`) followed by a
+# forced sleep so that the action is sent `1 / fps` after the observation time.
+latency: 0
env:
name: aloha
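
The comment describes a real-world control loop in which policy inference plus a padding sleep add up to exactly one control period, so the action computed from an observation is applied `1 / fps` later. A rough sketch of that loop; `get_observation`, `policy` and `send_action` are hypothetical stand-ins, not lerobot's actual robot interface:

import time

def control_loop(get_observation, policy, send_action, fps=50):
    period = 1 / fps  # with latency = 1 / fps, actions land one period after their observation
    while True:
        t_obs = time.perf_counter()
        obs = get_observation()      # observation taken at t_obs
        action = policy(obs)         # inference, usually faster than one period
        # Forced sleep so the action goes out 1 / fps after the observation time.
        time.sleep(max(0.0, period - (time.perf_counter() - t_obs)))
        send_action(action)          # sent at roughly t_obs + 1 / fps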


@@ -1,6 +1,12 @@
# @package _global_
fps: 30
+# The latency is the delay in seconds between the time at which the observation is fed to the policy
+# to compute the next action, and the time at which that action is sent to the controller.
+# Note: simulation environments can run with zero latency, but in the real world the latency is usually
+# set to `1 / fps`, accounting for the policy's runtime (often shorter than `1 / fps`) followed by a
+# forced sleep so that the action is sent `1 / fps` after the observation time.
+latency: 1 / ${fps}
env:
name: dora
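
Note that `1 / ${fps}` is not evaluated arithmetically by OmegaConf: after interpolation, `latency` is just the string `1 / 30`, which is substituted into the `delta_timestamps` expression and only becomes a number when that expression is evaluated as Python (lerobot evaluates these strings; the bare `eval` below is a simplification of that step). A sketch of the two-stage resolution with an assumed `chunk_size` of 100:

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "fps": 30,
        "latency": "1 / ${fps}",
        "policy": {"chunk_size": 100},
        "action": "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]",
    }
)

resolved = cfg.action
print(resolved)  # [i / 30 + 1 / 30 for i in range(100)]

# The resolved string becomes an actual list of timestamps once evaluated as Python
# (simplified here as a bare eval).
timestamps = eval(resolved)
print(round(timestamps[0], 4))  # 0.0333 -> first action lands one period after the observation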


@@ -1,6 +1,12 @@
# @package _global_
fps: 10
+# The latency is the delay in seconds between the time at which the observation is fed to the policy
+# to compute the next action, and the time at which that action is sent to the controller.
+# Note: simulation environments can run with zero latency, but in the real world the latency is usually
+# set to `1 / fps`, accounting for the policy's runtime (often shorter than `1 / fps`) followed by a
+# forced sleep so that the action is sent `1 / fps` after the observation time.
+latency: 0
env:
name: pusht


@@ -1,6 +1,12 @@
# @package _global_
fps: 15
+# The latency is the delay in seconds between the time at which the observation is fed to the policy
+# to compute the next action, and the time at which that action is sent to the controller.
+# Note: simulation environments can run with zero latency, but in the real world the latency is usually
+# set to `1 / fps`, accounting for the policy's runtime (often shorter than `1 / fps`) followed by a
+# forced sleep so that the action is sent `1 / fps` after the observation time.
+latency: 0
env:
name: xarm


@@ -25,7 +25,7 @@ training:
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50


@@ -51,7 +51,7 @@ training:
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50


@@ -49,7 +49,7 @@ training:
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
action: "[i / ${fps} + ${latency} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50


@@ -42,7 +42,7 @@ training:
delta_timestamps:
observation.image: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
observation.state: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
action: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1 - ${policy.n_obs_steps} + ${policy.horizon})]"
action: "[i / ${fps} + ${latency} for i in range(1 - ${policy.n_obs_steps}, 1 - ${policy.n_obs_steps} + ${policy.horizon})]"
# The original implementation doesn't sample frames for the last 7 steps,
# which avoids excessive padding and leads to improved training results.
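
With this change the observation timestamps still reach into the past while only the action timestamps are shifted by the latency. A quick illustration; all values below (fps=10, n_obs_steps=2, horizon=16, latency=1/10) are assumed for the example, not taken from the diff:

fps, n_obs_steps, horizon, latency = 10, 2, 16, 1 / 10

observation = [i / fps for i in range(1 - n_obs_steps, 1)]
action = [i / fps + latency for i in range(1 - n_obs_steps, 1 - n_obs_steps + horizon)]

print([round(t, 3) for t in observation])  # [-0.1, 0.0]: one past frame plus the current one
print([round(t, 3) for t in action[:4]])   # [0.0, 0.1, 0.2, 0.3]: shifted forward by the latency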


@@ -20,7 +20,7 @@ training:
observation.image: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
observation.state: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
action: "[i / ${fps} for i in range(${policy.horizon})]"
next.reward: "[i / ${fps} for i in range(${policy.horizon})]"
next.reward: "[i / ${fps} + ${latency} for i in range(${policy.horizon})]"
policy:
name: tdmpc
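
In this tdmpc hunk only `next.reward` is shifted by the latency; the `action` timestamps are left unchanged. Resolved with assumed values (fps=15, horizon=5, latency=1/15; illustrative only):

fps, horizon, latency = 15, 5, 1 / 15

observation = [i / fps for i in range(horizon + 1)]        # horizon + 1 frames from t=0
action = [i / fps for i in range(horizon)]                 # unchanged by this commit
next_reward = [i / fps + latency for i in range(horizon)]  # shifted by the latency

print([round(t, 3) for t in action])       # [0.0, 0.067, 0.133, 0.2, 0.267]
print([round(t, 3) for t in next_reward])  # [0.067, 0.133, 0.2, 0.267, 0.333]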