Remove latency, tdmpc policy passes tests (TODO: make it work with online RL)

This commit is contained in:
Cadene
2024-04-07 16:01:22 +00:00
parent 44656d2706
commit 4371a5570d
8 changed files with 123 additions and 133 deletions

View File

@@ -10,7 +10,6 @@ log_freq: 250
horizon: 100
n_obs_steps: 1
n_latency_steps: 0
# when temporal_agg=False, n_action_steps=horizon
n_action_steps: ${horizon}

View File

@@ -16,7 +16,6 @@ seed: 100000
horizon: 16
n_obs_steps: 2
n_action_steps: 8
n_latency_steps: 0
dataset_obs_steps: ${n_obs_steps}
past_action_visible: False
keypoint_visible_rate: 1.0
@@ -38,7 +37,6 @@ policy:
shape_meta: ${shape_meta}
horizon: ${horizon}
# n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'}
n_obs_steps: ${n_obs_steps}
num_inference_steps: 100
obs_as_global_cond: ${obs_as_global_cond}
@@ -64,6 +62,11 @@ policy:
lr_warmup_steps: 500
grad_clip_norm: 10
delta_timestamps:
observation.image: [-.1, 0]
observation.state: [-.1, 0]
action: [-.1, 0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0, 1.1, 1.2, 1.3, 1.4]
noise_scheduler:
_target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler
num_train_timesteps: 100

View File

@@ -77,3 +77,9 @@ policy:
num_q: 5
mlp_dim: 512
latent_dim: 50
delta_timestamps:
observation.image: "[i / ${fps} for i in range(6)]"
observation.state: "[i / ${fps} for i in range(6)]"
action: "[i / ${fps} for i in range(5)]"
next.reward: "[i / ${fps} for i in range(5)]"