online training works (loss goes down), remove repeat_action, eval_policy outputs episodes data, eval_policy uses max_episodes_rendered

Cadene
2024-04-10 11:34:01 +00:00
parent 19e7661b8d
commit 06573d7f67
11 changed files with 219 additions and 211 deletions
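For context on the eval changes named in the commit title, below is a minimal sketch of an evaluation loop that collects per-episode data and only renders the first max_episodes_rendered rollouts. Only the name eval_policy and the max_episodes_rendered parameter come from the commit message; the gymnasium-style env/policy interfaces and the returned keys are assumptions for illustration, not the repository's actual implementation.

# Hypothetical sketch: return per-episode data, render only the first
# `max_episodes_rendered` episodes. Env/policy interfaces are assumed
# (gymnasium-style reset/step), not taken from the repository.
def eval_policy(env, policy, num_episodes: int, max_episodes_rendered: int = 0):
    episodes = []
    for ep_idx in range(num_episodes):
        obs, _ = env.reset()
        frames, rewards, done = [], [], False
        while not done:
            action = policy.select_action(obs)
            obs, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            rewards.append(float(reward))
            if ep_idx < max_episodes_rendered:
                frames.append(env.render())
        episodes.append({
            "sum_reward": sum(rewards),
            "num_steps": len(rewards),
            "frames": frames,
        })
    return {
        "episodes": episodes,
        "avg_sum_reward": sum(ep["sum_reward"] for ep in episodes) / max(num_episodes, 1),
    }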

View File

@@ -18,7 +18,6 @@ env:
 from_pixels: True
 pixels_only: False
 image_size: [3, 480, 640]
-action_repeat: 1
 episode_length: 400
 fps: ${fps}
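The fps: ${fps} entry is an OmegaConf/Hydra interpolation resolved against a top-level fps key. A minimal sketch of how that resolution works; the top-level layout and the value 50 are assumptions for illustration:

# Minimal OmegaConf sketch: `fps: ${fps}` resolves against the root `fps` key.
# The surrounding layout and the value 50 are assumed, not the repo's config.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    """
    fps: 50
    env:
      from_pixels: True
      pixels_only: False
      image_size: [3, 480, 640]
      episode_length: 400
      fps: ${fps}
    """
)
assert cfg.env.fps == 50  # interpolation is resolved on access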

View File

@@ -18,7 +18,6 @@ env:
 from_pixels: True
 pixels_only: False
 image_size: 96
-action_repeat: 1
 episode_length: 300
 fps: ${fps}

View File

@@ -17,7 +17,6 @@ env:
 from_pixels: True
 pixels_only: False
 image_size: 84
-# action_repeat: 2 # we can remove if policy has n_action_steps=2
 episode_length: 25
 fps: ${fps}
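The removed comment carries the rationale: repeating one action inside the env can be replaced by the policy emitting a short chunk of actions (n_action_steps). A rough sketch of the two equivalent step loops; every class and method name here is assumed for illustration and is not taken from the repository:

# Two ways to apply the same control frequency reduction; only the names
# `action_repeat` and `n_action_steps` appear in the configs, the rest is assumed.
def step_with_action_repeat(env, action, action_repeat: int = 2):
    """Env-side: replay one action `action_repeat` times, summing rewards."""
    total_reward, done = 0.0, False
    for _ in range(action_repeat):
        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        done = terminated or truncated
        if done:
            break
    return obs, total_reward, done, info

def step_with_action_chunk(env, policy, obs, n_action_steps: int = 2):
    """Policy-side: the policy returns `n_action_steps` actions played back to back."""
    actions = policy.select_actions(obs, n=n_action_steps)  # assumed interface
    total_reward, done = 0.0, False
    for action in actions:
        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        done = terminated or truncated
        if done:
            break
    return obs, total_reward, done, info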

View File

@@ -36,6 +36,7 @@ policy:
 log_std_max: 2
 # learning
 batch_size: 256
+max_buffer_size: 10000
 horizon: 5
 reward_coef: 0.5
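The new max_buffer_size: 10000 entry caps the online replay buffer behind the "online training works" part of the title. A minimal FIFO-eviction sketch; only the cap and the batch_size: 256 default come from this config, everything else is assumed:

import random
from collections import deque

class ReplayBuffer:
    """Illustrative replay buffer capped at `max_buffer_size` transitions."""

    def __init__(self, max_buffer_size: int = 10000):
        # a bounded deque silently drops the oldest transition once full
        self.storage = deque(maxlen=max_buffer_size)

    def add(self, transition: dict):
        self.storage.append(transition)

    def sample(self, batch_size: int = 256) -> list:
        # copy to a list for uniform sampling without replacement
        return random.sample(list(self.storage), k=min(batch_size, len(self.storage)))

A deque with maxlen gives constant-time eviction of the oldest transitions, which matches the usual FIFO behavior of an online RL buffer once the cap is reached.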