remove abstractmethods, fix online training

This commit is contained in:
Alexander Soare
2024-03-20 14:49:41 +00:00
parent 5332766a82
commit 4b7ec81dde
3 changed files with 13 additions and 19 deletions

View File

@@ -112,6 +112,8 @@ def train(cfg: dict, out_dir=None, job_name=None):
raise NotImplementedError()
if job_name is None:
raise NotImplementedError()
if cfg.online_steps > 0:
assert cfg.rollout_batch_size == 1, "rollout_batch_size > 1 not supported for online training steps"
init_logging()
@@ -218,11 +220,11 @@ def train(cfg: dict, out_dir=None, job_name=None):
# TODO: add configurable number of rollout? (default=1)
with torch.no_grad():
rollout = env.rollout(
max_steps=cfg.env.episode_length // cfg.n_action_steps,
max_steps=cfg.env.episode_length,
policy=td_policy,
auto_cast_to_device=True,
)
assert len(rollout) <= cfg.env.episode_length // cfg.n_action_steps
assert len(rollout) <= cfg.env.episode_length
# set same episode index for all time steps contained in this rollout
rollout["episode"] = torch.tensor([env_step] * len(rollout), dtype=torch.int)
online_buffer.extend(rollout)