online training works (loss goes down), remove repeat_action, eval_policy outputs episodes data, eval_policy uses max_episodes_rendered
@@ -35,9 +35,9 @@ def make_policy(cfg):
     if cfg.policy.pretrained_model_path:
         # TODO(rcadene): hack for old pretrained models from fowm
         if cfg.policy.name == "tdmpc" and "fowm" in cfg.policy.pretrained_model_path:
-            if "offline" in cfg.pretrained_model_path:
+            if "offline" in cfg.policy.pretrained_model_path:
                 policy.step[0] = 25000
-            elif "final" in cfg.pretrained_model_path:
+            elif "final" in cfg.policy.pretrained_model_path:
                 policy.step[0] = 100000
             else:
                 raise NotImplementedError()
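
The functional change in this hunk is the config lookup: the pretrained model path is read from the policy sub-config (cfg.policy.pretrained_model_path), matching the surrounding lines, instead of from the top-level config. A minimal Python sketch of why the old lookup fails, using a stand-in namespace object (the path value and the SimpleNamespace stand-in are illustrative assumptions, not repository code):

from types import SimpleNamespace

# Stand-in config shaped like the nested config that make_policy receives:
# pretrained_model_path lives on the `policy` sub-config, not at the top level.
cfg = SimpleNamespace(
    policy=SimpleNamespace(
        name="tdmpc",
        pretrained_model_path="models/fowm/offline.pt",
    )
)

# Pre-fix lookup (left-hand side of the diff): the top-level config has no
# `pretrained_model_path` attribute, so this raises AttributeError.
try:
    "offline" in cfg.pretrained_model_path
except AttributeError as err:
    print(f"old lookup fails: {err}")

# Post-fix lookup (right-hand side of the diff).
if "offline" in cfg.policy.pretrained_model_path:
    print("would set policy.step[0] = 25000")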
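
The commit title also notes that eval_policy now returns per-episode data and respects max_episodes_rendered, but that function is not part of this hunk. The following is only a hypothetical sketch of that behavior: the name eval_policy and the parameter max_episodes_rendered come from the commit title, while the environment/policy interface and the returned fields are assumptions.

# Hypothetical sketch: roll out a policy for several episodes, collect per-episode
# results, and keep rendered frames only for the first `max_episodes_rendered` episodes.
# The env/policy interface below is an assumption, not the repository's actual API.
def eval_policy(env, policy, num_episodes=10, max_episodes_rendered=4):
    episodes = []
    for ep in range(num_episodes):
        obs, done, sum_reward, frames = env.reset(), False, 0.0, []
        while not done:
            action = policy.select_action(obs)
            obs, reward, done, info = env.step(action)
            sum_reward += reward
            if ep < max_episodes_rendered:
                frames.append(env.render(mode="rgb_array"))
        episodes.append({"sum_reward": sum_reward, "frames": frames})
    return {"episodes": episodes}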