backup wip
This commit is contained in:
@@ -49,9 +49,9 @@ class AbstractExperienceReplay(TensorDictReplayBuffer):
|
||||
@property
|
||||
def stats_patterns(self) -> dict:
|
||||
return {
|
||||
("observation", "state"): "b c -> 1 c",
|
||||
("observation", "image"): "b c h w -> 1 c 1 1",
|
||||
("action",): "b c -> 1 c",
|
||||
("observation", "state"): "b c -> c",
|
||||
("observation", "image"): "b c h w -> c",
|
||||
("action",): "b c -> c",
|
||||
}
|
||||
|
||||
@property
|
||||
|
||||
@@ -113,11 +113,11 @@ class AlohaExperienceReplay(AbstractExperienceReplay):
|
||||
@property
|
||||
def stats_patterns(self) -> dict:
|
||||
d = {
|
||||
("observation", "state"): "b c -> 1 c",
|
||||
("action",): "b c -> 1 c",
|
||||
("observation", "state"): "b c -> c",
|
||||
("action",): "b c -> c",
|
||||
}
|
||||
for cam in CAMERAS[self.dataset_id]:
|
||||
d[("observation", "image", cam)] = "b c h w -> 1 c 1 1"
|
||||
d[("observation", "image", cam)] = "b c h w -> c"
|
||||
return d
|
||||
|
||||
@property
|
||||
|
||||
@@ -1,17 +1,31 @@
|
||||
from torchrl.envs.transforms import Compose, StepCounter, Transform, TransformedEnv
|
||||
|
||||
|
||||
def make_env(cfg, seed=None, transform=None):
|
||||
def make_env(cfg, transform=None):
|
||||
"""
|
||||
Provide seed to override the seed in the cfg (useful for batched environments).
|
||||
"""
|
||||
# assert cfg.rollout_batch_size == 1, \
|
||||
# """
|
||||
# For the time being, rollout batch sizes of > 1 are not supported. This is because the SerialEnv rollout does not
|
||||
# correctly handle terminated environments. If you really want to use a larger batch size, read on...
|
||||
|
||||
# When calling `EnvBase.rollout` with `break_when_any_done == True` all environments stop rolling out as soon as the
|
||||
# first is terminated or truncated. This almost certainly results in incorrect success metrics, as all but the first
|
||||
# environment get an opportunity to reach the goal. A possible work around is to comment out `if any_done: break`
|
||||
# inf `EnvBase._rollout_stop_early`. One potential downside is that the environments `step` function will continue
|
||||
# to be called and the outputs will continue to be added to the rollout.
|
||||
|
||||
# When calling `EnvBase.rollout` with `break_when_any_done == False` environments are reset when done.
|
||||
# """
|
||||
|
||||
kwargs = {
|
||||
"frame_skip": cfg.env.action_repeat,
|
||||
"from_pixels": cfg.env.from_pixels,
|
||||
"pixels_only": cfg.env.pixels_only,
|
||||
"image_size": cfg.env.image_size,
|
||||
"num_prev_obs": cfg.n_obs_steps - 1,
|
||||
"seed": seed if seed is not None else cfg.seed,
|
||||
"seed": cfg.seed,
|
||||
}
|
||||
|
||||
if cfg.env.name == "simxarm":
|
||||
@@ -33,22 +47,33 @@ def make_env(cfg, seed=None, transform=None):
|
||||
else:
|
||||
raise ValueError(cfg.env.name)
|
||||
|
||||
env = clsfunc(**kwargs)
|
||||
def _make_env(seed):
|
||||
nonlocal kwargs
|
||||
kwargs["seed"] = seed
|
||||
env = clsfunc(**kwargs)
|
||||
|
||||
# limit rollout to max_steps
|
||||
env = TransformedEnv(env, StepCounter(max_steps=cfg.env.episode_length))
|
||||
# limit rollout to max_steps
|
||||
env = TransformedEnv(env, StepCounter(max_steps=cfg.env.episode_length))
|
||||
|
||||
if transform is not None:
|
||||
# useful to add normalization
|
||||
if isinstance(transform, Compose):
|
||||
for tf in transform:
|
||||
env.append_transform(tf.clone())
|
||||
elif isinstance(transform, Transform):
|
||||
env.append_transform(transform.clone())
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
if transform is not None:
|
||||
# useful to add normalization
|
||||
if isinstance(transform, Compose):
|
||||
for tf in transform:
|
||||
env.append_transform(tf.clone())
|
||||
elif isinstance(transform, Transform):
|
||||
env.append_transform(transform.clone())
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
return env
|
||||
return env
|
||||
|
||||
# return SerialEnv(
|
||||
# cfg.rollout_batch_size,
|
||||
# create_env_fn=_make_env,
|
||||
# create_env_kwargs={
|
||||
# "seed": env_seed for env_seed in range(cfg.seed, cfg.seed + cfg.rollout_batch_size)
|
||||
# },
|
||||
# )
|
||||
|
||||
|
||||
# def make_env(env_name, frame_skip, device, is_test=False):
|
||||
|
||||
@@ -30,7 +30,7 @@ class AbstractPolicy(nn.Module, ABC):
|
||||
Should return a (batch_size, n_action_steps, *) tensor of actions.
|
||||
"""
|
||||
|
||||
def forward(self, *args, **kwargs):
|
||||
def forward(self, *args, **kwargs) -> Tensor:
|
||||
"""Inference step that makes multi-step policies compatible with their single-step environments.
|
||||
|
||||
WARNING: In general, this should not be overriden.
|
||||
|
||||
Reference in New Issue
Block a user