Compare commits

...

1 Commit

Author SHA1 Message Date
Alexander Soare  315cbcb422  wip - requesting feedback  2024-03-13 12:54:31 +00:00
2 changed files with 115 additions and 2 deletions

View File

@@ -4,9 +4,113 @@ from typing import Optional
from tensordict import TensorDict
from torchrl.envs import EnvBase
from torchrl.envs.utils import _terminated_or_truncated, step_mdp
class EnvBaseWithMultiStepRollouts(EnvBase):
"""Adds handling of policies that output action trajectories to be execute with a fixed horizon."""
def _rollout_stop_early(
self,
*,
tensordict,
auto_cast_to_device,
max_steps,
policy,
policy_device,
env_device,
callback,
):
"""Override adds handling of multi-step policies."""
tensordicts = []
step_ix = 0
do_break = False
while not do_break:
if auto_cast_to_device:
if policy_device is not None:
tensordict = tensordict.to(policy_device, non_blocking=True)
else:
tensordict.clear_device_()
tensordict = policy(tensordict)
if auto_cast_to_device:
if env_device is not None:
tensordict = tensordict.to(env_device, non_blocking=True)
else:
tensordict.clear_device_()
for action in tensordict["action"].clone():
tensordict["action"] = action
tensordict = self.step(tensordict)
tensordicts.append(tensordict.clone(False))
if step_ix == max_steps - 1:
# we don't truncate, as one could potentially continue the run
do_break = True
break
tensordict = step_mdp(
tensordict,
keep_other=True,
exclude_action=False,
exclude_reward=True,
reward_keys=self.reward_keys,
action_keys=self.action_keys,
done_keys=self.done_keys,
)
# done and truncated are in done_keys
# We read if any key is done.
any_done = _terminated_or_truncated(
tensordict,
full_done_spec=self.output_spec["full_done_spec"],
key=None,
)
if any_done:
break
if callback is not None:
callback(self, tensordict)
step_ix += 1
return tensordicts
def _rollout_nonstop(
self,
*,
tensordict,
auto_cast_to_device,
max_steps,
policy,
policy_device,
env_device,
callback,
):
"""Override adds handling of multi-step policies."""
tensordicts = []
tensordict_ = tensordict
for i in range(max_steps):
if auto_cast_to_device:
if policy_device is not None:
tensordict_ = tensordict_.to(policy_device, non_blocking=True)
else:
tensordict_.clear_device_()
tensordict_ = policy(tensordict_)
if auto_cast_to_device:
if env_device is not None:
tensordict_ = tensordict_.to(env_device, non_blocking=True)
else:
tensordict_.clear_device_()
tensordict, tensordict_ = self.step_and_maybe_reset(tensordict_)
tensordicts.append(tensordict)
if i == max_steps - 1:
# we don't truncate, as one could potentially continue the run
break
if callback is not None:
callback(self, tensordict)
return tensordicts
class AbstractEnv(EnvBaseWithMultiStepRollouts):
def __init__(
self,
task,
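
To make the control flow concrete, here is a minimal, torchrl-free sketch of what the _rollout_stop_early override does differently: the policy is queried once per chunk, and every action in the returned trajectory is executed as its own environment step, stopping early on done or once max_steps steps have been collected. chunk_policy, toy_env_step and rollout below are illustrative stand-ins, not names from this diff.

import torch

def chunk_policy(obs: torch.Tensor, horizon: int = 4) -> torch.Tensor:
    # Hypothetical multi-step policy: one call returns a whole action trajectory
    # of shape (horizon, action_dim), analogous to tensordict["action"] above.
    return torch.randn(horizon, 2)

def toy_env_step(obs: torch.Tensor, action: torch.Tensor):
    # Hypothetical environment step: returns (next_obs, done).
    next_obs = obs + action
    return next_obs, bool(next_obs.abs().sum() > 50.0)

def rollout(max_steps: int = 20):
    obs = torch.zeros(2)
    transitions = []
    step_ix = 0
    while True:
        # One policy call yields several actions ...
        for action in chunk_policy(obs):
            # ... and each action is executed as its own environment step.
            obs, done = toy_env_step(obs, action)
            transitions.append(obs.clone())
            if done or step_ix == max_steps - 1:
                return transitions
            step_ix += 1

print(len(rollout()))

The override above does the same thing with TensorDicts, plus the step_mdp bookkeeping, device casting, and done-flag handling that torchrl's rollout machinery expects.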

View File

@@ -4,7 +4,16 @@ import torch
from tensordict import TensorDictBase
from tensordict.nn import dispatch
from tensordict.utils import NestedKey
from torchrl.envs.transforms import ObservationTransform, Transform, TransformedEnv
from torchrl.envs.transforms.transforms import _TEnvPostInit
from lerobot.common.envs.abstract import EnvBaseWithMultiStepRollouts
class TransformedEnv(EnvBaseWithMultiStepRollouts, TransformedEnv, metaclass=_TEnvPostInit):
"""Keep method overrides from EnvBaseWithMultiStepRollouts."""
pass
class Prod(ObservationTransform):
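
The reason for listing EnvBaseWithMultiStepRollouts before TransformedEnv (while reusing torchrl's _TEnvPostInit metaclass) is Python's method resolution order: methods defined on the first base shadow same-named methods inherited through the second. A self-contained toy illustration of that resolution order, with made-up class names standing in for the real ones:

# Toy classes standing in for EnvBaseWithMultiStepRollouts and torchrl's
# TransformedEnv; only the method-resolution order is being illustrated.
class Base:
    def _rollout_nonstop(self):
        return "single-step rollout"

class MultiStepMixin(Base):
    def _rollout_nonstop(self):
        return "multi-step rollout"

class WrappedEnv(Base):  # plays the role of torchrl's TransformedEnv
    pass

class PatchedTransformedEnv(MultiStepMixin, WrappedEnv):
    """Mixin listed first, so its overrides shadow WrappedEnv's inherited ones."""

print(PatchedTransformedEnv.__mro__)
# (PatchedTransformedEnv, MultiStepMixin, WrappedEnv, Base, object)
print(PatchedTransformedEnv()._rollout_nonstop())  # "multi-step rollout"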