forked from tangger/lerobot
wip: still needs batch logic for act and tdmp
This commit is contained in:
@@ -1,7 +1,15 @@
|
||||
|
||||
import pytest
|
||||
from tensordict import TensorDict
|
||||
from tensordict.nn import TensorDictModule
|
||||
import torch
|
||||
from torchrl.data import UnboundedContinuousTensorSpec
|
||||
from torchrl.envs import EnvBase
|
||||
|
||||
from lerobot.common.policies.factory import make_policy
|
||||
|
||||
from lerobot.common.policies.abstract import AbstractPolicy
|
||||
|
||||
from .utils import DEVICE, init_config
|
||||
|
||||
|
||||
@@ -23,3 +31,75 @@ def test_factory(env_name, policy_name):
|
||||
]
|
||||
)
|
||||
policy = make_policy(cfg)
|
||||
|
||||
|
||||
def test_abstract_policy_forward():
|
||||
"""
|
||||
Given an underlying policy that produces an action trajectory with n_action_steps actions, checks that:
|
||||
- The policy is invoked the expected number of times during a rollout.
|
||||
- The environment's termination condition is respected even when part way through an action trajectory.
|
||||
- The observations are returned correctly.
|
||||
"""
|
||||
|
||||
n_action_steps = 8 # our test policy will output 8 action step horizons
|
||||
terminate_at = 10 # some number that is more than n_action_steps but not a multiple
|
||||
rollout_max_steps = terminate_at + 1 # some number greater than terminate_at
|
||||
|
||||
# A minimal environment for testing.
|
||||
class StubEnv(EnvBase):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.action_spec = UnboundedContinuousTensorSpec(shape=(1,))
|
||||
self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
|
||||
|
||||
def _step(self, tensordict: TensorDict) -> TensorDict:
|
||||
self.invocation_count += 1
|
||||
return TensorDict(
|
||||
{
|
||||
"observation": torch.tensor([self.invocation_count]),
|
||||
"reward": torch.tensor([self.invocation_count]),
|
||||
"terminated": torch.tensor(
|
||||
tensordict["action"].item() == terminate_at
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
def _reset(self, tensordict: TensorDict) -> TensorDict:
|
||||
self.invocation_count = 0
|
||||
return TensorDict(
|
||||
{
|
||||
"observation": torch.tensor([self.invocation_count]),
|
||||
"reward": torch.tensor([self.invocation_count]),
|
||||
}
|
||||
)
|
||||
|
||||
def _set_seed(self, seed: int | None):
|
||||
return
|
||||
|
||||
|
||||
class StubPolicy(AbstractPolicy):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.n_action_steps = n_action_steps
|
||||
self.n_policy_invocations = 0
|
||||
|
||||
def select_action(self):
|
||||
self.n_policy_invocations += 1
|
||||
return torch.stack([torch.tensor([i]) for i in range(self.n_action_steps)]).unsqueeze(0)
|
||||
|
||||
|
||||
env = StubEnv()
|
||||
policy = StubPolicy()
|
||||
policy = TensorDictModule(
|
||||
policy,
|
||||
in_keys=[],
|
||||
out_keys=["action"],
|
||||
)
|
||||
|
||||
# Keep track to make sure the policy is called the expected number of times
|
||||
rollout = env.rollout(rollout_max_steps, policy)
|
||||
|
||||
assert len(rollout) == terminate_at + 1 # +1 for the reset observation
|
||||
assert policy.n_policy_invocations == (terminate_at // n_action_steps) + 1
|
||||
assert torch.equal(rollout['observation'].flatten(), torch.arange(terminate_at + 1))
|
||||
|
||||
Reference in New Issue
Block a user