Add Pi0 (#681)

Co-authored-by: Simon Alibert <simon.alibert@huggingface.co>
Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com>
Co-authored-by: Pablo <pablo.montalvo.leroux@gmail.com>
2025-02-04 18:01:04 +01:00
parent dd974529cf
commit 638d411cd3
26 changed files with 2365 additions and 92 deletions
--- a/tests/test_policies.py
+++ b/tests/test_policies.py
@@ -167,14 +167,16 @@ def test_policy(ds_repo_id, env_name, env_kwargs, policy_name, policy_kwargs):
    batch = next(dl_iter)

    for key in batch:
-        batch[key] = batch[key].to(DEVICE, non_blocking=True)
+        if isinstance(batch[key], torch.Tensor):
+            batch[key] = batch[key].to(DEVICE, non_blocking=True)

    # Test updating the policy (and test that it does not mutate the batch)
    batch_ = deepcopy(batch)
    policy.forward(batch)
    assert set(batch) == set(batch_), "Batch keys are not the same after a forward pass."
    assert all(
-        torch.equal(batch[k], batch_[k]) for k in batch
+        torch.equal(batch[k], batch_[k]) if isinstance(batch[k], torch.Tensor) else batch[k] == batch_[k]
+        for k in batch
    ), "Batch values are not the same after a forward pass."

    # reset the policy and environment