Add AbstractEnv, Refactor AlohaEnv, Add rendering_hook in env, Minor modifications, (TODO: Refactor Pusht and Simxarm)

Add tasks without end_effector that are compatible with dataset, Eval can run (TODO: training and pretrained model)
Training can run (TODO: eval)
2024-03-10 22:00:48 +00:00 · 2024-03-10 10:52:12 +00:00 · 2024-03-09 16:52:08 +00:00 · 2024-03-08 18:08:28 +00:00 · 2024-03-08 18:07:49 +00:00 · 2024-03-08 18:07:33 +00:00
135 changed files with 307 additions and 7930 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +0,0 @@
-*.memmap filter=lfs diff=lfs merge=lfs -text
--- a/.github/poetry/cpu/poetry.lock
+++ b/.github/poetry/cpu/poetry.lock
--- a/.github/poetry/cpu/pyproject.toml
+++ b/.github/poetry/cpu/pyproject.toml
@@ -1,107 +0,0 @@
-[tool.poetry]
-name = "lerobot"
-version = "0.1.0"
-description = "Le robot is learning"
-authors = [
-    "Rémi Cadène <re.cadene@gmail.com>",
-    "Simon Alibert <alibert.sim@gmail.com>",
-]
-repository = "https://github.com/Cadene/lerobot"
-readme = "README.md"
-license = "MIT"
-classifiers=[
-    "Development Status :: 3 - Alpha",
-    "Intended Audience :: Developers",
-    "Topic :: Software Development :: Build Tools",
-    "License :: OSI Approved :: MIT License",
-    "Programming Language :: Python :: 3.10",
-]
-packages = [{include = "lerobot"}]
-
-
-[tool.poetry.dependencies]
-python = "^3.10"
-cython = "^3.0.8"
-termcolor = "^2.4.0"
-omegaconf = "^2.3.0"
-dm-env = "^1.6"
-pandas = "^2.2.1"
-wandb = "^0.16.3"
-moviepy = "^1.0.3"
-imageio = {extras = ["pyav"], version = "^2.34.0"}
-gdown = "^5.1.0"
-hydra-core = "^1.3.2"
-einops = "^0.7.0"
-pygame = "^2.5.2"
-pymunk = "^6.6.0"
-zarr = "^2.17.0"
-shapely = "^2.0.3"
-scikit-image = "^0.22.0"
-numba = "^0.59.0"
-mpmath = "^1.3.0"
-torch = {version = "^2.2.1", source = "torch-cpu"}
-tensordict = {git = "https://github.com/pytorch/tensordict"}
-torchrl = {git = "https://github.com/pytorch/rl", rev = "13bef426dcfa5887c6e5034a6e9697993fa92c37"}
-mujoco = "^3.1.2"
-mujoco-py = "^2.1.2.14"
-gym = "^0.26.2"
-opencv-python = "^4.9.0.80"
-diffusers = "^0.26.3"
-torchvision = {version = "^0.17.1", source = "torch-cpu"}
-h5py = "^3.10.0"
-dm = "^1.3"
-dm-control = "^1.0.16"
-huggingface-hub = "^0.21.4"
-
-
-[tool.poetry.group.dev.dependencies]
-pre-commit = "^3.6.2"
-debugpy = "^1.8.1"
-pytest = "^8.1.0"
-
-
-[[tool.poetry.source]]
-name = "torch-cpu"
-url = "https://download.pytorch.org/whl/cpu"
-priority = "supplemental"
-
-
-[tool.ruff]
-line-length = 110
-target-version = "py310"
-exclude = [
-    ".bzr",
-    ".direnv",
-    ".eggs",
-    ".git",
-    ".git-rewrite",
-    ".hg",
-    ".mypy_cache",
-    ".nox",
-    ".pants.d",
-    ".pytype",
-    ".ruff_cache",
-    ".svn",
-    ".tox",
-    ".venv",
-    "__pypackages__",
-    "_build",
-    "buck-out",
-    "build",
-    "dist",
-    "node_modules",
-    "venv",
-]
-
-
-[tool.ruff.lint]
-select = ["E4", "E7", "E9", "F", "I", "N", "B", "C4", "SIM"]
-
-
-[tool.poetry-dynamic-versioning]
-enable = true
-
-
-[build-system]
-requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
-build-backend = "poetry_dynamic_versioning.backend"
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -17,28 +17,17 @@ jobs:
    runs-on: ubuntu-latest
    env:
      POETRY_VERSION: 1.8.1
-      DATA_DIR: tests/data
-      TMPDIR: ~/tmp
-      TEMP: ~/tmp
-      TMP: ~/tmp
-      PYOPENGL_PLATFORM: egl
-      MUJOCO_GL: egl
-      LEROBOT_TESTS_DEVICE: cpu
    steps:
      #----------------------------------------------
      #       check-out repo and set-up python
      #----------------------------------------------
      - name: Check out repository
        uses: actions/checkout@v4
-        with:
-          lfs: true
-
      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
-
      #----------------------------------------------
      #         install & configure poetry
      #----------------------------------------------
@@ -46,9 +35,8 @@ jobs:
        id: restore-poetry-cache
        uses: actions/cache/restore@v3
        with:
-          path: ~/.local
-          key: poetry-${{ env.POETRY_VERSION }}
-
+          path: ~/.local  # the path depends on the OS
+          key: poetry-${{ env.POETRY_VERSION }}  # increment to reset cache
      - name: Install Poetry
        if: steps.restore-poetry-cache.outputs.cache-hit != 'true'
        uses: snok/install-poetry@v1
@@ -56,7 +44,6 @@ jobs:
          version: ${{ env.POETRY_VERSION }}
          virtualenvs-create: true
          installer-parallel: true
-
      - name: Save cached Poetry installation
        if: |
          steps.restore-poetry-cache.outputs.cache-hit != 'true' &&
@@ -64,32 +51,25 @@ jobs:
        id: save-poetry-cache
        uses: actions/cache/save@v3
        with:
-          path: ~/.local
-          key: poetry-${{ env.POETRY_VERSION }}
-
+          path: ~/.local  # the path depends on the OS
+          key: poetry-${{ env.POETRY_VERSION }}  # increment to reset cache
      - name: Configure Poetry
        run: poetry config virtualenvs.in-project true
-
      #----------------------------------------------
      #           install dependencies
      #----------------------------------------------
-      # TODO(aliberts): move to gpu runners
-      - name: Select cpu dependencies  # HACK
-        run: cp -t . .github/poetry/cpu/pyproject.toml .github/poetry/cpu/poetry.lock
-
      - name: Load cached venv
        id: restore-dependencies-cache
        uses: actions/cache/restore@v3
        with:
          path: .venv
          key: venv-${{ steps.setup-python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }}
-
      - name: Install dependencies
        if: steps.restore-dependencies-cache.outputs.cache-hit != 'true'
        run: |
-          mkdir ~/tmp
          poetry install --no-interaction --no-root
-
+          git clone https://github.com/real-stanford/diffusion_policy
+          cp -r diffusion_policy/diffusion_policy $(poetry env info -p)/lib/python3.10/site-packages/
      - name: Save cached venv
        if: |
            steps.restore-dependencies-cache.outputs.cache-hit != 'true' &&
@@ -99,16 +79,11 @@ jobs:
        with:
          path: .venv
          key: venv-${{ steps.setup-python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }}
-
-      - name: Install libegl1-mesa-dev (to use MUJOCO_GL=egl)
-        run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev
-
      #----------------------------------------------
      #             install project
      #----------------------------------------------
      - name: Install project
        run: poetry install --no-interaction
-
      #----------------------------------------------
      #               run tests
      #----------------------------------------------
@@ -116,7 +91,6 @@ jobs:
        run: |
          source .venv/bin/activate
          pytest tests
-
      - name: Test train pusht end-to-end
        run: |
          source .venv/bin/activate
@@ -124,21 +98,21 @@ jobs:
            hydra.job.name=pusht \
            env=pusht \
            wandb.enable=False \
-            offline_steps=2 \
+            offline_steps=1 \
            online_steps=0 \
-            device=cpu \
-            save_model=true \
-            save_freq=1 \
-            hydra.run.dir=tests/outputs/
-
-      - name: Test eval pusht end-to-end
-        run: |
-          source .venv/bin/activate
-          python lerobot/scripts/eval.py \
-            hydra.job.name=pusht \
-            env=pusht \
-            wandb.enable=False \
-            eval_episodes=1 \
-            env.episode_length=8 \
-            device=cpu \
-            policy.pretrained_model_path=tests/outputs/models/1.pt
+            device=cpu
+      # TODO(rcadene, aliberts): Add end-to-end test of eval checkpoint post training
+      # - name: Test eval pusht end-to-end
+      #   run: |
+      #     source .venv/bin/activate
+      #     python lerobot/scripts/eval.py \
+      #       hydra.job.name=pusht \
+      #       env=pusht \
+      #       wandb.enable=False \
+      #       eval_episodes=1 \
+      #       device=cpu
+      #----------------------------------------------
+      #              cleanup
+      #----------------------------------------------
+      - name: Cleanup
+        run: rm -rf diffusion_policy data
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# Custom
+diffusion_policy
+
 # Logging
 logs
 tmp
@@ -51,7 +54,6 @@ pip-log.txt
 pip-delete-this-directory.txt

 # Unit test / coverage reports
-!tests/data
 htmlcov/
 .tox/
 .nox/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,4 +1,4 @@
-exclude: ^(data/|tests/)
+exclude: ^(data/|tests/|diffusion_policy/)
 default_language_version:
    python: python3.10
 repos:
--- a/278
+++ b/278
@@ -1,278 +0,0 @@
-Copyright 2024 The Hugging Face team. All rights reserved.
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-
-
-## Some of lerobot's code is derived from Diffusion Policy, which is subject to the following copyright notice:
-
-MIT License
-
-Copyright (c) 2023 Columbia Artificial Intelligence and Robotics Lab
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-
-## Some of lerobot's code is derived from FOWM, which is subject to the following copyright notice:
-
-MIT License
-
-Copyright (c) 2023 Yunhai Feng
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-
-## Some of lerobot's code is derived from ALOHA, which is subject to the following copyright notice:
-
-MIT License
-
-Copyright (c) 2023 Tony Z. Zhao
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 ## Installation

-Create a virtual environment with Python 3.10, e.g. using `conda`:
+Create a virtual environment with python 3.10, e.g. using `conda`:
 ```
 conda create -y -n lerobot python=3.10
 conda activate lerobot
@@ -24,9 +24,11 @@ mkdir ~/tmp
 export TMPDIR='~/tmp'
 ```

-To use [Weights and Biases](https://docs.wandb.ai/quickstart) for experiments tracking, log in with
+Install `diffusion_policy` #HACK
 ```
-wandb login
+# from this directory
+git clone https://github.com/real-stanford/diffusion_policy
+cp -r diffusion_policy/diffusion_policy $(poetry env info -p)/lib/python3.10/site-packages/
 ```

 ## Usage
@@ -59,7 +61,7 @@ env=pusht

 ## TODO

-If you are not sure how to contribute or want to know the next features we working on, look on this project page: [LeRobot TODO](https://github.com/users/Cadene/projects/1)
+If you don't know how to contribute or want to know which features we are working on next, take a look at this project page: [LeRobot TODO](https://github.com/users/Cadene/projects/1)

 Ask [Remi Cadene](re.cadene@gmail.com) for access if needed.

@@ -103,69 +105,7 @@ pre-commit install
 pre-commit run -a
 ```

-**Adding dependencies (temporary)**
-
-Right now, for the CI to work, whenever a new dependency is added it needs to be also added to the cpu env, eg:
-
-```
-# Run in this directory, adds the package to the main env with cuda
-poetry add some-package
-
-# Adds the same package to the cpu env
-cd .github/poetry/cpu && poetry add some-package
-```
-
 **Tests**
-
-Install [git lfs](https://git-lfs.com/) to retrieve test artifacts (if you don't have it already).
-
-On Mac:
 ```
-brew install git-lfs
-git lfs install
+pytest -sx tests
 ```
-
-On Ubuntu:
-```
-sudo apt-get install git-lfs
-git lfs install
-```
-
-Pull artifacts if they're not in [tests/data](tests/data)
-```
-git lfs pull
-```
-
-When adding a new dataset, mock it with
-```
-python tests/scripts/mock_dataset.py --in-data-dir data/<dataset_id> --out-data-dir tests/data/<dataset_id>
-```
-
-Run tests
-```
-DATA_DIR="tests/data" pytest -sx tests
-```
-
-**Datasets**
-
-To add a pytorch rl dataset to the hub, first login and use a token generated from [huggingface settings](https://huggingface.co/settings/tokens) with write access:
-```
-huggingface-cli login --token $HUGGINGFACE_TOKEN --add-to-git-credential
-```
-
-Then you can upload it to the hub with:
-```
-HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli upload --repo-type dataset $HF_USER/$DATASET data/$DATASET
-```
-
-For instance, for [cadene/pusht](https://huggingface.co/datasets/cadene/pusht), we used:
-```
-HF_USER=cadene
-DATASET=pusht
-```
-
-
-## Acknowledgment
- Our Diffusion policy and Pusht environment are adapted from [Diffusion Policy](https://diffusion-policy.cs.columbia.edu/)
- Our TDMPC policy and Simxarm environment are adapted from [FOWM](https://www.yunhaifeng.com/FOWM/)
- Our ACT policy and ALOHA environment are adapted from [ALOHA](https://tonyzhaozh.github.io/aloha/)
--- a/lerobot/init.py
+++ b/lerobot/init.py
@@ -1 +0,0 @@
-from lerobot.__version__ import __version__  # noqa: F401
--- a/lerobot/version.py
+++ b/lerobot/version.py
@@ -1,8 +1 @@
-""" To enable `lerobot.__version__` """
-
-from importlib.metadata import PackageNotFoundError, version
-
-try:
-    __version__ = version("lerobot")
-except PackageNotFoundError:
-    __version__ = "unknown"
+__version__ = "0.0.0"
--- a/lerobot/common/init.py
+++ b/lerobot/common/init.py
--- a/lerobot/common/datasets/init.py
+++ b/lerobot/common/datasets/init.py
--- a/lerobot/common/datasets/abstract.py
+++ b/lerobot/common/datasets/abstract.py
@@ -1,3 +1,4 @@
+import abc
 import logging
 from pathlib import Path
 from typing import Callable
@@ -6,13 +7,12 @@ import einops
 import torch
 import torchrl
 import tqdm
-from huggingface_hub import snapshot_download
 from tensordict import TensorDict
+from torchrl.data.datasets.utils import _get_root_dir
 from torchrl.data.replay_buffers.replay_buffers import TensorDictReplayBuffer
 from torchrl.data.replay_buffers.samplers import SliceSampler
 from torchrl.data.replay_buffers.storages import TensorStorage, _collate_id
 from torchrl.data.replay_buffers.writers import ImmutableDatasetWriter, Writer
-from torchrl.envs.transforms.transforms import Compose


 class AbstractExperienceReplay(TensorDictReplayBuffer):
@@ -22,7 +22,7 @@ class AbstractExperienceReplay(TensorDictReplayBuffer):
        batch_size: int = None,
        *,
        shuffle: bool = True,
-        root: Path | None = None,
+        root: Path = None,
        pin_memory: bool = False,
        prefetch: int = None,
        sampler: SliceSampler = None,
@@ -32,8 +32,11 @@ class AbstractExperienceReplay(TensorDictReplayBuffer):
    ):
        self.dataset_id = dataset_id
        self.shuffle = shuffle
-        self.root = root
-        storage = self._download_or_load_dataset()
+        self.root = _get_root_dir(self.dataset_id) if root is None else root
+        self.root = Path(self.root)
+        self.data_dir = self.root / self.dataset_id
+
+        storage = self._download_or_load_storage()

        super().__init__(
            storage=storage,
@@ -51,7 +54,7 @@ class AbstractExperienceReplay(TensorDictReplayBuffer):
        return {
            ("observation", "state"): "b c -> 1 c",
            ("observation", "image"): "b c h w -> 1 c 1 1",
-            ("action",): "b c -> 1 c",
+            ("action"): "b c -> 1 c",
        }

    @property
@@ -70,22 +73,11 @@ class AbstractExperienceReplay(TensorDictReplayBuffer):
    def num_episodes(self) -> int:
        return len(self._storage._storage["episode"].unique())

-    @property
-    def transform(self):
-        return self._transform
-
    def set_transform(self, transform):
-        if not isinstance(transform, Compose):
-            # required since torchrl calls `len(self._transform)` downstream
-            if isinstance(transform, list):
-                self._transform = Compose(*transform)
-            else:
-                self._transform = Compose(transform)
-        else:
-            self._transform = transform
+        self.transform = transform

    def compute_or_load_stats(self, num_batch=100, batch_size=32) -> TensorDict:
-        stats_path = Path(self.data_dir) / "stats.pth"
+        stats_path = self.data_dir / "stats.pth"
        if stats_path.exists():
            stats = torch.load(stats_path)
        else:
@@ -94,12 +86,19 @@ class AbstractExperienceReplay(TensorDictReplayBuffer):
            torch.save(stats, stats_path)
        return stats

-    def _download_or_load_dataset(self) -> torch.StorageBase:
-        if self.root is None:
-            self.data_dir = snapshot_download(repo_id=f"cadene/{self.dataset_id}", repo_type="dataset")
+    @abc.abstractmethod
+    def _download_and_preproc(self) -> torch.StorageBase:
+        raise NotImplementedError()
+
+    def _download_or_load_storage(self):
+        if not self._is_downloaded():
+            storage = self._download_and_preproc()
        else:
-            self.data_dir = self.root / self.dataset_id
-        return TensorStorage(TensorDict.load_memmap(self.data_dir))
+            storage = TensorStorage(TensorDict.load_memmap(self.data_dir))
+        return storage
+
+    def _is_downloaded(self) -> bool:
+        return self.data_dir.is_dir()

    def _compute_stats(self, num_batch=100, batch_size=32):
        rb = TensorDictReplayBuffer(
--- a/lerobot/common/datasets/aloha.py
+++ b/lerobot/common/datasets/aloha.py
@@ -87,7 +87,7 @@ class AlohaExperienceReplay(AbstractExperienceReplay):
        batch_size: int = None,
        *,
        shuffle: bool = True,
-        root: Path | None = None,
+        root: Path = None,
        pin_memory: bool = False,
        prefetch: int = None,
        sampler: SliceSampler = None,
@@ -114,7 +114,7 @@ class AlohaExperienceReplay(AbstractExperienceReplay):
    def stats_patterns(self) -> dict:
        d = {
            ("observation", "state"): "b c -> 1 c",
-            ("action",): "b c -> 1 c",
+            ("action"): "b c -> 1 c",
        }
        for cam in CAMERAS[self.dataset_id]:
            d[("observation", "image", cam)] = "b c h w -> 1 c 1 1"
@@ -124,9 +124,8 @@ class AlohaExperienceReplay(AbstractExperienceReplay):
    def image_keys(self) -> list:
        return [("observation", "image", cam) for cam in CAMERAS[self.dataset_id]]

-    def _download_and_preproc_obsolete(self):
-        assert self.root is not None
-        raw_dir = self.root / f"{self.dataset_id}_raw"
+    def _download_and_preproc(self):
+        raw_dir = self.data_dir.parent / f"{self.data_dir.name}_raw"
        if not raw_dir.is_dir():
            download(raw_dir, self.dataset_id)

@@ -175,7 +174,7 @@ class AlohaExperienceReplay(AbstractExperienceReplay):

                if ep_id == 0:
                    # hack to initialize tensordict data structure to store episodes
-                    td_data = ep_td[0].expand(total_num_frames).memmap_like(self.root / f"{self.dataset_id}")
+                    td_data = ep_td[0].expand(total_num_frames).memmap_like(self.data_dir)

                td_data[idxtd : idxtd + len(ep_td)] = ep_td
                idxtd = idxtd + len(ep_td)
--- a/lerobot/common/datasets/factory.py
+++ b/lerobot/common/datasets/factory.py
@@ -5,12 +5,9 @@ from pathlib import Path
 import torch
 from torchrl.data.replay_buffers import PrioritizedSliceSampler, SliceSampler

-from lerobot.common.envs.transforms import NormalizeTransform, Prod
+from lerobot.common.envs.transforms import NormalizeTransform

-# DATA_DIR specifies to location where datasets are loaded. By default, DATA_DIR is None and
-# we load from `$HOME/.cache/huggingface/hub/datasets`. For our unit tests, we set `DATA_DIR=tests/data`
-# to load a subset of our datasets for faster continuous integration.
-DATA_DIR = Path(os.environ["DATA_DIR"]) if "DATA_DIR" in os.environ else None
+DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))


 def make_offline_buffer(
@@ -80,49 +77,35 @@ def make_offline_buffer(

    offline_buffer = clsfunc(
        dataset_id=dataset_id,
+        root=DATA_DIR,
        sampler=sampler,
        batch_size=batch_size,
-        root=DATA_DIR,
        pin_memory=pin_memory,
        prefetch=prefetch if isinstance(prefetch, int) else None,
    )

-    if cfg.policy.name == "tdmpc":
-        img_keys = []
-        for key in offline_buffer.image_keys:
-            img_keys.append(("next", *key))
-        img_keys += offline_buffer.image_keys
-    else:
-        img_keys = offline_buffer.image_keys
-
-    transforms = [Prod(in_keys=img_keys, prod=1 / 255)]
-
    if normalize:
        # TODO(rcadene): make normalization strategy configurable between mean_std, min_max, manual_min_max, min_max_from_spec
        stats = offline_buffer.compute_or_load_stats()
-
-        # we only normalize the state and action, since the images are usually normalized inside the model for now (except for tdmpc: see the following)
        in_keys = [("observation", "state"), ("action")]

-        if cfg.policy.name == "tdmpc":
-            # TODO(rcadene): we add img_keys to the keys to normalize for tdmpc only, since diffusion and act policies normalize the image inside the model for now
-            in_keys += img_keys
-            # TODO(racdene): since we use next observations in tdmpc, we also add them to the normalization. We are wasting a bit of compute on this for now.
-            in_keys += [("next", *key) for key in img_keys]
+        if cfg.policy == "tdmpc":
+            for key in offline_buffer.image_keys:
+                # TODO(rcadene): imagenet normalization is applied inside diffusion policy, but no normalization inside tdmpc
+                in_keys.append(key)
+                # since we use next observations in tdmpc
+                in_keys.append(("next", *key))
            in_keys.append(("next", "observation", "state"))

-        if cfg.policy.name == "diffusion" and cfg.env.name == "pusht":
+        if cfg.policy == "diffusion" and cfg.env.name == "pusht":
            # TODO(rcadene): we overwrite stats to have the same as pretrained model, but we should remove this
            stats["observation", "state", "min"] = torch.tensor([13.456424, 32.938293], dtype=torch.float32)
            stats["observation", "state", "max"] = torch.tensor([496.14618, 510.9579], dtype=torch.float32)
            stats["action", "min"] = torch.tensor([12.0, 25.0], dtype=torch.float32)
            stats["action", "max"] = torch.tensor([511.0, 511.0], dtype=torch.float32)

-        # TODO(rcadene): remove this and put it in config. Ideally we want to reproduce SOTA results just with mean_std
-        normalization_mode = "mean_std" if cfg.env.name == "aloha" else "min_max"
-        transforms.append(NormalizeTransform(stats, in_keys, mode=normalization_mode))
-
-    offline_buffer.set_transform(transforms)
+        transform = NormalizeTransform(stats, in_keys, mode="min_max")
+        offline_buffer.set_transform(transform)

    if not overwrite_sampler:
        index = torch.arange(0, offline_buffer.num_samples, 1)
--- a/lerobot/common/datasets/pusht.py
+++ b/lerobot/common/datasets/pusht.py
@@ -8,6 +8,8 @@ import pymunk
 import torch
 import torchrl
 import tqdm
+from diffusion_policy.common.replay_buffer import ReplayBuffer as DiffusionPolicyReplayBuffer
+from diffusion_policy.env.pusht.pusht_env import pymunk_to_shapely
 from tensordict import TensorDict
 from torchrl.data.replay_buffers.samplers import SliceSampler
 from torchrl.data.replay_buffers.storages import TensorStorage
@@ -15,12 +17,11 @@ from torchrl.data.replay_buffers.writers import Writer

 from lerobot.common.datasets.abstract import AbstractExperienceReplay
 from lerobot.common.datasets.utils import download_and_extract_zip
-from lerobot.common.envs.pusht.pusht_env import pymunk_to_shapely
-from lerobot.common.policies.diffusion.replay_buffer import ReplayBuffer as DiffusionPolicyReplayBuffer

 # as define in env
 SUCCESS_THRESHOLD = 0.95  # 95% coverage,

+DEFAULT_TEE_MASK = pymunk.ShapeFilter.ALL_MASKS()
 PUSHT_URL = "https://diffusion-policy.cs.columbia.edu/data/training/pusht.zip"
 PUSHT_ZARR = Path("pusht/pusht_cchi_v7_replay.zarr")

@@ -48,10 +49,8 @@ def add_tee(
    angle,
    scale=30,
    color="LightSlateGray",
-    mask=None,
+    mask=DEFAULT_TEE_MASK,
 ):
-    if mask is None:
-        mask = pymunk.ShapeFilter.ALL_MASKS()
    mass = 1
    length = 4
    vertices1 = [
@@ -90,7 +89,7 @@ class PushtExperienceReplay(AbstractExperienceReplay):
        batch_size: int = None,
        *,
        shuffle: bool = True,
-        root: Path | None = None,
+        root: Path = None,
        pin_memory: bool = False,
        prefetch: int = None,
        sampler: SliceSampler = None,
@@ -111,9 +110,8 @@ class PushtExperienceReplay(AbstractExperienceReplay):
            transform=transform,
        )

-    def _download_and_preproc_obsolete(self):
-        assert self.root is not None
-        raw_dir = self.root / f"{self.dataset_id}_raw"
+    def _download_and_preproc(self):
+        raw_dir = self.data_dir.parent / f"{self.data_dir.name}_raw"
        zarr_path = (raw_dir / PUSHT_ZARR).resolve()
        if not zarr_path.is_dir():
            raw_dir.mkdir(parents=True, exist_ok=True)
@@ -127,9 +125,6 @@ class PushtExperienceReplay(AbstractExperienceReplay):
        episode_ids = torch.from_numpy(dataset_dict.get_episode_idxs())
        num_episodes = dataset_dict.meta["episode_ends"].shape[0]
        total_frames = dataset_dict["action"].shape[0]
-        # to create test artifact
-        # num_episodes = 1
-        # total_frames = 50
        assert len(
            {dataset_dict[key].shape[0] for key in dataset_dict.keys()}  # noqa: SIM118
        ), "Some data type dont have the same number of total frames."
@@ -147,8 +142,6 @@ class PushtExperienceReplay(AbstractExperienceReplay):
        idxtd = 0
        for episode_id in tqdm.tqdm(range(num_episodes)):
            idx1 = dataset_dict.meta["episode_ends"][episode_id]
-            # to create test artifact
-            # idx1 = 51

            num_frames = idx1 - idx0

@@ -209,7 +202,7 @@ class PushtExperienceReplay(AbstractExperienceReplay):

            if episode_id == 0:
                # hack to initialize tensordict data structure to store episodes
-                td_data = ep_td[0].expand(total_frames).memmap_like(self.root / f"{self.dataset_id}")
+                td_data = ep_td[0].expand(total_frames).memmap_like(self.data_dir)

            td_data[idxtd : idxtd + len(ep_td)] = ep_td

--- a/lerobot/common/datasets/simxarm.py
+++ b/lerobot/common/datasets/simxarm.py
@@ -43,7 +43,7 @@ class SimxarmExperienceReplay(AbstractExperienceReplay):
        batch_size: int = None,
        *,
        shuffle: bool = True,
-        root: Path | None = None,
+        root: Path = None,
        pin_memory: bool = False,
        prefetch: int = None,
        sampler: SliceSampler = None,
@@ -64,12 +64,11 @@ class SimxarmExperienceReplay(AbstractExperienceReplay):
            transform=transform,
        )

-    def _download_and_preproc_obsolete(self):
-        assert self.root is not None
+    def _download_and_preproc(self):
        # TODO(rcadene): finish download
        download()

-        dataset_path = self.root / f"{self.dataset_id}_raw" / "buffer.pkl"
+        dataset_path = self.data_dir / "buffer.pkl"
        print(f"Using offline dataset '{dataset_path}'")
        with open(dataset_path, "rb") as f:
            dataset_dict = pickle.load(f)
@@ -111,7 +110,7 @@ class SimxarmExperienceReplay(AbstractExperienceReplay):

            if episode_id == 0:
                # hack to initialize tensordict data structure to store episodes
-                td_data = episode[0].expand(total_frames).memmap_like(self.root / f"{self.dataset_id}")
+                td_data = episode[0].expand(total_frames).memmap_like(self.data_dir)

            td_data[idx0:idx1] = episode

--- a/lerobot/common/envs/__init__.py
+++ b/lerobot/common/envs/__init__.py
--- a/lerobot/common/envs/abstract.py
+++ b/lerobot/common/envs/abstract.py
@@ -34,7 +34,6 @@ class AbstractEnv(EnvBase):
        if from_pixels:
            assert image_size

-        self._make_env()
        self._make_spec()
        self._current_seed = self.set_seed(seed)

@@ -67,10 +66,6 @@ class AbstractEnv(EnvBase):
    def _step(self, tensordict: TensorDict):
        raise NotImplementedError()

-    @abc.abstractmethod
-    def _make_env(self):
-        raise NotImplementedError()
-
    @abc.abstractmethod
    def _make_spec(self):
        raise NotImplementedError()
--- a/lerobot/common/envs/aloha/env.py
+++ b/lerobot/common/envs/aloha/env.py
@@ -58,15 +58,13 @@ class AlohaEnv(AbstractEnv):
            num_prev_obs=num_prev_obs,
            num_prev_action=num_prev_action,
        )
-
-    def _make_env(self):
        if not _has_gym:
            raise ImportError("Cannot import gym.")

-        if not self.from_pixels:
+        if not from_pixels:
            raise NotImplementedError()

-        self._env = self._make_env_task(self.task)
+        self._env = self._make_env_task(task)

    def render(self, mode="rgb_array", width=640, height=480):
        # TODO(rcadene): render and visualizer several cameras (e.g. angle, front_close)
@@ -107,8 +105,7 @@ class AlohaEnv(AbstractEnv):
        if self.from_pixels:
            image = torch.from_numpy(raw_obs["images"]["top"].copy())
            image = einops.rearrange(image, "h w c -> c h w")
-            assert image.dtype == torch.uint8
-            obs = {"image": {"top": image}}
+            obs = {"image": image.type(torch.float32) / 255.0}

            if not self.pixels_only:
                obs["state"] = torch.from_numpy(raw_obs["qpos"]).type(torch.float32)
@@ -142,9 +139,9 @@ class AlohaEnv(AbstractEnv):
                stacked_obs = {}
                if "image" in obs:
                    self._prev_obs_image_queue = deque(
-                        [obs["image"]["top"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
+                        [obs["image"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
                    )
-                    stacked_obs["image"] = {"top": torch.stack(list(self._prev_obs_image_queue))}
+                    stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
                if "state" in obs:
                    self._prev_obs_state_queue = deque(
                        [obs["state"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
@@ -191,8 +188,8 @@ class AlohaEnv(AbstractEnv):
            if self.num_prev_obs > 0:
                stacked_obs = {}
                if "image" in obs:
-                    self._prev_obs_image_queue.append(obs["image"]["top"])
-                    stacked_obs["image"] = {"top": torch.stack(list(self._prev_obs_image_queue))}
+                    self._prev_obs_image_queue.append(obs["image"])
+                    stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
                if "state" in obs:
                    self._prev_obs_state_queue.append(obs["state"])
                    stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
@@ -228,15 +225,13 @@ class AlohaEnv(AbstractEnv):
            if self.num_prev_obs > 0:
                image_shape = (self.num_prev_obs + 1, *image_shape)

-            obs["image"] = {
-                "top": BoundedTensorSpec(
-                    low=0,
-                    high=255,
-                    shape=image_shape,
-                    dtype=torch.uint8,
-                    device=self.device,
-                )
-            }
+            obs["image"] = BoundedTensorSpec(
+                low=0,
+                high=1,
+                shape=image_shape,
+                dtype=torch.float32,
+                device=self.device,
+            )
            if not self.pixels_only:
                state_shape = (len(JOINTS),)
                if self.num_prev_obs > 0:
--- a/lerobot/common/envs/factory.py
+++ b/lerobot/common/envs/factory.py
@@ -1,4 +1,4 @@
-from torchrl.envs.transforms import Compose, StepCounter, Transform, TransformedEnv
+from torchrl.envs.transforms import StepCounter, TransformedEnv


 def make_env(cfg, transform=None):
@@ -18,7 +18,7 @@ def make_env(cfg, transform=None):
        kwargs["task"] = cfg.env.task
        clsfunc = SimxarmEnv
    elif cfg.env.name == "pusht":
-        from lerobot.common.envs.pusht.env import PushtEnv
+        from lerobot.common.envs.pusht import PushtEnv

        # assert kwargs["seed"] > 200, "Seed 0-200 are used for the demonstration dataset, so we don't want to seed the eval env with this range."

@@ -38,13 +38,7 @@ def make_env(cfg, transform=None):

    if transform is not None:
        # useful to add normalization
-        if isinstance(transform, Compose):
-            for tf in transform:
-                env.append_transform(tf.clone())
-        elif isinstance(transform, Transform):
-            env.append_transform(transform.clone())
-        else:
-            raise NotImplementedError()
+        env.append_transform(transform)

    return env

--- a/lerobot/common/envs/pusht/env.py
+++ b/lerobot/common/envs/pusht/env.py
@@ -11,52 +11,64 @@ from torchrl.data.tensor_specs import (
    DiscreteTensorSpec,
    UnboundedContinuousTensorSpec,
 )
+from torchrl.envs import EnvBase
 from torchrl.envs.libs.gym import _gym_to_torchrl_spec_transform

-from lerobot.common.envs.abstract import AbstractEnv
 from lerobot.common.utils import set_seed

 _has_gym = importlib.util.find_spec("gym") is not None
+_has_diffpolicy = importlib.util.find_spec("diffusion_policy") is not None and _has_gym


-class PushtEnv(AbstractEnv):
+class PushtEnv(EnvBase):
    def __init__(
        self,
-        task="pusht",
        frame_skip: int = 1,
        from_pixels: bool = False,
        pixels_only: bool = False,
        image_size=None,
        seed=1337,
        device="cpu",
-        num_prev_obs=1,
+        num_prev_obs=0,
        num_prev_action=0,
    ):
-        super().__init__(
-            task=task,
-            frame_skip=frame_skip,
-            from_pixels=from_pixels,
-            pixels_only=pixels_only,
-            image_size=image_size,
-            seed=seed,
-            device=device,
-            num_prev_obs=num_prev_obs,
-            num_prev_action=num_prev_action,
-        )
+        super().__init__(device=device, batch_size=[])
+        self.frame_skip = frame_skip
+        self.from_pixels = from_pixels
+        self.pixels_only = pixels_only
+        self.image_size = image_size
+        self.num_prev_obs = num_prev_obs
+        self.num_prev_action = num_prev_action

-    def _make_env(self):
+        if pixels_only:
+            assert from_pixels
+        if from_pixels:
+            assert image_size
+
        if not _has_gym:
            raise ImportError("Cannot import gym.")
+        if not _has_diffpolicy:  # also False when gym is missing, so gym is tested first
+            raise ImportError("Cannot import diffusion_policy.")

        # TODO(rcadene) (PushTEnv is similar to PushTImageEnv, but without the image rendering, it's faster to iterate on)
-        # from lerobot.common.envs.pusht.pusht_env import PushTEnv
+        # from diffusion_policy.env.pusht.pusht_env import PushTEnv

-        if not self.from_pixels:
+        if not from_pixels:
            raise NotImplementedError("Use PushTEnv, instead of PushTImageEnv")
-        from lerobot.common.envs.pusht.pusht_image_env import PushTImageEnv
+        from diffusion_policy.env.pusht.pusht_image_env import PushTImageEnv

        self._env = PushTImageEnv(render_size=self.image_size)

+        self._make_spec()
+        self._current_seed = self.set_seed(seed)
+
+        if self.num_prev_obs > 0:
+            self._prev_obs_image_queue = deque(maxlen=self.num_prev_obs)
+            self._prev_obs_state_queue = deque(maxlen=self.num_prev_obs)
+        if self.num_prev_action > 0:
+            raise NotImplementedError()
+            # self._prev_action_queue = deque(maxlen=self.num_prev_action)
+
    def render(self, mode="rgb_array", width=384, height=384):
        if width != height:
            raise NotImplementedError()
@@ -113,8 +125,6 @@ class PushtEnv(AbstractEnv):
            )
        else:
            raise NotImplementedError()
-
-        self.call_rendering_hooks()
        return td

    def _step(self, tensordict: TensorDict):
@@ -147,8 +157,6 @@ class PushtEnv(AbstractEnv):
                    stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
                obs = stacked_obs

-            self.call_rendering_hooks()
-
        td = TensorDict(
            {
                "observation": TensorDict(obs, batch_size=[]),
@@ -170,9 +178,9 @@ class PushtEnv(AbstractEnv):

            obs["image"] = BoundedTensorSpec(
                low=0,
-                high=255,
+                high=1,
                shape=image_shape,
-                dtype=torch.uint8,
+                dtype=torch.float32,
                device=self.device,
            )
            if not self.pixels_only:
--- a/lerobot/common/envs/pusht/pusht_env.py
+++ b/lerobot/common/envs/pusht/pusht_env.py
@@ -1,378 +0,0 @@
-import collections
-
-import cv2
-import gym
-import numpy as np
-import pygame
-import pymunk
-import pymunk.pygame_util
-import shapely.geometry as sg
-import skimage.transform as st
-from gym import spaces
-from pymunk.vec2d import Vec2d
-
-from lerobot.common.envs.pusht.pymunk_override import DrawOptions
-
-
-def pymunk_to_shapely(body, shapes):
-    geoms = []
-    for shape in shapes:
-        if isinstance(shape, pymunk.shapes.Poly):
-            verts = [body.local_to_world(v) for v in shape.get_vertices()]
-            verts += [verts[0]]
-            geoms.append(sg.Polygon(verts))
-        else:
-            raise RuntimeError(f"Unsupported shape type {type(shape)}")
-    geom = sg.MultiPolygon(geoms)
-    return geom
-
-
-class PushTEnv(gym.Env):
-    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 10}
-    reward_range = (0.0, 1.0)
-
-    def __init__(
-        self,
-        legacy=False,
-        block_cog=None,
-        damping=None,
-        render_action=True,
-        render_size=96,
-        reset_to_state=None,
-    ):
-        self._seed = None
-        self.seed()
-        self.window_size = ws = 512  # The size of the PyGame window
-        self.render_size = render_size
-        self.sim_hz = 100
-        # Local controller params.
-        self.k_p, self.k_v = 100, 20  # PD control.z
-        self.control_hz = self.metadata["video.frames_per_second"]
-        # legcay set_state for data compatibility
-        self.legacy = legacy
-
-        # agent_pos, block_pos, block_angle
-        self.observation_space = spaces.Box(
-            low=np.array([0, 0, 0, 0, 0], dtype=np.float64),
-            high=np.array([ws, ws, ws, ws, np.pi * 2], dtype=np.float64),
-            shape=(5,),
-            dtype=np.float64,
-        )
-
-        # positional goal for agent
-        self.action_space = spaces.Box(
-            low=np.array([0, 0], dtype=np.float64),
-            high=np.array([ws, ws], dtype=np.float64),
-            shape=(2,),
-            dtype=np.float64,
-        )
-
-        self.block_cog = block_cog
-        self.damping = damping
-        self.render_action = render_action
-
-        """
-        If human-rendering is used, `self.window` will be a reference
-        to the window that we draw to. `self.clock` will be a clock that is used
-        to ensure that the environment is rendered at the correct framerate in
-        human-mode. They will remain `None` until human-mode is used for the
-        first time.
-        """
-        self.window = None
-        self.clock = None
-        self.screen = None
-
-        self.space = None
-        self.teleop = None
-        self.render_buffer = None
-        self.latest_action = None
-        self.reset_to_state = reset_to_state
-
-    def reset(self):
-        seed = self._seed
-        self._setup()
-        if self.block_cog is not None:
-            self.block.center_of_gravity = self.block_cog
-        if self.damping is not None:
-            self.space.damping = self.damping
-
-        # use legacy RandomState for compatibility
-        state = self.reset_to_state
-        if state is None:
-            rs = np.random.RandomState(seed=seed)
-            state = np.array(
-                [
-                    rs.randint(50, 450),
-                    rs.randint(50, 450),
-                    rs.randint(100, 400),
-                    rs.randint(100, 400),
-                    rs.randn() * 2 * np.pi - np.pi,
-                ]
-            )
-        self._set_state(state)
-
-        observation = self._get_obs()
-        return observation
-
-    def step(self, action):
-        dt = 1.0 / self.sim_hz
-        self.n_contact_points = 0
-        n_steps = self.sim_hz // self.control_hz
-        if action is not None:
-            self.latest_action = action
-            for _ in range(n_steps):
-                # Step PD control.
-                # self.agent.velocity = self.k_p * (act - self.agent.position)    # P control works too.
-                acceleration = self.k_p * (action - self.agent.position) + self.k_v * (
-                    Vec2d(0, 0) - self.agent.velocity
-                )
-                self.agent.velocity += acceleration * dt
-
-                # Step physics.
-                self.space.step(dt)
-
-        # compute reward
-        goal_body = self._get_goal_pose_body(self.goal_pose)
-        goal_geom = pymunk_to_shapely(goal_body, self.block.shapes)
-        block_geom = pymunk_to_shapely(self.block, self.block.shapes)
-
-        intersection_area = goal_geom.intersection(block_geom).area
-        goal_area = goal_geom.area
-        coverage = intersection_area / goal_area
-        reward = np.clip(coverage / self.success_threshold, 0, 1)
-        done = coverage > self.success_threshold
-
-        observation = self._get_obs()
-        info = self._get_info()
-
-        return observation, reward, done, info
-
-    def render(self, mode):
-        return self._render_frame(mode)
-
-    def teleop_agent(self):
-        TeleopAgent = collections.namedtuple("TeleopAgent", ["act"])
-
-        def act(obs):
-            act = None
-            mouse_position = pymunk.pygame_util.from_pygame(Vec2d(*pygame.mouse.get_pos()), self.screen)
-            if self.teleop or (mouse_position - self.agent.position).length < 30:
-                self.teleop = True
-                act = mouse_position
-            return act
-
-        return TeleopAgent(act)
-
-    def _get_obs(self):
-        obs = np.array(
-            tuple(self.agent.position) + tuple(self.block.position) + (self.block.angle % (2 * np.pi),)
-        )
-        return obs
-
-    def _get_goal_pose_body(self, pose):
-        mass = 1
-        inertia = pymunk.moment_for_box(mass, (50, 100))
-        body = pymunk.Body(mass, inertia)
-        # preserving the legacy assignment order for compatibility
-        # the order here doesn't matter somehow, maybe because CoM is aligned with body origin
-        body.position = pose[:2].tolist()
-        body.angle = pose[2]
-        return body
-
-    def _get_info(self):
-        n_steps = self.sim_hz // self.control_hz
-        n_contact_points_per_step = int(np.ceil(self.n_contact_points / n_steps))
-        info = {
-            "pos_agent": np.array(self.agent.position),
-            "vel_agent": np.array(self.agent.velocity),
-            "block_pose": np.array(list(self.block.position) + [self.block.angle]),
-            "goal_pose": self.goal_pose,
-            "n_contacts": n_contact_points_per_step,
-        }
-        return info
-
-    def _render_frame(self, mode):
-        if self.window is None and mode == "human":
-            pygame.init()
-            pygame.display.init()
-            self.window = pygame.display.set_mode((self.window_size, self.window_size))
-        if self.clock is None and mode == "human":
-            self.clock = pygame.time.Clock()
-
-        canvas = pygame.Surface((self.window_size, self.window_size))
-        canvas.fill((255, 255, 255))
-        self.screen = canvas
-
-        draw_options = DrawOptions(canvas)
-
-        # Draw goal pose.
-        goal_body = self._get_goal_pose_body(self.goal_pose)
-        for shape in self.block.shapes:
-            goal_points = [
-                pymunk.pygame_util.to_pygame(goal_body.local_to_world(v), draw_options.surface)
-                for v in shape.get_vertices()
-            ]
-            goal_points += [goal_points[0]]
-            pygame.draw.polygon(canvas, self.goal_color, goal_points)
-
-        # Draw agent and block.
-        self.space.debug_draw(draw_options)
-
-        if mode == "human":
-            # The following line copies our drawings from `canvas` to the visible window
-            self.window.blit(canvas, canvas.get_rect())
-            pygame.event.pump()
-            pygame.display.update()
-
-            # the clock is already ticked during in step for "human"
-
-        img = np.transpose(np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2))
-        img = cv2.resize(img, (self.render_size, self.render_size))
-        if self.render_action and self.latest_action is not None:
-            action = np.array(self.latest_action)
-            coord = (action / 512 * 96).astype(np.int32)
-            marker_size = int(8 / 96 * self.render_size)
-            thickness = int(1 / 96 * self.render_size)
-            cv2.drawMarker(
-                img,
-                coord,
-                color=(255, 0, 0),
-                markerType=cv2.MARKER_CROSS,
-                markerSize=marker_size,
-                thickness=thickness,
-            )
-        return img
-
-    def close(self):
-        if self.window is not None:
-            pygame.display.quit()
-            pygame.quit()
-
-    def seed(self, seed=None):
-        if seed is None:
-            seed = np.random.randint(0, 25536)
-        self._seed = seed
-        self.np_random = np.random.default_rng(seed)
-
-    def _handle_collision(self, arbiter, space, data):
-        self.n_contact_points += len(arbiter.contact_point_set.points)
-
-    def _set_state(self, state):
-        if isinstance(state, np.ndarray):
-            state = state.tolist()
-        pos_agent = state[:2]
-        pos_block = state[2:4]
-        rot_block = state[4]
-        self.agent.position = pos_agent
-        # setting angle rotates with respect to center of mass
-        # therefore will modify the geometric position
-        # if not the same as CoM
-        # therefore should be modified first.
-        if self.legacy:
-            # for compatibility with legacy data
-            self.block.position = pos_block
-            self.block.angle = rot_block
-        else:
-            self.block.angle = rot_block
-            self.block.position = pos_block
-
-        # Run physics to take effect
-        self.space.step(1.0 / self.sim_hz)
-
-    def _set_state_local(self, state_local):
-        agent_pos_local = state_local[:2]
-        block_pose_local = state_local[2:]
-        tf_img_obj = st.AffineTransform(translation=self.goal_pose[:2], rotation=self.goal_pose[2])
-        tf_obj_new = st.AffineTransform(translation=block_pose_local[:2], rotation=block_pose_local[2])
-        tf_img_new = st.AffineTransform(matrix=tf_img_obj.params @ tf_obj_new.params)
-        agent_pos_new = tf_img_new(agent_pos_local)
-        new_state = np.array(list(agent_pos_new[0]) + list(tf_img_new.translation) + [tf_img_new.rotation])
-        self._set_state(new_state)
-        return new_state
-
-    def _setup(self):
-        self.space = pymunk.Space()
-        self.space.gravity = 0, 0
-        self.space.damping = 0
-        self.teleop = False
-        self.render_buffer = []
-
-        # Add walls.
-        walls = [
-            self._add_segment((5, 506), (5, 5), 2),
-            self._add_segment((5, 5), (506, 5), 2),
-            self._add_segment((506, 5), (506, 506), 2),
-            self._add_segment((5, 506), (506, 506), 2),
-        ]
-        self.space.add(*walls)
-
-        # Add agent, block, and goal zone.
-        self.agent = self.add_circle((256, 400), 15)
-        self.block = self.add_tee((256, 300), 0)
-        self.goal_color = pygame.Color("LightGreen")
-        self.goal_pose = np.array([256, 256, np.pi / 4])  # x, y, theta (in radians)
-
-        # Add collision handling
-        self.collision_handeler = self.space.add_collision_handler(0, 0)
-        self.collision_handeler.post_solve = self._handle_collision
-        self.n_contact_points = 0
-
-        self.max_score = 50 * 100
-        self.success_threshold = 0.95  # 95% coverage.
-
-    def _add_segment(self, a, b, radius):
-        shape = pymunk.Segment(self.space.static_body, a, b, radius)
-        shape.color = pygame.Color("LightGray")  # https://htmlcolorcodes.com/color-names
-        return shape
-
-    def add_circle(self, position, radius):
-        body = pymunk.Body(body_type=pymunk.Body.KINEMATIC)
-        body.position = position
-        body.friction = 1
-        shape = pymunk.Circle(body, radius)
-        shape.color = pygame.Color("RoyalBlue")
-        self.space.add(body, shape)
-        return body
-
-    def add_box(self, position, height, width):
-        mass = 1
-        inertia = pymunk.moment_for_box(mass, (height, width))
-        body = pymunk.Body(mass, inertia)
-        body.position = position
-        shape = pymunk.Poly.create_box(body, (height, width))
-        shape.color = pygame.Color("LightSlateGray")
-        self.space.add(body, shape)
-        return body
-
-    def add_tee(self, position, angle, scale=30, color="LightSlateGray", mask=None):
-        if mask is None:
-            mask = pymunk.ShapeFilter.ALL_MASKS()
-        mass = 1
-        length = 4
-        vertices1 = [
-            (-length * scale / 2, scale),
-            (length * scale / 2, scale),
-            (length * scale / 2, 0),
-            (-length * scale / 2, 0),
-        ]
-        inertia1 = pymunk.moment_for_poly(mass, vertices=vertices1)
-        vertices2 = [
-            (-scale / 2, scale),
-            (-scale / 2, length * scale),
-            (scale / 2, length * scale),
-            (scale / 2, scale),
-        ]
-        inertia2 = pymunk.moment_for_poly(mass, vertices=vertices1)
-        body = pymunk.Body(mass, inertia1 + inertia2)
-        shape1 = pymunk.Poly(body, vertices1)
-        shape2 = pymunk.Poly(body, vertices2)
-        shape1.color = pygame.Color(color)
-        shape2.color = pygame.Color(color)
-        shape1.filter = pymunk.ShapeFilter(mask=mask)
-        shape2.filter = pymunk.ShapeFilter(mask=mask)
-        body.center_of_gravity = (shape1.center_of_gravity + shape2.center_of_gravity) / 2
-        body.position = position
-        body.angle = angle
-        body.friction = 1
-        self.space.add(body, shape1, shape2)
-        return body
--- a/lerobot/common/envs/pusht/pusht_image_env.py
+++ b/lerobot/common/envs/pusht/pusht_image_env.py
@@ -1,55 +0,0 @@
-import cv2
-import numpy as np
-from gym import spaces
-
-from lerobot.common.envs.pusht.pusht_env import PushTEnv
-
-
-class PushTImageEnv(PushTEnv):
-    metadata = {"render.modes": ["rgb_array"], "video.frames_per_second": 10}
-
-    def __init__(self, legacy=False, block_cog=None, damping=None, render_size=96):
-        super().__init__(
-            legacy=legacy, block_cog=block_cog, damping=damping, render_size=render_size, render_action=False
-        )
-        ws = self.window_size
-        self.observation_space = spaces.Dict(
-            {
-                "image": spaces.Box(low=0, high=1, shape=(3, render_size, render_size), dtype=np.float32),
-                "agent_pos": spaces.Box(low=0, high=ws, shape=(2,), dtype=np.float32),
-            }
-        )
-        self.render_cache = None
-
-    def _get_obs(self):
-        img = super()._render_frame(mode="rgb_array")
-
-        agent_pos = np.array(self.agent.position)
-        img_obs = np.moveaxis(img, -1, 0)
-        obs = {"image": img_obs, "agent_pos": agent_pos}
-
-        # draw action
-        if self.latest_action is not None:
-            action = np.array(self.latest_action)
-            coord = (action / 512 * 96).astype(np.int32)
-            marker_size = int(8 / 96 * self.render_size)
-            thickness = int(1 / 96 * self.render_size)
-            cv2.drawMarker(
-                img,
-                coord,
-                color=(255, 0, 0),
-                markerType=cv2.MARKER_CROSS,
-                markerSize=marker_size,
-                thickness=thickness,
-            )
-        self.render_cache = img
-
-        return obs
-
-    def render(self, mode):
-        assert mode == "rgb_array"
-
-        if self.render_cache is None:
-            self._get_obs()
-
-        return self.render_cache
--- a/lerobot/common/envs/pusht/pymunk_override.py
+++ b/lerobot/common/envs/pusht/pymunk_override.py
@@ -1,244 +0,0 @@
-# ----------------------------------------------------------------------------
-# pymunk
-# Copyright (c) 2007-2016 Victor Blomqvist
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ----------------------------------------------------------------------------
-
-"""This submodule contains helper functions to help with quick prototyping
-using pymunk together with pygame.
-
-Intended to help with debugging and prototyping, not for actual production use
-in a full application. The methods contained in this module is opinionated
-about your coordinate system and not in any way optimized.
-"""
-
-__docformat__ = "reStructuredText"
-
-__all__ = [
-    "DrawOptions",
-    "get_mouse_pos",
-    "to_pygame",
-    "from_pygame",
-    # "lighten",
-    "positive_y_is_up",
-]
-
-from typing import Sequence, Tuple
-
-import numpy as np
-import pygame
-import pymunk
-from pymunk.space_debug_draw_options import SpaceDebugColor
-from pymunk.vec2d import Vec2d
-
-positive_y_is_up: bool = False
-"""Make increasing values of y point upwards.
-
-When True::
-
-    y
-    ^
-    |      . (3, 3)
-    |
-    |   . (2, 2)
-    |
-    +------ > x
-
-When False::
-
-    +------ > x
-    |
-    |   . (2, 2)
-    |
-    |      . (3, 3)
-    v
-    y
-
-"""
-
-
-class DrawOptions(pymunk.SpaceDebugDrawOptions):
-    def __init__(self, surface: pygame.Surface) -> None:
-        """Draw a pymunk.Space on a pygame.Surface object.
-
-        Typical usage::
-
-        >>> import pymunk
-        >>> surface = pygame.Surface((10,10))
-        >>> space = pymunk.Space()
-        >>> options = pymunk.pygame_util.DrawOptions(surface)
-        >>> space.debug_draw(options)
-
-        You can control the color of a shape by setting shape.color to the color
-        you want it drawn in::
-
-        >>> c = pymunk.Circle(None, 10)
-        >>> c.color = pygame.Color("pink")
-
-        See pygame_util.demo.py for a full example
-
-        Since pygame uses a coordinate system where y points down (in contrast
-        to many other cases), you either have to make the physics simulation
-        with Pymunk also behave in that way, or flip everything when you draw.
-
-        The easiest is probably to just make the simulation behave the same
-        way as Pygame does. In that way all coordinates used are in the same
-        orientation and easy to reason about::
-
-        >>> space = pymunk.Space()
-        >>> space.gravity = (0, -1000)
-        >>> body = pymunk.Body()
-        >>> body.position = (0, 0) # will be positioned in the top left corner
-        >>> space.debug_draw(options)
-
-        To flip the drawing its possible to set the module property
-        :py:data:`positive_y_is_up` to True. Then the pygame drawing will flip
-        the simulation upside down before drawing::
-
-        >>> positive_y_is_up = True
-        >>> body = pymunk.Body()
-        >>> body.position = (0, 0)
-        >>> # Body will be position in bottom left corner
-
-        :Parameters:
-                surface : pygame.Surface
-                    Surface that the objects will be drawn on
-        """
-        self.surface = surface
-        super().__init__()
-
-    def draw_circle(
-        self,
-        pos: Vec2d,
-        angle: float,
-        radius: float,
-        outline_color: SpaceDebugColor,
-        fill_color: SpaceDebugColor,
-    ) -> None:
-        p = to_pygame(pos, self.surface)
-
-        pygame.draw.circle(self.surface, fill_color.as_int(), p, round(radius), 0)
-        pygame.draw.circle(self.surface, light_color(fill_color).as_int(), p, round(radius - 4), 0)
-
-        # circle_edge = pos + Vec2d(radius, 0).rotated(angle)
-        # p2 = to_pygame(circle_edge, self.surface)
-        # line_r = 2 if radius > 20 else 1
-        # pygame.draw.lines(self.surface, outline_color.as_int(), False, [p, p2], line_r)
-
-    def draw_segment(self, a: Vec2d, b: Vec2d, color: SpaceDebugColor) -> None:
-        p1 = to_pygame(a, self.surface)
-        p2 = to_pygame(b, self.surface)
-
-        pygame.draw.aalines(self.surface, color.as_int(), False, [p1, p2])
-
-    def draw_fat_segment(
-        self,
-        a: Tuple[float, float],
-        b: Tuple[float, float],
-        radius: float,
-        outline_color: SpaceDebugColor,
-        fill_color: SpaceDebugColor,
-    ) -> None:
-        p1 = to_pygame(a, self.surface)
-        p2 = to_pygame(b, self.surface)
-
-        r = round(max(1, radius * 2))
-        pygame.draw.lines(self.surface, fill_color.as_int(), False, [p1, p2], r)
-        if r > 2:
-            orthog = [abs(p2[1] - p1[1]), abs(p2[0] - p1[0])]
-            if orthog[0] == 0 and orthog[1] == 0:
-                return
-            scale = radius / (orthog[0] * orthog[0] + orthog[1] * orthog[1]) ** 0.5
-            orthog[0] = round(orthog[0] * scale)
-            orthog[1] = round(orthog[1] * scale)
-            points = [
-                (p1[0] - orthog[0], p1[1] - orthog[1]),
-                (p1[0] + orthog[0], p1[1] + orthog[1]),
-                (p2[0] + orthog[0], p2[1] + orthog[1]),
-                (p2[0] - orthog[0], p2[1] - orthog[1]),
-            ]
-            pygame.draw.polygon(self.surface, fill_color.as_int(), points)
-            pygame.draw.circle(
-                self.surface,
-                fill_color.as_int(),
-                (round(p1[0]), round(p1[1])),
-                round(radius),
-            )
-            pygame.draw.circle(
-                self.surface,
-                fill_color.as_int(),
-                (round(p2[0]), round(p2[1])),
-                round(radius),
-            )
-
-    def draw_polygon(
-        self,
-        verts: Sequence[Tuple[float, float]],
-        radius: float,
-        outline_color: SpaceDebugColor,
-        fill_color: SpaceDebugColor,
-    ) -> None:
-        ps = [to_pygame(v, self.surface) for v in verts]
-        ps += [ps[0]]
-
-        radius = 2
-        pygame.draw.polygon(self.surface, light_color(fill_color).as_int(), ps)
-
-        if radius > 0:
-            for i in range(len(verts)):
-                a = verts[i]
-                b = verts[(i + 1) % len(verts)]
-                self.draw_fat_segment(a, b, radius, fill_color, fill_color)
-
-    def draw_dot(self, size: float, pos: Tuple[float, float], color: SpaceDebugColor) -> None:
-        p = to_pygame(pos, self.surface)
-        pygame.draw.circle(self.surface, color.as_int(), p, round(size), 0)
-
-
-def get_mouse_pos(surface: pygame.Surface) -> Tuple[int, int]:
-    """Get position of the mouse pointer in pymunk coordinates."""
-    p = pygame.mouse.get_pos()
-    return from_pygame(p, surface)
-
-
-def to_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]:
-    """Convenience method to convert pymunk coordinates to pygame surface
-    local coordinates.
-
-    Note that in case positive_y_is_up is False, this function won't actually do
-    anything except converting the point to integers.
-    """
-    if positive_y_is_up:
-        return round(p[0]), surface.get_height() - round(p[1])
-    else:
-        return round(p[0]), round(p[1])
-
-
-def from_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]:
-    """Convenience method to convert pygame surface local coordinates to
-    pymunk coordinates
-    """
-    return to_pygame(p, surface)
-
-
-def light_color(color: SpaceDebugColor):
-    color = np.minimum(1.2 * np.float32([color.r, color.g, color.b, color.a]), np.float32([255]))
-    color = SpaceDebugColor(r=color[0], g=color[1], b=color[2], a=color[3])
-    return color
--- a/lerobot/common/envs/simxarm.py
+++ b/lerobot/common/envs/simxarm.py
@@ -1,8 +1,6 @@
 import importlib
-from collections import deque
 from typing import Optional

-import einops
 import numpy as np
 import torch
 from tensordict import TensorDict
@@ -12,9 +10,9 @@ from torchrl.data.tensor_specs import (
    DiscreteTensorSpec,
    UnboundedContinuousTensorSpec,
 )
+from torchrl.envs import EnvBase
 from torchrl.envs.libs.gym import _gym_to_torchrl_spec_transform

-from lerobot.common.envs.abstract import AbstractEnv
 from lerobot.common.utils import set_seed

 MAX_NUM_ACTIONS = 4
@@ -23,7 +21,7 @@ _has_gym = importlib.util.find_spec("gym") is not None
 _has_simxarm = importlib.util.find_spec("simxarm") is not None and _has_gym


-class SimxarmEnv(AbstractEnv):
+class SimxarmEnv(EnvBase):
    def __init__(
        self,
        task,
@@ -33,22 +31,19 @@ class SimxarmEnv(AbstractEnv):
        image_size=None,
        seed=1337,
        device="cpu",
-        num_prev_obs=0,
-        num_prev_action=0,
    ):
-        super().__init__(
-            task=task,
-            frame_skip=frame_skip,
-            from_pixels=from_pixels,
-            pixels_only=pixels_only,
-            image_size=image_size,
-            seed=seed,
-            device=device,
-            num_prev_obs=num_prev_obs,
-            num_prev_action=num_prev_action,
-        )
+        super().__init__(device=device, batch_size=[])
+        self.task = task
+        self.frame_skip = frame_skip
+        self.from_pixels = from_pixels
+        self.pixels_only = pixels_only
+        self.image_size = image_size
+
+        if pixels_only:
+            assert from_pixels
+        if from_pixels:
+            assert image_size

-    def _make_env(self):
        if not _has_simxarm:
            raise ImportError("Cannot import simxarm.")
        if not _has_gym:
@@ -68,6 +63,9 @@ class SimxarmEnv(AbstractEnv):
        if "w" not in TASKS[self.task]["action_space"]:
            self._action_padding[-1] = 1.0

+        self._make_spec()
+        self.set_seed(seed)
+
    def render(self, mode="rgb_array", width=384, height=384):
        return self._env.render(mode, width=width, height=height)

@@ -92,33 +90,15 @@ class SimxarmEnv(AbstractEnv):
        if td is None or td.is_empty():
            raw_obs = self._env.reset()

-            obs = self._format_raw_obs(raw_obs)
-
-            if self.num_prev_obs > 0:
-                stacked_obs = {}
-                if "image" in obs:
-                    self._prev_obs_image_queue = deque(
-                        [obs["image"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
-                    )
-                    stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
-                if "state" in obs:
-                    self._prev_obs_state_queue = deque(
-                        [obs["state"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
-                    )
-                    stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
-                obs = stacked_obs
-
            td = TensorDict(
                {
-                    "observation": TensorDict(obs, batch_size=[]),
+                    "observation": self._format_raw_obs(raw_obs),
                    "done": torch.tensor([False], dtype=torch.bool),
                },
                batch_size=[],
            )
        else:
            raise NotImplementedError()
-
-        self.call_rendering_hooks()
        return td

    def _step(self, tensordict: TensorDict):
@@ -128,32 +108,10 @@ class SimxarmEnv(AbstractEnv):
        action = np.concatenate([action, self._action_padding])
        # TODO(rcadene): add info["is_success"] and info["success"] ?
        sum_reward = 0
-
-        if action.ndim == 1:
-            action = einops.repeat(action, "c -> t c", t=self.frame_skip)
-        else:
-            if self.frame_skip > 1:
-                raise NotImplementedError()
-
-        num_action_steps = action.shape[0]
-        for i in range(num_action_steps):
-            raw_obs, reward, done, info = self._env.step(action[i])
+        for _ in range(self.frame_skip):
+            raw_obs, reward, done, info = self._env.step(action)
            sum_reward += reward

-            obs = self._format_raw_obs(raw_obs)
-
-            if self.num_prev_obs > 0:
-                stacked_obs = {}
-                if "image" in obs:
-                    self._prev_obs_image_queue.append(obs["image"])
-                    stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
-                if "state" in obs:
-                    self._prev_obs_state_queue.append(obs["state"])
-                    stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
-                obs = stacked_obs
-
-            self.call_rendering_hooks()
-
        td = TensorDict(
            {
                "observation": self._format_raw_obs(raw_obs),
@@ -168,36 +126,23 @@ class SimxarmEnv(AbstractEnv):
    def _make_spec(self):
        obs = {}
        if self.from_pixels:
-            image_shape = (3, self.image_size, self.image_size)
-            if self.num_prev_obs > 0:
-                image_shape = (self.num_prev_obs + 1, *image_shape)
-
            obs["image"] = BoundedTensorSpec(
                low=0,
                high=255,
-                shape=image_shape,
+                shape=(3, self.image_size, self.image_size),
                dtype=torch.uint8,
                device=self.device,
            )
            if not self.pixels_only:
-                state_shape = (len(self._env.robot_state),)
-                if self.num_prev_obs > 0:
-                    state_shape = (self.num_prev_obs + 1, *state_shape)
-
                obs["state"] = UnboundedContinuousTensorSpec(
-                    shape=state_shape,
+                    shape=(len(self._env.robot_state),),
                    dtype=torch.float32,
                    device=self.device,
                )
        else:
            # TODO(rcadene): add observation_space achieved_goal and desired_goal?
-            state_shape = self._env.observation_space["observation"].shape
-            if self.num_prev_obs > 0:
-                state_shape = (self.num_prev_obs + 1, *state_shape)
-
            obs["state"] = UnboundedContinuousTensorSpec(
-                # TODO:
-                shape=state_shape,
+                shape=self._env.observation_space["observation"].shape,
                dtype=torch.float32,
                device=self.device,
            )
--- a/lerobot/common/envs/transforms.py
+++ b/lerobot/common/envs/transforms.py
@@ -1,6 +1,5 @@
 from typing import Sequence

-import torch
 from tensordict import TensorDictBase
 from tensordict.nn import dispatch
 from tensordict.utils import NestedKey
@@ -8,45 +7,19 @@ from torchrl.envs.transforms import ObservationTransform, Transform


 class Prod(ObservationTransform):
-    invertible = True
-
    def __init__(self, in_keys: Sequence[NestedKey], prod: float):
        super().__init__()
        self.in_keys = in_keys
        self.prod = prod
-        self.original_dtypes = {}
-
-    def _reset(self, tensordict: TensorDictBase, tensordict_reset: TensorDictBase) -> TensorDictBase:
-        # _reset is called once when the environment reset to normalize the first observation
-        tensordict_reset = self._call(tensordict_reset)
-        return tensordict_reset
-
-    @dispatch(source="in_keys", dest="out_keys")
-    def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
-        return self._call(tensordict)

    def _call(self, td):
        for key in self.in_keys:
-            if td.get(key, None) is None:
-                continue
-            self.original_dtypes[key] = td[key].dtype
-            td[key] = td[key].type(torch.float32) * self.prod
-        return td
-
-    def _inv_call(self, td: TensorDictBase) -> TensorDictBase:
-        for key in self.in_keys:
-            if td.get(key, None) is None:
-                continue
-            td[key] = (td[key] / self.prod).type(self.original_dtypes[key])
+            td[key] *= self.prod
        return td

    def transform_observation_spec(self, obs_spec):
        for key in self.in_keys:
-            if obs_spec.get(key, None) is None:
-                continue
-            obs_spec[key].space.high = obs_spec[key].space.high.type(torch.float32) * self.prod
-            obs_spec[key].space.low = obs_spec[key].space.low.type(torch.float32) * self.prod
-            obs_spec[key].dtype = torch.float32
+            obs_spec[key].space.high *= self.prod
        return obs_spec


--- a/lerobot/common/logger.py
+++ b/lerobot/common/logger.py
@@ -38,7 +38,7 @@ class Logger:
        project = cfg.get("wandb", {}).get("project")
        entity = cfg.get("wandb", {}).get("entity")
        enable_wandb = cfg.get("wandb", {}).get("enable", False)
-        run_offline = not enable_wandb or not project
+        run_offline = not enable_wandb or not project or not entity
        if run_offline:
            logging.info(colored("Logs will be saved locally.", "yellow", attrs=["bold"]))
            self._wandb = None
@@ -63,7 +63,6 @@ class Logger:
                resume=None,
            )
            print(colored("Logs will be synced with wandb.", "blue", attrs=["bold"]))
-            logging.info(f"Track this run --> {colored(wandb.run.get_url(), 'yellow', attrs=['bold'])}")
            self._wandb = wandb

    def save_model(self, policy, identifier):
--- a/lerobot/common/policies/init.py
+++ b/lerobot/common/policies/init.py
--- a/lerobot/common/policies/act/policy.py
+++ b/lerobot/common/policies/act/policy.py
@@ -49,6 +49,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
        self.model, self.optimizer = build_act_model_and_optimizer(cfg)
        self.kl_weight = self.cfg.kl_weight
        logging.info(f"KL Weight {self.kl_weight}")
+
        self.to(self.device)

    def update(self, replay_buffer, step):
@@ -154,15 +155,15 @@ class ActionChunkingTransformerPolicy(nn.Module):
        self.eval()

        # TODO(rcadene): remove unsqueeze hack to add bsize=1
-        observation["image", "top"] = observation["image", "top"].unsqueeze(0)
-        # observation["state"] = observation["state"].unsqueeze(0)
+        observation["image"] = observation["image"].unsqueeze(0)
+        observation["state"] = observation["state"].unsqueeze(0)

        # TODO(rcadene): remove hack
        # add 1 camera dimension
-        observation["image", "top"] = observation["image", "top"].unsqueeze(1)
+        observation["image"] = observation["image"].unsqueeze(1)

        obs_dict = {
-            "image": observation["image", "top"],
+            "image": observation["image"],
            "agent_pos": observation["state"],
        }
        action = self._forward(qpos=obs_dict["agent_pos"], image=obs_dict["image"])
--- a/lerobot/common/policies/diffusion/diffusion_unet_image_policy.py
+++ b/lerobot/common/policies/diffusion/diffusion_unet_image_policy.py
@@ -5,33 +5,11 @@ import torch.nn.functional as F  # noqa: N812
 from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
 from einops import reduce

-from lerobot.common.policies.diffusion.model.conditional_unet1d import ConditionalUnet1D
-from lerobot.common.policies.diffusion.model.mask_generator import LowdimMaskGenerator
-from lerobot.common.policies.diffusion.model.module_attr_mixin import ModuleAttrMixin
-from lerobot.common.policies.diffusion.model.multi_image_obs_encoder import MultiImageObsEncoder
-from lerobot.common.policies.diffusion.model.normalizer import LinearNormalizer
-from lerobot.common.policies.diffusion.pytorch_utils import dict_apply
-
-
-class BaseImagePolicy(ModuleAttrMixin):
-    # init accepts keyword argument shape_meta, see config/task/*_image.yaml
-
-    def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
-        """
-        obs_dict:
-            str: B,To,*
-        return: B,Ta,Da
-        """
-        raise NotImplementedError()
-
-    # reset state for stateful policies
-    def reset(self):
-        pass
-
-    # ========== training ===========
-    # no standard training interface except setting normalizer
-    def set_normalizer(self, normalizer: LinearNormalizer):
-        raise NotImplementedError()
+from diffusion_policy.common.pytorch_util import dict_apply
+from diffusion_policy.model.diffusion.conditional_unet1d import ConditionalUnet1D
+from diffusion_policy.model.diffusion.mask_generator import LowdimMaskGenerator
+from diffusion_policy.model.vision.multi_image_obs_encoder import MultiImageObsEncoder
+from diffusion_policy.policy.base_image_policy import BaseImagePolicy


 class DiffusionUnetImagePolicy(BaseImagePolicy):
--- a/lerobot/common/policies/diffusion/model/conditional_unet1d.py
+++ b/lerobot/common/policies/diffusion/model/conditional_unet1d.py
@@ -1,286 +0,0 @@
-import logging
-from typing import Union
-
-import einops
-import torch
-import torch.nn as nn
-from einops.layers.torch import Rearrange
-
-from lerobot.common.policies.diffusion.model.conv1d_components import Conv1dBlock, Downsample1d, Upsample1d
-from lerobot.common.policies.diffusion.model.positional_embedding import SinusoidalPosEmb
-
-logger = logging.getLogger(__name__)
-
-
-class ConditionalResidualBlock1D(nn.Module):
-    def __init__(
-        self, in_channels, out_channels, cond_dim, kernel_size=3, n_groups=8, cond_predict_scale=False
-    ):
-        super().__init__()
-
-        self.blocks = nn.ModuleList(
-            [
-                Conv1dBlock(in_channels, out_channels, kernel_size, n_groups=n_groups),
-                Conv1dBlock(out_channels, out_channels, kernel_size, n_groups=n_groups),
-            ]
-        )
-
-        # FiLM modulation https://arxiv.org/abs/1709.07871
-        # predicts per-channel scale and bias
-        cond_channels = out_channels
-        if cond_predict_scale:
-            cond_channels = out_channels * 2
-        self.cond_predict_scale = cond_predict_scale
-        self.out_channels = out_channels
-        self.cond_encoder = nn.Sequential(
-            nn.Mish(),
-            nn.Linear(cond_dim, cond_channels),
-            Rearrange("batch t -> batch t 1"),
-        )
-
-        # make sure dimensions compatible
-        self.residual_conv = (
-            nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else nn.Identity()
-        )
-
-    def forward(self, x, cond):
-        """
-        x : [ batch_size x in_channels x horizon ]
-        cond : [ batch_size x cond_dim]
-
-        returns:
-        out : [ batch_size x out_channels x horizon ]
-        """
-        out = self.blocks[0](x)
-        embed = self.cond_encoder(cond)
-        if self.cond_predict_scale:
-            embed = embed.reshape(embed.shape[0], 2, self.out_channels, 1)
-            scale = embed[:, 0, ...]
-            bias = embed[:, 1, ...]
-            out = scale * out + bias
-        else:
-            out = out + embed
-        out = self.blocks[1](out)
-        out = out + self.residual_conv(x)
-        return out
-
-
-class ConditionalUnet1D(nn.Module):
-    def __init__(
-        self,
-        input_dim,
-        local_cond_dim=None,
-        global_cond_dim=None,
-        diffusion_step_embed_dim=256,
-        down_dims=None,
-        kernel_size=3,
-        n_groups=8,
-        cond_predict_scale=False,
-    ):
-        super().__init__()
-        if down_dims is None:
-            down_dims = [256, 512, 1024]
-
-        all_dims = [input_dim] + list(down_dims)
-        start_dim = down_dims[0]
-
-        dsed = diffusion_step_embed_dim
-        diffusion_step_encoder = nn.Sequential(
-            SinusoidalPosEmb(dsed),
-            nn.Linear(dsed, dsed * 4),
-            nn.Mish(),
-            nn.Linear(dsed * 4, dsed),
-        )
-        cond_dim = dsed
-        if global_cond_dim is not None:
-            cond_dim += global_cond_dim
-
-        in_out = list(zip(all_dims[:-1], all_dims[1:], strict=False))
-
-        local_cond_encoder = None
-        if local_cond_dim is not None:
-            _, dim_out = in_out[0]
-            dim_in = local_cond_dim
-            local_cond_encoder = nn.ModuleList(
-                [
-                    # down encoder
-                    ConditionalResidualBlock1D(
-                        dim_in,
-                        dim_out,
-                        cond_dim=cond_dim,
-                        kernel_size=kernel_size,
-                        n_groups=n_groups,
-                        cond_predict_scale=cond_predict_scale,
-                    ),
-                    # up encoder
-                    ConditionalResidualBlock1D(
-                        dim_in,
-                        dim_out,
-                        cond_dim=cond_dim,
-                        kernel_size=kernel_size,
-                        n_groups=n_groups,
-                        cond_predict_scale=cond_predict_scale,
-                    ),
-                ]
-            )
-
-        mid_dim = all_dims[-1]
-        self.mid_modules = nn.ModuleList(
-            [
-                ConditionalResidualBlock1D(
-                    mid_dim,
-                    mid_dim,
-                    cond_dim=cond_dim,
-                    kernel_size=kernel_size,
-                    n_groups=n_groups,
-                    cond_predict_scale=cond_predict_scale,
-                ),
-                ConditionalResidualBlock1D(
-                    mid_dim,
-                    mid_dim,
-                    cond_dim=cond_dim,
-                    kernel_size=kernel_size,
-                    n_groups=n_groups,
-                    cond_predict_scale=cond_predict_scale,
-                ),
-            ]
-        )
-
-        down_modules = nn.ModuleList([])
-        for ind, (dim_in, dim_out) in enumerate(in_out):
-            is_last = ind >= (len(in_out) - 1)
-            down_modules.append(
-                nn.ModuleList(
-                    [
-                        ConditionalResidualBlock1D(
-                            dim_in,
-                            dim_out,
-                            cond_dim=cond_dim,
-                            kernel_size=kernel_size,
-                            n_groups=n_groups,
-                            cond_predict_scale=cond_predict_scale,
-                        ),
-                        ConditionalResidualBlock1D(
-                            dim_out,
-                            dim_out,
-                            cond_dim=cond_dim,
-                            kernel_size=kernel_size,
-                            n_groups=n_groups,
-                            cond_predict_scale=cond_predict_scale,
-                        ),
-                        Downsample1d(dim_out) if not is_last else nn.Identity(),
-                    ]
-                )
-            )
-
-        up_modules = nn.ModuleList([])
-        for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])):
-            is_last = ind >= (len(in_out) - 1)
-            up_modules.append(
-                nn.ModuleList(
-                    [
-                        ConditionalResidualBlock1D(
-                            dim_out * 2,
-                            dim_in,
-                            cond_dim=cond_dim,
-                            kernel_size=kernel_size,
-                            n_groups=n_groups,
-                            cond_predict_scale=cond_predict_scale,
-                        ),
-                        ConditionalResidualBlock1D(
-                            dim_in,
-                            dim_in,
-                            cond_dim=cond_dim,
-                            kernel_size=kernel_size,
-                            n_groups=n_groups,
-                            cond_predict_scale=cond_predict_scale,
-                        ),
-                        Upsample1d(dim_in) if not is_last else nn.Identity(),
-                    ]
-                )
-            )
-
-        final_conv = nn.Sequential(
-            Conv1dBlock(start_dim, start_dim, kernel_size=kernel_size),
-            nn.Conv1d(start_dim, input_dim, 1),
-        )
-
-        self.diffusion_step_encoder = diffusion_step_encoder
-        self.local_cond_encoder = local_cond_encoder
-        self.up_modules = up_modules
-        self.down_modules = down_modules
-        self.final_conv = final_conv
-
-        logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))
-
-    def forward(
-        self,
-        sample: torch.Tensor,
-        timestep: Union[torch.Tensor, float, int],
-        local_cond=None,
-        global_cond=None,
-        **kwargs,
-    ):
-        """
-        x: (B,T,input_dim)
-        timestep: (B,) or int, diffusion step
-        local_cond: (B,T,local_cond_dim)
-        global_cond: (B,global_cond_dim)
-        output: (B,T,input_dim)
-        """
-        sample = einops.rearrange(sample, "b h t -> b t h")
-
-        # 1. time
-        timesteps = timestep
-        if not torch.is_tensor(timesteps):
-            # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can
-            timesteps = torch.tensor([timesteps], dtype=torch.long, device=sample.device)
-        elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0:
-            timesteps = timesteps[None].to(sample.device)
-        # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
-        timesteps = timesteps.expand(sample.shape[0])
-
-        global_feature = self.diffusion_step_encoder(timesteps)
-
-        if global_cond is not None:
-            global_feature = torch.cat([global_feature, global_cond], axis=-1)
-
-        # encode local features
-        h_local = []
-        if local_cond is not None:
-            local_cond = einops.rearrange(local_cond, "b h t -> b t h")
-            resnet, resnet2 = self.local_cond_encoder
-            x = resnet(local_cond, global_feature)
-            h_local.append(x)
-            x = resnet2(local_cond, global_feature)
-            h_local.append(x)
-
-        x = sample
-        h = []
-        for idx, (resnet, resnet2, downsample) in enumerate(self.down_modules):
-            x = resnet(x, global_feature)
-            if idx == 0 and len(h_local) > 0:
-                x = x + h_local[0]
-            x = resnet2(x, global_feature)
-            h.append(x)
-            x = downsample(x)
-
-        for mid_module in self.mid_modules:
-            x = mid_module(x, global_feature)
-
-        for idx, (resnet, resnet2, upsample) in enumerate(self.up_modules):
-            x = torch.cat((x, h.pop()), dim=1)
-            x = resnet(x, global_feature)
-            # The correct condition should be:
-            # if idx == (len(self.up_modules)-1) and len(h_local) > 0:
-            # However this change will break compatibility with published checkpoints.
-            # Therefore it is left as a comment.
-            if idx == len(self.up_modules) and len(h_local) > 0:
-                x = x + h_local[1]
-            x = resnet2(x, global_feature)
-            x = upsample(x)
-
-        x = self.final_conv(x)
-
-        x = einops.rearrange(x, "b t h -> b h t")
-        return x
--- a/lerobot/common/policies/diffusion/model/conv1d_components.py
+++ b/lerobot/common/policies/diffusion/model/conv1d_components.py
@@ -1,47 +0,0 @@
-import torch.nn as nn
-
-# from einops.layers.torch import Rearrange
-
-
-class Downsample1d(nn.Module):
-    def __init__(self, dim):
-        super().__init__()
-        self.conv = nn.Conv1d(dim, dim, 3, 2, 1)
-
-    def forward(self, x):
-        return self.conv(x)
-
-
-class Upsample1d(nn.Module):
-    def __init__(self, dim):
-        super().__init__()
-        self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1)
-
-    def forward(self, x):
-        return self.conv(x)
-
-
-class Conv1dBlock(nn.Module):
-    """
-    Conv1d --> GroupNorm --> Mish
-    """
-
-    def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8):
-        super().__init__()
-
-        self.block = nn.Sequential(
-            nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2),
-            # Rearrange('batch channels horizon -> batch channels 1 horizon'),
-            nn.GroupNorm(n_groups, out_channels),
-            # Rearrange('batch channels 1 horizon -> batch channels horizon'),
-            nn.Mish(),
-        )
-
-    def forward(self, x):
-        return self.block(x)
-
-
-# def test():
-#     cb = Conv1dBlock(256, 128, kernel_size=3)
-#     x = torch.zeros((1,256,16))
-#     o = cb(x)
--- a/lerobot/common/policies/diffusion/model/crop_randomizer.py
+++ b/lerobot/common/policies/diffusion/model/crop_randomizer.py
@@ -1,294 +0,0 @@
-import torch
-import torch.nn as nn
-import torchvision.transforms.functional as ttf
-
-import lerobot.common.policies.diffusion.model.tensor_utils as tu
-
-
-class CropRandomizer(nn.Module):
-    """
-    Randomly sample crops at input, and then average across crop features at output.
-    """
-
-    def __init__(
-        self,
-        input_shape,
-        crop_height,
-        crop_width,
-        num_crops=1,
-        pos_enc=False,
-    ):
-        """
-        Args:
-            input_shape (tuple, list): shape of input (not including batch dimension)
-            crop_height (int): crop height
-            crop_width (int): crop width
-            num_crops (int): number of random crops to take
-            pos_enc (bool): if True, add 2 channels to the output to encode the spatial
-                location of the cropped pixels in the source image
-        """
-        super().__init__()
-
-        assert len(input_shape) == 3  # (C, H, W)
-        assert crop_height < input_shape[1]
-        assert crop_width < input_shape[2]
-
-        self.input_shape = input_shape
-        self.crop_height = crop_height
-        self.crop_width = crop_width
-        self.num_crops = num_crops
-        self.pos_enc = pos_enc
-
-    def output_shape_in(self, input_shape=None):
-        """
-        Function to compute output shape from inputs to this module. Corresponds to
-        the @forward_in operation, where raw inputs (usually observation modalities)
-        are passed in.
-
-        Args:
-            input_shape (iterable of int): shape of input. Does not include batch dimension.
-                Some modules may not need this argument, if their output does not depend
-                on the size of the input, or if they assume fixed size input.
-
-        Returns:
-            out_shape ([int]): list of integers corresponding to output shape
-        """
-
-        # outputs are shape (C, CH, CW), or maybe C + 2 if using position encoding, because
-        # the number of crops are reshaped into the batch dimension, increasing the batch
-        # size from B to B * N
-        out_c = self.input_shape[0] + 2 if self.pos_enc else self.input_shape[0]
-        return [out_c, self.crop_height, self.crop_width]
-
-    def output_shape_out(self, input_shape=None):
-        """
-        Function to compute output shape from inputs to this module. Corresponds to
-        the @forward_out operation, where processed inputs (usually encoded observation
-        modalities) are passed in.
-
-        Args:
-            input_shape (iterable of int): shape of input. Does not include batch dimension.
-                Some modules may not need this argument, if their output does not depend
-                on the size of the input, or if they assume fixed size input.
-
-        Returns:
-            out_shape ([int]): list of integers corresponding to output shape
-        """
-
-        # since the forward_out operation splits [B * N, ...] -> [B, N, ...]
-        # and then pools to result in [B, ...], only the batch dimension changes,
-        # and so the other dimensions retain their shape.
-        return list(input_shape)
-
-    def forward_in(self, inputs):
-        """
-        Samples N random crops for each input in the batch, and then reshapes
-        inputs to [B * N, ...].
-        """
-        assert len(inputs.shape) >= 3  # must have at least (C, H, W) dimensions
-        if self.training:
-            # generate random crops
-            out, _ = sample_random_image_crops(
-                images=inputs,
-                crop_height=self.crop_height,
-                crop_width=self.crop_width,
-                num_crops=self.num_crops,
-                pos_enc=self.pos_enc,
-            )
-            # [B, N, ...] -> [B * N, ...]
-            return tu.join_dimensions(out, 0, 1)
-        else:
-            # take center crop during eval
-            out = ttf.center_crop(img=inputs, output_size=(self.crop_height, self.crop_width))
-            if self.num_crops > 1:
-                B, C, H, W = out.shape  # noqa: N806
-                out = out.unsqueeze(1).expand(B, self.num_crops, C, H, W).reshape(-1, C, H, W)
-                # [B * N, ...]
-            return out
-
-    def forward_out(self, inputs):
-        """
-        Splits the outputs from shape [B * N, ...] -> [B, N, ...] and then average across N
-        to result in shape [B, ...] to make sure the network output is consistent with
-        what would have happened if there were no randomization.
-        """
-        if self.num_crops <= 1:
-            return inputs
-        else:
-            batch_size = inputs.shape[0] // self.num_crops
-            out = tu.reshape_dimensions(
-                inputs, begin_axis=0, end_axis=0, target_dims=(batch_size, self.num_crops)
-            )
-            return out.mean(dim=1)
-
-    def forward(self, inputs):
-        return self.forward_in(inputs)
-
-    def __repr__(self):
-        """Pretty print network."""
-        header = "{}".format(str(self.__class__.__name__))
-        msg = header + "(input_shape={}, crop_size=[{}, {}], num_crops={})".format(
-            self.input_shape, self.crop_height, self.crop_width, self.num_crops
-        )
-        return msg
-
-
-def crop_image_from_indices(images, crop_indices, crop_height, crop_width):
-    """
-    Crops images at the locations specified by @crop_indices. Crops will be
-    taken across all channels.
-
-    Args:
-        images (torch.Tensor): batch of images of shape [..., C, H, W]
-
-        crop_indices (torch.Tensor): batch of indices of shape [..., N, 2] where
-            N is the number of crops to take per image and each entry corresponds
-            to the pixel height and width of where to take the crop. Note that
-            the indices can also be of shape [..., 2] if only 1 crop should
-            be taken per image. Leading dimensions must be consistent with
-            @images argument. Each index specifies the top left of the crop.
-            Values must be in range [0, H - CH - 1] x [0, W - CW - 1] where
-            H and W are the height and width of @images and CH and CW are
-            @crop_height and @crop_width.
-
-        crop_height (int): height of crop to take
-
-        crop_width (int): width of crop to take
-
-    Returns:
-        crops (torch.Tesnor): cropped images of shape [..., C, @crop_height, @crop_width]
-    """
-
-    # make sure length of input shapes is consistent
-    assert crop_indices.shape[-1] == 2
-    ndim_im_shape = len(images.shape)
-    ndim_indices_shape = len(crop_indices.shape)
-    assert (ndim_im_shape == ndim_indices_shape + 1) or (ndim_im_shape == ndim_indices_shape + 2)
-
-    # maybe pad so that @crop_indices is shape [..., N, 2]
-    is_padded = False
-    if ndim_im_shape == ndim_indices_shape + 2:
-        crop_indices = crop_indices.unsqueeze(-2)
-        is_padded = True
-
-    # make sure leading dimensions between images and indices are consistent
-    assert images.shape[:-3] == crop_indices.shape[:-2]
-
-    device = images.device
-    image_c, image_h, image_w = images.shape[-3:]
-    num_crops = crop_indices.shape[-2]
-
-    # make sure @crop_indices are in valid range
-    assert (crop_indices[..., 0] >= 0).all().item()
-    assert (crop_indices[..., 0] < (image_h - crop_height)).all().item()
-    assert (crop_indices[..., 1] >= 0).all().item()
-    assert (crop_indices[..., 1] < (image_w - crop_width)).all().item()
-
-    # convert each crop index (ch, cw) into a list of pixel indices that correspond to the entire window.
-
-    # 2D index array with columns [0, 1, ..., CH - 1] and shape [CH, CW]
-    crop_ind_grid_h = torch.arange(crop_height).to(device)
-    crop_ind_grid_h = tu.unsqueeze_expand_at(crop_ind_grid_h, size=crop_width, dim=-1)
-    # 2D index array with rows [0, 1, ..., CW - 1] and shape [CH, CW]
-    crop_ind_grid_w = torch.arange(crop_width).to(device)
-    crop_ind_grid_w = tu.unsqueeze_expand_at(crop_ind_grid_w, size=crop_height, dim=0)
-    # combine into shape [CH, CW, 2]
-    crop_in_grid = torch.cat((crop_ind_grid_h.unsqueeze(-1), crop_ind_grid_w.unsqueeze(-1)), dim=-1)
-
-    # Add above grid with the offset index of each sampled crop to get 2d indices for each crop.
-    # After broadcasting, this will be shape [..., N, CH, CW, 2] and each crop has a [CH, CW, 2]
-    # shape array that tells us which pixels from the corresponding source image to grab.
-    grid_reshape = [1] * len(crop_indices.shape[:-1]) + [crop_height, crop_width, 2]
-    all_crop_inds = crop_indices.unsqueeze(-2).unsqueeze(-2) + crop_in_grid.reshape(grid_reshape)
-
-    # For using @torch.gather, convert to flat indices from 2D indices, and also
-    # repeat across the channel dimension. To get flat index of each pixel to grab for
-    # each sampled crop, we just use the mapping: ind = h_ind * @image_w + w_ind
-    all_crop_inds = all_crop_inds[..., 0] * image_w + all_crop_inds[..., 1]  # shape [..., N, CH, CW]
-    all_crop_inds = tu.unsqueeze_expand_at(all_crop_inds, size=image_c, dim=-3)  # shape [..., N, C, CH, CW]
-    all_crop_inds = tu.flatten(all_crop_inds, begin_axis=-2)  # shape [..., N, C, CH * CW]
-
-    # Repeat and flatten the source images -> [..., N, C, H * W] and then use gather to index with crop pixel inds
-    images_to_crop = tu.unsqueeze_expand_at(images, size=num_crops, dim=-4)
-    images_to_crop = tu.flatten(images_to_crop, begin_axis=-2)
-    crops = torch.gather(images_to_crop, dim=-1, index=all_crop_inds)
-    # [..., N, C, CH * CW] -> [..., N, C, CH, CW]
-    reshape_axis = len(crops.shape) - 1
-    crops = tu.reshape_dimensions(
-        crops, begin_axis=reshape_axis, end_axis=reshape_axis, target_dims=(crop_height, crop_width)
-    )
-
-    if is_padded:
-        # undo padding -> [..., C, CH, CW]
-        crops = crops.squeeze(-4)
-    return crops
-
-
-def sample_random_image_crops(images, crop_height, crop_width, num_crops, pos_enc=False):
-    """
-    For each image, randomly sample @num_crops crops of size (@crop_height, @crop_width), from
-    @images.
-
-    Args:
-        images (torch.Tensor): batch of images of shape [..., C, H, W]
-
-        crop_height (int): height of crop to take
-
-        crop_width (int): width of crop to take
-
-        num_crops (n): number of crops to sample
-
-        pos_enc (bool): if True, also add 2 channels to the outputs that gives a spatial
-            encoding of the original source pixel locations. This means that the
-            output crops will contain information about where in the source image
-            it was sampled from.
-
-    Returns:
-        crops (torch.Tensor): crops of shape (..., @num_crops, C, @crop_height, @crop_width)
-            if @pos_enc is False, otherwise (..., @num_crops, C + 2, @crop_height, @crop_width)
-
-        crop_inds (torch.Tensor): sampled crop indices of shape (..., N, 2)
-    """
-    device = images.device
-
-    # maybe add 2 channels of spatial encoding to the source image
-    source_im = images
-    if pos_enc:
-        # spatial encoding [y, x] in [0, 1]
-        h, w = source_im.shape[-2:]
-        pos_y, pos_x = torch.meshgrid(torch.arange(h), torch.arange(w))
-        pos_y = pos_y.float().to(device) / float(h)
-        pos_x = pos_x.float().to(device) / float(w)
-        position_enc = torch.stack((pos_y, pos_x))  # shape [C, H, W]
-
-        # unsqueeze and expand to match leading dimensions -> shape [..., C, H, W]
-        leading_shape = source_im.shape[:-3]
-        position_enc = position_enc[(None,) * len(leading_shape)]
-        position_enc = position_enc.expand(*leading_shape, -1, -1, -1)
-
-        # concat across channel dimension with input
-        source_im = torch.cat((source_im, position_enc), dim=-3)
-
-    # make sure sample boundaries ensure crops are fully within the images
-    image_c, image_h, image_w = source_im.shape[-3:]
-    max_sample_h = image_h - crop_height
-    max_sample_w = image_w - crop_width
-
-    # Sample crop locations for all tensor dimensions up to the last 3, which are [C, H, W].
-    # Each gets @num_crops samples - typically this will just be the batch dimension (B), so
-    # we will sample [B, N] indices, but this supports having more than one leading dimension,
-    # or possibly no leading dimension.
-    #
-    # Trick: sample in [0, 1) with rand, then re-scale to [0, M) and convert to long to get sampled ints
-    crop_inds_h = (max_sample_h * torch.rand(*source_im.shape[:-3], num_crops).to(device)).long()
-    crop_inds_w = (max_sample_w * torch.rand(*source_im.shape[:-3], num_crops).to(device)).long()
-    crop_inds = torch.cat((crop_inds_h.unsqueeze(-1), crop_inds_w.unsqueeze(-1)), dim=-1)  # shape [..., N, 2]
-
-    crops = crop_image_from_indices(
-        images=source_im,
-        crop_indices=crop_inds,
-        crop_height=crop_height,
-        crop_width=crop_width,
-    )
-
-    return crops, crop_inds
--- a/lerobot/common/policies/diffusion/model/dict_of_tensor_mixin.py
+++ b/lerobot/common/policies/diffusion/model/dict_of_tensor_mixin.py
@@ -1,41 +0,0 @@
-import torch
-import torch.nn as nn
-
-
-class DictOfTensorMixin(nn.Module):
-    def __init__(self, params_dict=None):
-        super().__init__()
-        if params_dict is None:
-            params_dict = nn.ParameterDict()
-        self.params_dict = params_dict
-
-    @property
-    def device(self):
-        return next(iter(self.parameters())).device
-
-    def _load_from_state_dict(
-        self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
-    ):
-        def dfs_add(dest, keys, value: torch.Tensor):
-            if len(keys) == 1:
-                dest[keys[0]] = value
-                return
-
-            if keys[0] not in dest:
-                dest[keys[0]] = nn.ParameterDict()
-            dfs_add(dest[keys[0]], keys[1:], value)
-
-        def load_dict(state_dict, prefix):
-            out_dict = nn.ParameterDict()
-            for key, value in state_dict.items():
-                value: torch.Tensor
-                if key.startswith(prefix):
-                    param_keys = key[len(prefix) :].split(".")[1:]
-                    # if len(param_keys) == 0:
-                    #     import pdb; pdb.set_trace()
-                    dfs_add(out_dict, param_keys, value.clone())
-            return out_dict
-
-        self.params_dict = load_dict(state_dict, prefix + "params_dict")
-        self.params_dict.requires_grad_(False)
-        return
--- a/lerobot/common/policies/diffusion/model/ema_model.py
+++ b/lerobot/common/policies/diffusion/model/ema_model.py
@@ -1,84 +0,0 @@
-import torch
-from torch.nn.modules.batchnorm import _BatchNorm
-
-
-class EMAModel:
-    """
-    Exponential Moving Average of models weights
-    """
-
-    def __init__(
-        self, model, update_after_step=0, inv_gamma=1.0, power=2 / 3, min_value=0.0, max_value=0.9999
-    ):
-        """
-        @crowsonkb's notes on EMA Warmup:
-            If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models you plan
-            to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps),
-            gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999
-            at 215.4k steps).
-        Args:
-            inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1.
-            power (float): Exponential factor of EMA warmup. Default: 2/3.
-            min_value (float): The minimum EMA decay rate. Default: 0.
-        """
-
-        self.averaged_model = model
-        self.averaged_model.eval()
-        self.averaged_model.requires_grad_(False)
-
-        self.update_after_step = update_after_step
-        self.inv_gamma = inv_gamma
-        self.power = power
-        self.min_value = min_value
-        self.max_value = max_value
-
-        self.decay = 0.0
-        self.optimization_step = 0
-
-    def get_decay(self, optimization_step):
-        """
-        Compute the decay factor for the exponential moving average.
-        """
-        step = max(0, optimization_step - self.update_after_step - 1)
-        value = 1 - (1 + step / self.inv_gamma) ** -self.power
-
-        if step <= 0:
-            return 0.0
-
-        return max(self.min_value, min(value, self.max_value))
-
-    @torch.no_grad()
-    def step(self, new_model):
-        self.decay = self.get_decay(self.optimization_step)
-
-        # old_all_dataptrs = set()
-        # for param in new_model.parameters():
-        #     data_ptr = param.data_ptr()
-        #     if data_ptr != 0:
-        #         old_all_dataptrs.add(data_ptr)
-
-        # all_dataptrs = set()
-        for module, ema_module in zip(new_model.modules(), self.averaged_model.modules(), strict=False):
-            for param, ema_param in zip(
-                module.parameters(recurse=False), ema_module.parameters(recurse=False), strict=False
-            ):
-                # iterative over immediate parameters only.
-                if isinstance(param, dict):
-                    raise RuntimeError("Dict parameter not supported")
-
-                # data_ptr = param.data_ptr()
-                # if data_ptr != 0:
-                #     all_dataptrs.add(data_ptr)
-
-                if isinstance(module, _BatchNorm):
-                    # skip batchnorms
-                    ema_param.copy_(param.to(dtype=ema_param.dtype).data)
-                elif not param.requires_grad:
-                    ema_param.copy_(param.to(dtype=ema_param.dtype).data)
-                else:
-                    ema_param.mul_(self.decay)
-                    ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay)
-
-        # verify that iterating over module and then parameters is identical to parameters recursively.
-        # assert old_all_dataptrs == all_dataptrs
-        self.optimization_step += 1
--- a/lerobot/common/policies/diffusion/model/lr_scheduler.py
+++ b/lerobot/common/policies/diffusion/model/lr_scheduler.py
@@ -1,46 +0,0 @@
-from diffusers.optimization import TYPE_TO_SCHEDULER_FUNCTION, Optimizer, Optional, SchedulerType, Union
-
-
-def get_scheduler(
-    name: Union[str, SchedulerType],
-    optimizer: Optimizer,
-    num_warmup_steps: Optional[int] = None,
-    num_training_steps: Optional[int] = None,
-    **kwargs,
-):
-    """
-    Added kwargs vs diffuser's original implementation
-
-    Unified API to get any scheduler from its name.
-
-    Args:
-        name (`str` or `SchedulerType`):
-            The name of the scheduler to use.
-        optimizer (`torch.optim.Optimizer`):
-            The optimizer that will be used during training.
-        num_warmup_steps (`int`, *optional*):
-            The number of warmup steps to do. This is not required by all schedulers (hence the argument being
-            optional), the function will raise an error if it's unset and the scheduler type requires it.
-        num_training_steps (`int``, *optional*):
-            The number of training steps to do. This is not required by all schedulers (hence the argument being
-            optional), the function will raise an error if it's unset and the scheduler type requires it.
-    """
-    name = SchedulerType(name)
-    schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name]
-    if name == SchedulerType.CONSTANT:
-        return schedule_func(optimizer, **kwargs)
-
-    # All other schedulers require `num_warmup_steps`
-    if num_warmup_steps is None:
-        raise ValueError(f"{name} requires `num_warmup_steps`, please provide that argument.")
-
-    if name == SchedulerType.CONSTANT_WITH_WARMUP:
-        return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, **kwargs)
-
-    # All other schedulers require `num_training_steps`
-    if num_training_steps is None:
-        raise ValueError(f"{name} requires `num_training_steps`, please provide that argument.")
-
-    return schedule_func(
-        optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps, **kwargs
-    )
--- a/lerobot/common/policies/diffusion/model/mask_generator.py
+++ b/lerobot/common/policies/diffusion/model/mask_generator.py
@@ -1,65 +0,0 @@
-import torch
-
-from lerobot.common.policies.diffusion.model.module_attr_mixin import ModuleAttrMixin
-
-
-class LowdimMaskGenerator(ModuleAttrMixin):
-    def __init__(
-        self,
-        action_dim,
-        obs_dim,
-        # obs mask setup
-        max_n_obs_steps=2,
-        fix_obs_steps=True,
-        # action mask
-        action_visible=False,
-    ):
-        super().__init__()
-        self.action_dim = action_dim
-        self.obs_dim = obs_dim
-        self.max_n_obs_steps = max_n_obs_steps
-        self.fix_obs_steps = fix_obs_steps
-        self.action_visible = action_visible
-
-    @torch.no_grad()
-    def forward(self, shape, seed=None):
-        device = self.device
-        B, T, D = shape  # noqa: N806
-        assert (self.action_dim + self.obs_dim) == D
-
-        # create all tensors on this device
-        rng = torch.Generator(device=device)
-        if seed is not None:
-            rng = rng.manual_seed(seed)
-
-        # generate dim mask
-        dim_mask = torch.zeros(size=shape, dtype=torch.bool, device=device)
-        is_action_dim = dim_mask.clone()
-        is_action_dim[..., : self.action_dim] = True
-        is_obs_dim = ~is_action_dim
-
-        # generate obs mask
-        if self.fix_obs_steps:
-            obs_steps = torch.full((B,), fill_value=self.max_n_obs_steps, device=device)
-        else:
-            obs_steps = torch.randint(
-                low=1, high=self.max_n_obs_steps + 1, size=(B,), generator=rng, device=device
-            )
-
-        steps = torch.arange(0, T, device=device).reshape(1, T).expand(B, T)
-        obs_mask = (obs_steps > steps.T).T.reshape(B, T, 1).expand(B, T, D)
-        obs_mask = obs_mask & is_obs_dim
-
-        # generate action mask
-        if self.action_visible:
-            action_steps = torch.maximum(
-                obs_steps - 1, torch.tensor(0, dtype=obs_steps.dtype, device=obs_steps.device)
-            )
-            action_mask = (action_steps > steps.T).T.reshape(B, T, 1).expand(B, T, D)
-            action_mask = action_mask & is_action_dim
-
-        mask = obs_mask
-        if self.action_visible:
-            mask = mask | action_mask
-
-        return mask
--- a/lerobot/common/policies/diffusion/model/module_attr_mixin.py
+++ b/lerobot/common/policies/diffusion/model/module_attr_mixin.py
@@ -1,15 +0,0 @@
-import torch.nn as nn
-
-
-class ModuleAttrMixin(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self._dummy_variable = nn.Parameter()
-
-    @property
-    def device(self):
-        return next(iter(self.parameters())).device
-
-    @property
-    def dtype(self):
-        return next(iter(self.parameters())).dtype
--- a/lerobot/common/policies/diffusion/model/normalizer.py
+++ b/lerobot/common/policies/diffusion/model/normalizer.py
@@ -1,358 +0,0 @@
-from typing import Dict, Union
-
-import numpy as np
-import torch
-import torch.nn as nn
-import zarr
-
-from lerobot.common.policies.diffusion.model.dict_of_tensor_mixin import DictOfTensorMixin
-from lerobot.common.policies.diffusion.pytorch_utils import dict_apply
-
-
-class LinearNormalizer(DictOfTensorMixin):
-    avaliable_modes = ["limits", "gaussian"]
-
-    @torch.no_grad()
-    def fit(
-        self,
-        data: Union[Dict, torch.Tensor, np.ndarray, zarr.Array],
-        last_n_dims=1,
-        dtype=torch.float32,
-        mode="limits",
-        output_max=1.0,
-        output_min=-1.0,
-        range_eps=1e-4,
-        fit_offset=True,
-    ):
-        if isinstance(data, dict):
-            for key, value in data.items():
-                self.params_dict[key] = _fit(
-                    value,
-                    last_n_dims=last_n_dims,
-                    dtype=dtype,
-                    mode=mode,
-                    output_max=output_max,
-                    output_min=output_min,
-                    range_eps=range_eps,
-                    fit_offset=fit_offset,
-                )
-        else:
-            self.params_dict["_default"] = _fit(
-                data,
-                last_n_dims=last_n_dims,
-                dtype=dtype,
-                mode=mode,
-                output_max=output_max,
-                output_min=output_min,
-                range_eps=range_eps,
-                fit_offset=fit_offset,
-            )
-
-    def __call__(self, x: Union[Dict, torch.Tensor, np.ndarray]) -> torch.Tensor:
-        return self.normalize(x)
-
-    def __getitem__(self, key: str):
-        return SingleFieldLinearNormalizer(self.params_dict[key])
-
-    def __setitem__(self, key: str, value: "SingleFieldLinearNormalizer"):
-        self.params_dict[key] = value.params_dict
-
-    def _normalize_impl(self, x, forward=True):
-        if isinstance(x, dict):
-            result = {}
-            for key, value in x.items():
-                params = self.params_dict[key]
-                result[key] = _normalize(value, params, forward=forward)
-            return result
-        else:
-            if "_default" not in self.params_dict:
-                raise RuntimeError("Not initialized")
-            params = self.params_dict["_default"]
-            return _normalize(x, params, forward=forward)
-
-    def normalize(self, x: Union[Dict, torch.Tensor, np.ndarray]) -> torch.Tensor:
-        return self._normalize_impl(x, forward=True)
-
-    def unnormalize(self, x: Union[Dict, torch.Tensor, np.ndarray]) -> torch.Tensor:
-        return self._normalize_impl(x, forward=False)
-
-    def get_input_stats(self) -> Dict:
-        if len(self.params_dict) == 0:
-            raise RuntimeError("Not initialized")
-        if len(self.params_dict) == 1 and "_default" in self.params_dict:
-            return self.params_dict["_default"]["input_stats"]
-
-        result = {}
-        for key, value in self.params_dict.items():
-            if key != "_default":
-                result[key] = value["input_stats"]
-        return result
-
-    def get_output_stats(self, key="_default"):
-        input_stats = self.get_input_stats()
-        if "min" in input_stats:
-            # no dict
-            return dict_apply(input_stats, self.normalize)
-
-        result = {}
-        for key, group in input_stats.items():
-            this_dict = {}
-            for name, value in group.items():
-                this_dict[name] = self.normalize({key: value})[key]
-            result[key] = this_dict
-        return result
-
-
-class SingleFieldLinearNormalizer(DictOfTensorMixin):
-    avaliable_modes = ["limits", "gaussian"]
-
-    @torch.no_grad()
-    def fit(
-        self,
-        data: Union[torch.Tensor, np.ndarray, zarr.Array],
-        last_n_dims=1,
-        dtype=torch.float32,
-        mode="limits",
-        output_max=1.0,
-        output_min=-1.0,
-        range_eps=1e-4,
-        fit_offset=True,
-    ):
-        self.params_dict = _fit(
-            data,
-            last_n_dims=last_n_dims,
-            dtype=dtype,
-            mode=mode,
-            output_max=output_max,
-            output_min=output_min,
-            range_eps=range_eps,
-            fit_offset=fit_offset,
-        )
-
-    @classmethod
-    def create_fit(cls, data: Union[torch.Tensor, np.ndarray, zarr.Array], **kwargs):
-        obj = cls()
-        obj.fit(data, **kwargs)
-        return obj
-
-    @classmethod
-    def create_manual(
-        cls,
-        scale: Union[torch.Tensor, np.ndarray],
-        offset: Union[torch.Tensor, np.ndarray],
-        input_stats_dict: Dict[str, Union[torch.Tensor, np.ndarray]],
-    ):
-        def to_tensor(x):
-            if not isinstance(x, torch.Tensor):
-                x = torch.from_numpy(x)
-            x = x.flatten()
-            return x
-
-        # check
-        for x in [offset] + list(input_stats_dict.values()):
-            assert x.shape == scale.shape
-            assert x.dtype == scale.dtype
-
-        params_dict = nn.ParameterDict(
-            {
-                "scale": to_tensor(scale),
-                "offset": to_tensor(offset),
-                "input_stats": nn.ParameterDict(dict_apply(input_stats_dict, to_tensor)),
-            }
-        )
-        return cls(params_dict)
-
-    @classmethod
-    def create_identity(cls, dtype=torch.float32):
-        scale = torch.tensor([1], dtype=dtype)
-        offset = torch.tensor([0], dtype=dtype)
-        input_stats_dict = {
-            "min": torch.tensor([-1], dtype=dtype),
-            "max": torch.tensor([1], dtype=dtype),
-            "mean": torch.tensor([0], dtype=dtype),
-            "std": torch.tensor([1], dtype=dtype),
-        }
-        return cls.create_manual(scale, offset, input_stats_dict)
-
-    def normalize(self, x: Union[torch.Tensor, np.ndarray]) -> torch.Tensor:
-        return _normalize(x, self.params_dict, forward=True)
-
-    def unnormalize(self, x: Union[torch.Tensor, np.ndarray]) -> torch.Tensor:
-        return _normalize(x, self.params_dict, forward=False)
-
-    def get_input_stats(self):
-        return self.params_dict["input_stats"]
-
-    def get_output_stats(self):
-        return dict_apply(self.params_dict["input_stats"], self.normalize)
-
-    def __call__(self, x: Union[torch.Tensor, np.ndarray]) -> torch.Tensor:
-        return self.normalize(x)
-
-
-def _fit(
-    data: Union[torch.Tensor, np.ndarray, zarr.Array],
-    last_n_dims=1,
-    dtype=torch.float32,
-    mode="limits",
-    output_max=1.0,
-    output_min=-1.0,
-    range_eps=1e-4,
-    fit_offset=True,
-):
-    """
-    Fit a per-feature affine map ``y = x * scale + offset`` to ``data``.
-
-    Args:
-        data: samples to fit on. zarr arrays are read fully (``data[:]``) and
-            numpy arrays are converted with ``torch.from_numpy``.
-        last_n_dims (int): number of trailing dimensions flattened into the
-            feature axis; with ``0`` every value shares one scale/offset.
-        dtype: torch dtype the data is cast to before fitting; ``None`` skips the cast.
-        mode (str): ``"limits"`` fits from the observed min/max, ``"gaussian"``
-            divides by the observed standard deviation.
-        output_max (float): upper end of the target range (``"limits"`` only).
-        output_min (float): lower end of the target range (``"limits"`` only).
-        range_eps (float): features whose range / absolute maximum / std (depending
-            on the branch) is below this value get a scale of 1.
-        fit_offset (bool): when False the offset is all zeros.
-
-    Returns:
-        nn.ParameterDict: entries ``scale``, ``offset`` and ``input_stats``
-        (``min``, ``max``, ``mean``, ``std``), with ``requires_grad`` disabled.
-    """
-    assert mode in ["limits", "gaussian"]
-    assert last_n_dims >= 0
-    assert output_max > output_min
-
-    # convert data to torch and type
-    if isinstance(data, zarr.Array):
-        data = data[:]
-    if isinstance(data, np.ndarray):
-        data = torch.from_numpy(data)
-    if dtype is not None:
-        data = data.type(dtype)
-
-    # convert shape
-    # (flatten to (num_samples, dim); dim is the product of the last `last_n_dims` sizes)
-    dim = 1
-    if last_n_dims > 0:
-        dim = np.prod(data.shape[-last_n_dims:])
-    data = data.reshape(-1, dim)
-
-    # compute input stats min max mean std
-    input_min, _ = data.min(axis=0)
-    input_max, _ = data.max(axis=0)
-    input_mean = data.mean(axis=0)
-    input_std = data.std(axis=0)
-
-    # compute scale and offset
-    if mode == "limits":
-        if fit_offset:
-            # unit scale
-            input_range = input_max - input_min
-            ignore_dim = input_range < range_eps
-            # substituting the output range here makes the scale of (near-)constant features exactly 1
-            input_range[ignore_dim] = output_max - output_min
-            scale = (output_max - output_min) / input_range
-            offset = output_min - scale * input_min
-            offset[ignore_dim] = (output_max + output_min) / 2 - input_min[ignore_dim]
-            # ignore dims scaled to mean of output max and min
-        else:
-            # use this when data is pre-zero-centered.
-            assert output_max > 0
-            assert output_min < 0
-            # unit abs
-            output_abs = min(abs(output_min), abs(output_max))
-            input_abs = torch.maximum(torch.abs(input_min), torch.abs(input_max))
-            ignore_dim = input_abs < range_eps
-            input_abs[ignore_dim] = output_abs
-            # don't scale constant channels
-            scale = output_abs / input_abs
-            offset = torch.zeros_like(input_mean)
-    elif mode == "gaussian":
-        ignore_dim = input_std < range_eps
-        # clone so the in-place write below does not modify input_std, which is stored in input_stats
-        scale = input_std.clone()
-        scale[ignore_dim] = 1
-        scale = 1 / scale
-
-        offset = -input_mean * scale if fit_offset else torch.zeros_like(input_mean)
-
-    # save
-    this_params = nn.ParameterDict(
-        {
-            "scale": scale,
-            "offset": offset,
-            "input_stats": nn.ParameterDict(
-                {"min": input_min, "max": input_max, "mean": input_mean, "std": input_std}
-            ),
-        }
-    )
-    # the fitted values are statistics, not trainable weights
-    for p in this_params.parameters():
-        p.requires_grad_(False)
-    return this_params
-
-
-def _normalize(x, params, forward=True):
-    assert "scale" in params
-    if isinstance(x, np.ndarray):
-        x = torch.from_numpy(x)
-    scale = params["scale"]
-    offset = params["offset"]
-    x = x.to(device=scale.device, dtype=scale.dtype)
-    src_shape = x.shape
-    x = x.reshape(-1, scale.shape[0])
-    x = x * scale + offset if forward else (x - offset) / scale
-    x = x.reshape(src_shape)
-    return x
-
-
-def test():
-    """
-    Smoke test for SingleFieldLinearNormalizer and LinearNormalizer.
-
-    Fits each normalizer on random uniform data, then checks the output shape,
-    the output range (or mean/std) and that unnormalize inverts normalize.
-    """
-    data = torch.zeros((100, 10, 9, 2)).uniform_()
-    # make one feature constant (all zeros)
-    data[..., 0, 0] = 0
-
-    # single field, "limits" mode with offset: output should span [-1, 1]
-    normalizer = SingleFieldLinearNormalizer()
-    normalizer.fit(data, mode="limits", last_n_dims=2)
-    datan = normalizer.normalize(data)
-    assert datan.shape == data.shape
-    assert np.allclose(datan.max(), 1.0)
-    assert np.allclose(datan.min(), -1.0)
-    dataun = normalizer.unnormalize(datan)
-    assert torch.allclose(data, dataun, atol=1e-7)
-
-    _ = normalizer.get_input_stats()
-    _ = normalizer.get_output_stats()
-
-    # single field, "limits" mode without offset: non-negative data stays in [0, 1]
-    normalizer = SingleFieldLinearNormalizer()
-    normalizer.fit(data, mode="limits", last_n_dims=1, fit_offset=False)
-    datan = normalizer.normalize(data)
-    assert datan.shape == data.shape
-    assert np.allclose(datan.max(), 1.0, atol=1e-3)
-    assert np.allclose(datan.min(), 0.0, atol=1e-3)
-    dataun = normalizer.unnormalize(datan)
-    assert torch.allclose(data, dataun, atol=1e-7)
-
-    # single field, "gaussian" mode over all values: zero mean, unit std
-    data = torch.zeros((100, 10, 9, 2)).uniform_()
-    normalizer = SingleFieldLinearNormalizer()
-    normalizer.fit(data, mode="gaussian", last_n_dims=0)
-    datan = normalizer.normalize(data)
-    assert datan.shape == data.shape
-    assert np.allclose(datan.mean(), 0.0, atol=1e-3)
-    assert np.allclose(datan.std(), 1.0, atol=1e-3)
-    dataun = normalizer.unnormalize(datan)
-    assert torch.allclose(data, dataun, atol=1e-7)
-
-    # dict
-    data = torch.zeros((100, 10, 9, 2)).uniform_()
-    data[..., 0, 0] = 0
-
-    # LinearNormalizer fitted on a bare tensor
-    normalizer = LinearNormalizer()
-    normalizer.fit(data, mode="limits", last_n_dims=2)
-    datan = normalizer.normalize(data)
-    assert datan.shape == data.shape
-    assert np.allclose(datan.max(), 1.0)
-    assert np.allclose(datan.min(), -1.0)
-    dataun = normalizer.unnormalize(datan)
-    assert torch.allclose(data, dataun, atol=1e-7)
-
-    _ = normalizer.get_input_stats()
-    _ = normalizer.get_output_stats()
-
-    # LinearNormalizer fitted on a dict of tensors: round trip per key
-    data = {
-        "obs": torch.zeros((1000, 128, 9, 2)).uniform_() * 512,
-        "action": torch.zeros((1000, 128, 2)).uniform_() * 512,
-    }
-    normalizer = LinearNormalizer()
-    normalizer.fit(data)
-    datan = normalizer.normalize(data)
-    dataun = normalizer.unnormalize(datan)
-    for key in data:
-        assert torch.allclose(data[key], dataun[key], atol=1e-4)
-
-    _ = normalizer.get_input_stats()
-    _ = normalizer.get_output_stats()
-
-    # a fresh normalizer loaded from the state dict must round-trip the same data
-    state_dict = normalizer.state_dict()
-    n = LinearNormalizer()
-    n.load_state_dict(state_dict)
-    datan = n.normalize(data)
-    dataun = n.unnormalize(datan)
-    for key in data:
-        assert torch.allclose(data[key], dataun[key], atol=1e-4)
--- a/lerobot/common/policies/diffusion/model/positional_embedding.py
+++ b/lerobot/common/policies/diffusion/model/positional_embedding.py
@@ -1,19 +0,0 @@
-import math
-
-import torch
-import torch.nn as nn
-
-
-class SinusoidalPosEmb(nn.Module):
-    def __init__(self, dim):
-        super().__init__()
-        self.dim = dim
-
-    def forward(self, x):
-        device = x.device
-        half_dim = self.dim // 2
-        emb = math.log(10000) / (half_dim - 1)
-        emb = torch.exp(torch.arange(half_dim, device=device) * -emb)
-        emb = x[:, None] * emb[None, :]
-        emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
-        return emb
--- a/lerobot/common/policies/diffusion/model/tensor_utils.py
+++ b/lerobot/common/policies/diffusion/model/tensor_utils.py
@@ -1,971 +0,0 @@
-"""
-A collection of utilities for working with nested tensor structures consisting
-of numpy arrays and torch tensors.
-"""
-import collections
-
-import numpy as np
-import torch
-
-
-def recursive_dict_list_tuple_apply(x, type_func_dict):
-    """
-    Recursively apply functions to a nested dictionary or list or tuple, given a dictionary of
-    {data_type: function_to_apply}.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        type_func_dict (dict): a mapping from data types to the functions to be
-            applied for each data type.
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    assert list not in type_func_dict
-    assert tuple not in type_func_dict
-    assert dict not in type_func_dict
-
-    if isinstance(x, (dict, collections.OrderedDict)):
-        new_x = collections.OrderedDict() if isinstance(x, collections.OrderedDict) else {}
-        for k, v in x.items():
-            new_x[k] = recursive_dict_list_tuple_apply(v, type_func_dict)
-        return new_x
-    elif isinstance(x, (list, tuple)):
-        ret = [recursive_dict_list_tuple_apply(v, type_func_dict) for v in x]
-        if isinstance(x, tuple):
-            ret = tuple(ret)
-        return ret
-    else:
-        for t, f in type_func_dict.items():
-            if isinstance(x, t):
-                return f(x)
-        else:
-            raise NotImplementedError("Cannot handle data type %s" % str(type(x)))
-
-
-def map_tensor(x, func):
-    """
-    Apply function @func to torch.Tensor objects in a nested dictionary or
-    list or tuple.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        func (function): function to apply to each tensor
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: func,
-            type(None): lambda x: x,
-        },
-    )
-
-
-def map_ndarray(x, func):
-    """
-    Apply function @func to np.ndarray objects in a nested dictionary or
-    list or tuple.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        func (function): function to apply to each array
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            np.ndarray: func,
-            type(None): lambda x: x,
-        },
-    )
-
-
-def map_tensor_ndarray(x, tensor_func, ndarray_func):
-    """
-    Apply function @tensor_func to torch.Tensor objects and @ndarray_func to
-    np.ndarray objects in a nested dictionary or list or tuple.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        tensor_func (function): function to apply to each tensor
-        ndarray_Func (function): function to apply to each array
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: tensor_func,
-            np.ndarray: ndarray_func,
-            type(None): lambda x: x,
-        },
-    )
-
-
-def clone(x):
-    """
-    Clones all torch tensors and numpy arrays in nested dictionary or list
-    or tuple and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x.clone(),
-            np.ndarray: lambda x: x.copy(),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def detach(x):
-    """
-    Detaches all torch tensors in nested dictionary or list
-    or tuple and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x.detach(),
-        },
-    )
-
-
-def to_batch(x):
-    """
-    Introduces a leading batch dimension of 1 for all torch tensors and numpy
-    arrays in nested dictionary or list or tuple and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x[None, ...],
-            np.ndarray: lambda x: x[None, ...],
-            type(None): lambda x: x,
-        },
-    )
-
-
-def to_sequence(x):
-    """
-    Introduces a time dimension of 1 at dimension 1 for all torch tensors and numpy
-    arrays in nested dictionary or list or tuple and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x[:, None, ...],
-            np.ndarray: lambda x: x[:, None, ...],
-            type(None): lambda x: x,
-        },
-    )
-
-
-def index_at_time(x, ind):
-    """
-    Indexes all torch tensors and numpy arrays in dimension 1 with index @ind in
-    nested dictionary or list or tuple and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        ind (int): index
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x[:, ind, ...],
-            np.ndarray: lambda x: x[:, ind, ...],
-            type(None): lambda x: x,
-        },
-    )
-
-
-def unsqueeze(x, dim):
-    """
-    Adds dimension of size 1 at dimension @dim in all torch tensors and numpy arrays
-    in nested dictionary or list or tuple and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        dim (int): dimension
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x.unsqueeze(dim=dim),
-            np.ndarray: lambda x: np.expand_dims(x, axis=dim),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def contiguous(x):
-    """
-    Makes all torch tensors and numpy arrays contiguous in nested dictionary or
-    list or tuple and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x.contiguous(),
-            np.ndarray: lambda x: np.ascontiguousarray(x),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def to_device(x, device):
-    """
-    Sends all torch tensors in nested dictionary or list or tuple to device
-    @device, and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        device (torch.Device): device to send tensors to
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x, d=device: x.to(d),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def to_tensor(x):
-    """
-    Converts all numpy arrays in nested dictionary or list or tuple to
-    torch tensors (and leaves existing torch Tensors as-is), and returns
-    a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x,
-            np.ndarray: lambda x: torch.from_numpy(x),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def to_numpy(x):
-    """
-    Converts all torch tensors in nested dictionary or list or tuple to
-    numpy (and leaves existing numpy arrays as-is), and returns
-    a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-
-    def f(tensor):
-        if tensor.is_cuda:
-            return tensor.detach().cpu().numpy()
-        else:
-            return tensor.detach().numpy()
-
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: f,
-            np.ndarray: lambda x: x,
-            type(None): lambda x: x,
-        },
-    )
-
-
-def to_list(x):
-    """
-    Converts all torch tensors and numpy arrays in nested dictionary or list
-    or tuple to a list, and returns a new nested structure. Useful for
-    json encoding.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-
-    def f(tensor):
-        if tensor.is_cuda:
-            return tensor.detach().cpu().numpy().tolist()
-        else:
-            return tensor.detach().numpy().tolist()
-
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: f,
-            np.ndarray: lambda x: x.tolist(),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def to_float(x):
-    """
-    Converts all torch tensors and numpy arrays in nested dictionary or list
-    or tuple to float type entries, and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x.float(),
-            np.ndarray: lambda x: x.astype(np.float32),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def to_uint8(x):
-    """
-    Converts all torch tensors and numpy arrays in nested dictionary or list
-    or tuple to uint8 type entries, and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x.byte(),
-            np.ndarray: lambda x: x.astype(np.uint8),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def to_torch(x, device):
-    """
-    Converts all numpy arrays and torch tensors in nested dictionary or list or tuple to
-    torch tensors on device @device and returns a new nested structure.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        device (torch.Device): device to send tensors to
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return to_device(to_float(to_tensor(x)), device)
-
-
-def to_one_hot_single(tensor, num_class):
-    """
-    Convert tensor to one-hot representation, assuming a certain number of total class labels.
-
-    Args:
-        tensor (torch.Tensor): tensor containing integer labels
-        num_class (int): number of classes
-
-    Returns:
-        x (torch.Tensor): tensor containing one-hot representation of labels
-    """
-    x = torch.zeros(tensor.size() + (num_class,)).to(tensor.device)
-    x.scatter_(-1, tensor.unsqueeze(-1), 1)
-    return x
-
-
-def to_one_hot(tensor, num_class):
-    """
-    Convert all tensors in nested dictionary or list or tuple to one-hot representation,
-    assuming a certain number of total class labels.
-
-    Args:
-        tensor (dict or list or tuple): a possibly nested dictionary or list or tuple
-        num_class (int): number of classes
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return map_tensor(tensor, func=lambda x, nc=num_class: to_one_hot_single(x, nc))
-
-
-def flatten_single(x, begin_axis=1):
-    """
-    Flatten a tensor in all dimensions from @begin_axis onwards.
-
-    Args:
-        x (torch.Tensor): tensor to flatten
-        begin_axis (int): which axis to flatten from
-
-    Returns:
-        y (torch.Tensor): flattened tensor
-    """
-    fixed_size = x.size()[:begin_axis]
-    _s = list(fixed_size) + [-1]
-    return x.reshape(*_s)
-
-
-def flatten(x, begin_axis=1):
-    """
-    Flatten all tensors in nested dictionary or list or tuple, from @begin_axis onwards.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        begin_axis (int): which axis to flatten from
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x, b=begin_axis: flatten_single(x, begin_axis=b),
-        },
-    )
-
-
-def reshape_dimensions_single(x, begin_axis, end_axis, target_dims):
-    """
-    Reshape selected dimensions in a tensor to a target dimension.
-
-    Args:
-        x (torch.Tensor): tensor to reshape
-        begin_axis (int): begin dimension
-        end_axis (int): end dimension
-        target_dims (tuple or list): target shape for the range of dimensions
-            (@begin_axis, @end_axis)
-
-    Returns:
-        y (torch.Tensor): reshaped tensor
-    """
-    assert begin_axis <= end_axis
-    assert begin_axis >= 0
-    assert end_axis < len(x.shape)
-    assert isinstance(target_dims, (tuple, list))
-    s = x.shape
-    final_s = []
-    for i in range(len(s)):
-        if i == begin_axis:
-            final_s.extend(target_dims)
-        elif i < begin_axis or i > end_axis:
-            final_s.append(s[i])
-    return x.reshape(*final_s)
-
-
-def reshape_dimensions(x, begin_axis, end_axis, target_dims):
-    """
-    Reshape selected dimensions for all tensors in nested dictionary or list or tuple
-    to a target dimension.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        begin_axis (int): begin dimension
-        end_axis (int): end dimension
-        target_dims (tuple or list): target shape for the range of dimensions
-            (@begin_axis, @end_axis)
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x, b=begin_axis, e=end_axis, t=target_dims: reshape_dimensions_single(
-                x, begin_axis=b, end_axis=e, target_dims=t
-            ),
-            np.ndarray: lambda x, b=begin_axis, e=end_axis, t=target_dims: reshape_dimensions_single(
-                x, begin_axis=b, end_axis=e, target_dims=t
-            ),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def join_dimensions(x, begin_axis, end_axis):
-    """
-    Joins all dimensions between dimensions (@begin_axis, @end_axis) into a flat dimension, for
-    all tensors in nested dictionary or list or tuple.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        begin_axis (int): begin dimension
-        end_axis (int): end dimension
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x, b=begin_axis, e=end_axis: reshape_dimensions_single(
-                x, begin_axis=b, end_axis=e, target_dims=[-1]
-            ),
-            np.ndarray: lambda x, b=begin_axis, e=end_axis: reshape_dimensions_single(
-                x, begin_axis=b, end_axis=e, target_dims=[-1]
-            ),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def expand_at_single(x, size, dim):
-    """
-    Expand a tensor at a single dimension @dim by @size
-
-    Args:
-        x (torch.Tensor): input tensor
-        size (int): size to expand
-        dim (int): dimension to expand
-
-    Returns:
-        y (torch.Tensor): expanded tensor
-    """
-    assert dim < x.ndimension()
-    assert x.shape[dim] == 1
-    expand_dims = [-1] * x.ndimension()
-    expand_dims[dim] = size
-    return x.expand(*expand_dims)
-
-
-def expand_at(x, size, dim):
-    """
-    Expand all tensors in nested dictionary or list or tuple at a single
-    dimension @dim by @size.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        size (int): size to expand
-        dim (int): dimension to expand
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return map_tensor(x, lambda t, s=size, d=dim: expand_at_single(t, s, d))
-
-
-def unsqueeze_expand_at(x, size, dim):
-    """
-    Unsqueeze and expand a tensor at a dimension @dim by @size.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        size (int): size to expand
-        dim (int): dimension to unsqueeze and expand
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    x = unsqueeze(x, dim)
-    return expand_at(x, size, dim)
-
-
-def repeat_by_expand_at(x, repeats, dim):
-    """
-    Repeat a dimension by combining expand and reshape operations.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        repeats (int): number of times to repeat the target dimension
-        dim (int): dimension to repeat on
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    x = unsqueeze_expand_at(x, repeats, dim + 1)
-    return join_dimensions(x, dim, dim + 1)
-
-
-def named_reduce_single(x, reduction, dim):
-    """
-    Reduce tensor at a dimension by named reduction functions.
-
-    Args:
-        x (torch.Tensor): tensor to be reduced
-        reduction (str): one of ["sum", "max", "mean", "flatten"]
-        dim (int): dimension to be reduced (or begin axis for flatten)
-
-    Returns:
-        y (torch.Tensor): reduced tensor
-    """
-    assert x.ndimension() > dim
-    assert reduction in ["sum", "max", "mean", "flatten"]
-    if reduction == "flatten":
-        x = flatten(x, begin_axis=dim)
-    elif reduction == "max":
-        x = torch.max(x, dim=dim)[0]  # [B, D]
-    elif reduction == "sum":
-        x = torch.sum(x, dim=dim)
-    else:
-        x = torch.mean(x, dim=dim)
-    return x
-
-
-def named_reduce(x, reduction, dim):
-    """
-    Reduces all tensors in nested dictionary or list or tuple at a dimension
-    using a named reduction function.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        reduction (str): one of ["sum", "max", "mean", "flatten"]
-        dim (int): dimension to be reduced (or begin axis for flatten)
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return map_tensor(x, func=lambda t, r=reduction, d=dim: named_reduce_single(t, r, d))
-
-
-def gather_along_dim_with_dim_single(x, target_dim, source_dim, indices):
-    """
-    This function indexes out a target dimension of a tensor in a structured way,
-    by allowing a different value to be selected for each member of a flat index
-    tensor (@indices) corresponding to a source dimension. This can be interpreted
-    as moving along the source dimension, using the corresponding index value
-    in @indices to select values for all other dimensions outside of the
-    source and target dimensions. A common use case is to gather values
-    in target dimension 1 for each batch member (target dimension 0).
-
-    Args:
-        x (torch.Tensor): tensor to gather values for
-        target_dim (int): dimension to gather values along
-        source_dim (int): dimension to hold constant and use for gathering values
-            from the other dimensions
-        indices (torch.Tensor): flat index tensor with same shape as tensor @x along
-            @source_dim
-
-    Returns:
-        y (torch.Tensor): gathered tensor, with dimension @target_dim indexed out
-    """
-    assert len(indices.shape) == 1
-    assert x.shape[source_dim] == indices.shape[0]
-
-    # unsqueeze in all dimensions except the source dimension
-    new_shape = [1] * x.ndimension()
-    new_shape[source_dim] = -1
-    indices = indices.reshape(*new_shape)
-
-    # repeat in all dimensions - but preserve shape of source dimension,
-    # and make sure target_dimension has singleton dimension
-    expand_shape = list(x.shape)
-    expand_shape[source_dim] = -1
-    expand_shape[target_dim] = 1
-    indices = indices.expand(*expand_shape)
-
-    out = x.gather(dim=target_dim, index=indices)
-    return out.squeeze(target_dim)
-
-
-def gather_along_dim_with_dim(x, target_dim, source_dim, indices):
-    """
-    Apply @gather_along_dim_with_dim_single to all tensors in a nested
-    dictionary or list or tuple.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        target_dim (int): dimension to gather values along
-        source_dim (int): dimension to hold constant and use for gathering values
-            from the other dimensions
-        indices (torch.Tensor): flat index tensor with same shape as tensor @x along
-            @source_dim
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple
-    """
-    return map_tensor(
-        x, lambda y, t=target_dim, s=source_dim, i=indices: gather_along_dim_with_dim_single(y, t, s, i)
-    )
-
-
-def gather_sequence_single(seq, indices):
-    """
-    Given a tensor with leading dimensions [B, T, ...], gather an element from each sequence in
-    the batch given an index for each sequence.
-
-    Args:
-        seq (torch.Tensor): tensor with leading dimensions [B, T, ...]
-        indices (torch.Tensor): tensor indices of shape [B]
-
-    Return:
-        y (torch.Tensor): indexed tensor of shape [B, ....]
-    """
-    return gather_along_dim_with_dim_single(seq, target_dim=1, source_dim=0, indices=indices)
-
-
-def gather_sequence(seq, indices):
-    """
-    Given a nested dictionary or list or tuple, gathers an element from each sequence of the batch
-    for tensors with leading dimensions [B, T, ...].
-
-    Args:
-        seq (dict or list or tuple): a possibly nested dictionary or list or tuple with tensors
-            of leading dimensions [B, T, ...]
-        indices (torch.Tensor): tensor indices of shape [B]
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple with tensors of shape [B, ...]
-    """
-    return gather_along_dim_with_dim(seq, target_dim=1, source_dim=0, indices=indices)
-
-
-def pad_sequence_single(seq, padding, batched=False, pad_same=True, pad_values=None):
-    """
-    Pad input tensor or array @seq in the time dimension (dimension 1).
-
-    Args:
-        seq (np.ndarray or torch.Tensor): sequence to be padded
-        padding (tuple): begin and end padding, e.g. [1, 1] pads both begin and end of the sequence by 1
-        batched (bool): if sequence has the batch dimension
-        pad_same (bool): if pad by duplicating
-        pad_values (scalar or (ndarray, Tensor)): values to be padded if not pad_same
-
-    Returns:
-        padded sequence (np.ndarray or torch.Tensor)
-    """
-    assert isinstance(seq, (np.ndarray, torch.Tensor))
-    assert pad_same or pad_values is not None
-    if pad_values is not None:
-        assert isinstance(pad_values, float)
-    repeat_func = np.repeat if isinstance(seq, np.ndarray) else torch.repeat_interleave
-    concat_func = np.concatenate if isinstance(seq, np.ndarray) else torch.cat
-    ones_like_func = np.ones_like if isinstance(seq, np.ndarray) else torch.ones_like
-    seq_dim = 1 if batched else 0
-
-    begin_pad = []
-    end_pad = []
-
-    if padding[0] > 0:
-        pad = seq[[0]] if pad_same else ones_like_func(seq[[0]]) * pad_values
-        begin_pad.append(repeat_func(pad, padding[0], seq_dim))
-    if padding[1] > 0:
-        pad = seq[[-1]] if pad_same else ones_like_func(seq[[-1]]) * pad_values
-        end_pad.append(repeat_func(pad, padding[1], seq_dim))
-
-    return concat_func(begin_pad + [seq] + end_pad, seq_dim)
-
-
-def pad_sequence(seq, padding, batched=False, pad_same=True, pad_values=None):
-    """
-    Pad a nested dictionary or list or tuple of sequence tensors in the time dimension (dimension 1).
-
-    Args:
-        seq (dict or list or tuple): a possibly nested dictionary or list or tuple with tensors
-            of leading dimensions [B, T, ...]
-        padding (tuple): begin and end padding, e.g. [1, 1] pads both begin and end of the sequence by 1
-        batched (bool): if sequence has the batch dimension
-        pad_same (bool): if pad by duplicating
-        pad_values (scalar or (ndarray, Tensor)): values to be padded if not pad_same
-
-    Returns:
-        padded sequence (dict or list or tuple)
-    """
-    return recursive_dict_list_tuple_apply(
-        seq,
-        {
-            torch.Tensor: lambda x, p=padding, b=batched, ps=pad_same, pv=pad_values: pad_sequence_single(
-                x, p, b, ps, pv
-            ),
-            np.ndarray: lambda x, p=padding, b=batched, ps=pad_same, pv=pad_values: pad_sequence_single(
-                x, p, b, ps, pv
-            ),
-            type(None): lambda x: x,
-        },
-    )
-
-
-def assert_size_at_dim_single(x, size, dim, msg):
-    """
-    Ensure that array or tensor @x has size @size in dim @dim.
-
-    Args:
-        x (np.ndarray or torch.Tensor): input array or tensor
-        size (int): size that tensors should have at @dim
-        dim (int): dimension to check
-        msg (str): text to display if assertion fails
-    """
-    assert x.shape[dim] == size, msg
-
-
-def assert_size_at_dim(x, size, dim, msg):
-    """
-    Ensure that arrays and tensors in nested dictionary or list or tuple have
-    size @size in dim @dim.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-        size (int): size that tensors should have at @dim
-        dim (int): dimension to check
-    """
-    map_tensor(x, lambda t, s=size, d=dim, m=msg: assert_size_at_dim_single(t, s, d, m))
-
-
-def get_shape(x):
-    """
-    Get all shapes of arrays and tensors in nested dictionary or list or tuple.
-
-    Args:
-        x (dict or list or tuple): a possibly nested dictionary or list or tuple
-
-    Returns:
-        y (dict or list or tuple): new nested dict-list-tuple that contains each array or
-            tensor's shape
-    """
-    return recursive_dict_list_tuple_apply(
-        x,
-        {
-            torch.Tensor: lambda x: x.shape,
-            np.ndarray: lambda x: x.shape,
-            type(None): lambda x: x,
-        },
-    )
-
-
-def list_of_flat_dict_to_dict_of_list(list_of_dict):
-    """
-    Helper function to go from a list of flat dictionaries to a dictionary of lists.
-    By "flat" we mean that none of the values are dictionaries, but are numpy arrays,
-    floats, etc.
-
-    Args:
-        list_of_dict (list): list of flat dictionaries
-
-    Returns:
-        dict_of_list (dict): dictionary of lists
-    """
-    assert isinstance(list_of_dict, list)
-    dic = collections.OrderedDict()
-    for i in range(len(list_of_dict)):
-        for k in list_of_dict[i]:
-            if k not in dic:
-                dic[k] = []
-            dic[k].append(list_of_dict[i][k])
-    return dic
-
-
-def flatten_nested_dict_list(d, parent_key="", sep="_", item_key=""):
-    """
-    Flatten a nested dict or list to a list.
-
-    For example, given a dict
-    {
-        a: 1
-        b: {
-            c: 2
-        }
-        c: 3
-    }
-
-    the function would return [(a, 1), (b_c, 2), (c, 3)]
-
-    Args:
-        d (dict, list): a nested dict or list to be flattened
-        parent_key (str): recursion helper
-        sep (str): separator for nesting keys
-        item_key (str): recursion helper
-    Returns:
-        list: a list of (key, value) tuples
-    """
-    items = []
-    if isinstance(d, (tuple, list)):
-        new_key = parent_key + sep + item_key if len(parent_key) > 0 else item_key
-        for i, v in enumerate(d):
-            items.extend(flatten_nested_dict_list(v, new_key, sep=sep, item_key=str(i)))
-        return items
-    elif isinstance(d, dict):
-        new_key = parent_key + sep + item_key if len(parent_key) > 0 else item_key
-        for k, v in d.items():
-            assert isinstance(k, str)
-            items.extend(flatten_nested_dict_list(v, new_key, sep=sep, item_key=k))
-        return items
-    else:
-        new_key = parent_key + sep + item_key if len(parent_key) > 0 else item_key
-        return [(new_key, d)]
-
-
-def time_distributed(inputs, op, activation=None, inputs_as_kwargs=False, inputs_as_args=False, **kwargs):
-    """
-    Apply function @op to all tensors in nested dictionary or list or tuple @inputs in both the
-    batch (B) and time (T) dimension, where the tensors are expected to have shape [B, T, ...].
-    Will do this by reshaping tensors to [B * T, ...], passing through the op, and then reshaping
-    outputs to [B, T, ...].
-
-    Args:
-        inputs (list or tuple or dict): a possibly nested dictionary or list or tuple with tensors
-            of leading dimensions [B, T, ...]
-        op: a layer op that accepts inputs
-        activation: activation to apply at the output
-        inputs_as_kwargs (bool): whether to feed input as a kwargs dict to the op
-        inputs_as_args (bool) whether to feed input as a args list to the op
-        kwargs (dict): other kwargs to supply to the op
-
-    Returns:
-        outputs (dict or list or tuple): new nested dict-list-tuple with tensors of leading dimension [B, T].
-    """
-    batch_size, seq_len = flatten_nested_dict_list(inputs)[0][1].shape[:2]
-    inputs = join_dimensions(inputs, 0, 1)
-    if inputs_as_kwargs:
-        outputs = op(**inputs, **kwargs)
-    elif inputs_as_args:
-        outputs = op(*inputs, **kwargs)
-    else:
-        outputs = op(inputs, **kwargs)
-
-    if activation is not None:
-        outputs = map_tensor(outputs, activation)
-    outputs = reshape_dimensions(outputs, begin_axis=0, end_axis=0, target_dims=(batch_size, seq_len))
-    return outputs
--- a/lerobot/common/policies/diffusion/model/multi_image_obs_encoder.py
+++ b/lerobot/common/policies/diffusion/model/multi_image_obs_encoder.py
@@ -5,9 +5,9 @@ import torch
 import torch.nn as nn
 import torchvision

-from lerobot.common.policies.diffusion.model.crop_randomizer import CropRandomizer
-from lerobot.common.policies.diffusion.model.module_attr_mixin import ModuleAttrMixin
-from lerobot.common.policies.diffusion.pytorch_utils import replace_submodules
+from diffusion_policy.common.pytorch_util import replace_submodules
+from diffusion_policy.model.common.module_attr_mixin import ModuleAttrMixin
+from diffusion_policy.model.vision.crop_randomizer import CropRandomizer


 class MultiImageObsEncoder(ModuleAttrMixin):
--- a/lerobot/common/policies/diffusion/policy.py
+++ b/lerobot/common/policies/diffusion/policy.py
@@ -4,10 +4,10 @@ import time
 import hydra
 import torch
 import torch.nn as nn
+from diffusion_policy.model.common.lr_scheduler import get_scheduler

-from lerobot.common.policies.diffusion.diffusion_unet_image_policy import DiffusionUnetImagePolicy
-from lerobot.common.policies.diffusion.model.lr_scheduler import get_scheduler
-from lerobot.common.policies.diffusion.model.multi_image_obs_encoder import MultiImageObsEncoder
+from .diffusion_unet_image_policy import DiffusionUnetImagePolicy
+from .multi_image_obs_encoder import MultiImageObsEncoder


 class DiffusionPolicy(nn.Module):
--- a/lerobot/common/policies/diffusion/pytorch_utils.py
+++ b/lerobot/common/policies/diffusion/pytorch_utils.py
@@ -1,76 +0,0 @@
-from typing import Callable, Dict
-
-import torch
-import torch.nn as nn
-import torchvision
-
-
-def get_resnet(name, weights=None, **kwargs):
-    """
-    name: resnet18, resnet34, resnet50
-    weights: "IMAGENET1K_V1", "r3m"
-    """
-    # load r3m weights
-    if (weights == "r3m") or (weights == "R3M"):
-        return get_r3m(name=name, **kwargs)
-
-    func = getattr(torchvision.models, name)
-    resnet = func(weights=weights, **kwargs)
-    resnet.fc = torch.nn.Identity()
-    return resnet
-
-
-def get_r3m(name, **kwargs):
-    """
-    name: resnet18, resnet34, resnet50
-    """
-    import r3m
-
-    r3m.device = "cpu"
-    model = r3m.load_r3m(name)
-    r3m_model = model.module
-    resnet_model = r3m_model.convnet
-    resnet_model = resnet_model.to("cpu")
-    return resnet_model
-
-
-def dict_apply(
-    x: Dict[str, torch.Tensor], func: Callable[[torch.Tensor], torch.Tensor]
-) -> Dict[str, torch.Tensor]:
-    result = {}
-    for key, value in x.items():
-        if isinstance(value, dict):
-            result[key] = dict_apply(value, func)
-        else:
-            result[key] = func(value)
-    return result
-
-
-def replace_submodules(
-    root_module: nn.Module, predicate: Callable[[nn.Module], bool], func: Callable[[nn.Module], nn.Module]
-) -> nn.Module:
-    """
-    predicate: Return true if the module is to be replaced.
-    func: Return new module to use.
-    """
-    if predicate(root_module):
-        return func(root_module)
-
-    bn_list = [k.split(".") for k, m in root_module.named_modules(remove_duplicate=True) if predicate(m)]
-    for *parent, k in bn_list:
-        parent_module = root_module
-        if len(parent) > 0:
-            parent_module = root_module.get_submodule(".".join(parent))
-        if isinstance(parent_module, nn.Sequential):
-            src_module = parent_module[int(k)]
-        else:
-            src_module = getattr(parent_module, k)
-        tgt_module = func(src_module)
-        if isinstance(parent_module, nn.Sequential):
-            parent_module[int(k)] = tgt_module
-        else:
-            setattr(parent_module, k, tgt_module)
-    # verify that all BN are replaced
-    bn_list = [k.split(".") for k, m in root_module.named_modules(remove_duplicate=True) if predicate(m)]
-    assert len(bn_list) == 0
-    return root_module
--- a/lerobot/common/policies/diffusion/replay_buffer.py
+++ b/lerobot/common/policies/diffusion/replay_buffer.py
@@ -1,614 +0,0 @@
-from __future__ import annotations
-
-import math
-import numbers
-import os
-from functools import cached_property
-
-import numcodecs
-import numpy as np
-import zarr
-
-
-def check_chunks_compatible(chunks: tuple, shape: tuple):
-    assert len(shape) == len(chunks)
-    for c in chunks:
-        assert isinstance(c, numbers.Integral)
-        assert c > 0
-
-
-def rechunk_recompress_array(group, name, chunks=None, chunk_length=None, compressor=None, tmp_key="_temp"):
-    old_arr = group[name]
-    if chunks is None:
-        chunks = (chunk_length,) + old_arr.chunks[1:] if chunk_length is not None else old_arr.chunks
-    check_chunks_compatible(chunks, old_arr.shape)
-
-    if compressor is None:
-        compressor = old_arr.compressor
-
-    if (chunks == old_arr.chunks) and (compressor == old_arr.compressor):
-        # no change
-        return old_arr
-
-    # rechunk recompress
-    group.move(name, tmp_key)
-    old_arr = group[tmp_key]
-    n_copied, n_skipped, n_bytes_copied = zarr.copy(
-        source=old_arr,
-        dest=group,
-        name=name,
-        chunks=chunks,
-        compressor=compressor,
-    )
-    del group[tmp_key]
-    arr = group[name]
-    return arr
-
-
-def get_optimal_chunks(shape, dtype, target_chunk_bytes=2e6, max_chunk_length=None):
-    """
-    Common shapes
-    T,D
-    T,N,D
-    T,H,W,C
-    T,N,H,W,C
-    """
-    itemsize = np.dtype(dtype).itemsize
-    # reversed
-    rshape = list(shape[::-1])
-    if max_chunk_length is not None:
-        rshape[-1] = int(max_chunk_length)
-    split_idx = len(shape) - 1
-    for i in range(len(shape) - 1):
-        this_chunk_bytes = itemsize * np.prod(rshape[:i])
-        next_chunk_bytes = itemsize * np.prod(rshape[: i + 1])
-        if this_chunk_bytes <= target_chunk_bytes and next_chunk_bytes > target_chunk_bytes:
-            split_idx = i
-
-    rchunks = rshape[:split_idx]
-    item_chunk_bytes = itemsize * np.prod(rshape[:split_idx])
-    this_max_chunk_length = rshape[split_idx]
-    next_chunk_length = min(this_max_chunk_length, math.ceil(target_chunk_bytes / item_chunk_bytes))
-    rchunks.append(next_chunk_length)
-    len_diff = len(shape) - len(rchunks)
-    rchunks.extend([1] * len_diff)
-    chunks = tuple(rchunks[::-1])
-    # print(np.prod(chunks) * itemsize / target_chunk_bytes)
-    return chunks
-
-
-class ReplayBuffer:
-    """
-    Zarr-based temporal datastructure.
-    Assumes first dimension to be time. Only chunk in time dimension.
-    """
-
-    def __init__(self, root: zarr.Group | dict[str, dict]):
-        """
-        Dummy constructor. Use copy_from* and create_from* class methods instead.
-        """
-        assert "data" in root
-        assert "meta" in root
-        assert "episode_ends" in root["meta"]
-        for value in root["data"].values():
-            assert value.shape[0] == root["meta"]["episode_ends"][-1]
-        self.root = root
-
-    # ============= create constructors ===============
-    @classmethod
-    def create_empty_zarr(cls, storage=None, root=None):
-        if root is None:
-            if storage is None:
-                storage = zarr.MemoryStore()
-            root = zarr.group(store=storage)
-        root.require_group("data", overwrite=False)
-        meta = root.require_group("meta", overwrite=False)
-        if "episode_ends" not in meta:
-            meta.zeros("episode_ends", shape=(0,), dtype=np.int64, compressor=None, overwrite=False)
-        return cls(root=root)
-
-    @classmethod
-    def create_empty_numpy(cls):
-        root = {"data": {}, "meta": {"episode_ends": np.zeros((0,), dtype=np.int64)}}
-        return cls(root=root)
-
-    @classmethod
-    def create_from_group(cls, group, **kwargs):
-        if "data" not in group:
-            # create from stratch
-            buffer = cls.create_empty_zarr(root=group, **kwargs)
-        else:
-            # already exist
-            buffer = cls(root=group, **kwargs)
-        return buffer
-
-    @classmethod
-    def create_from_path(cls, zarr_path, mode="r", **kwargs):
-        """
-        Open a on-disk zarr directly (for dataset larger than memory).
-        Slower.
-        """
-        group = zarr.open(os.path.expanduser(zarr_path), mode)
-        return cls.create_from_group(group, **kwargs)
-
-    # ============= copy constructors ===============
-    @classmethod
-    def copy_from_store(
-        cls,
-        src_store,
-        store=None,
-        keys=None,
-        chunks: dict[str, tuple] | None = None,
-        compressors: dict | str | numcodecs.abc.Codec | None = None,
-        if_exists="replace",
-        **kwargs,
-    ):
-        """
-        Load to memory.
-        """
-        src_root = zarr.group(src_store)
-        if chunks is None:
-            chunks = {}
-        if compressors is None:
-            compressors = {}
-        root = None
-        if store is None:
-            # numpy backend
-            meta = {}
-            for key, value in src_root["meta"].items():
-                if len(value.shape) == 0:
-                    meta[key] = np.array(value)
-                else:
-                    meta[key] = value[:]
-
-            if keys is None:
-                keys = src_root["data"].keys()
-            data = {}
-            for key in keys:
-                arr = src_root["data"][key]
-                data[key] = arr[:]
-
-            root = {"meta": meta, "data": data}
-        else:
-            root = zarr.group(store=store)
-            # copy without recompression
-            n_copied, n_skipped, n_bytes_copied = zarr.copy_store(
-                source=src_store, dest=store, source_path="/meta", dest_path="/meta", if_exists=if_exists
-            )
-            data_group = root.create_group("data", overwrite=True)
-            if keys is None:
-                keys = src_root["data"].keys()
-            for key in keys:
-                value = src_root["data"][key]
-                cks = cls._resolve_array_chunks(chunks=chunks, key=key, array=value)
-                cpr = cls._resolve_array_compressor(compressors=compressors, key=key, array=value)
-                if cks == value.chunks and cpr == value.compressor:
-                    # copy without recompression
-                    this_path = "/data/" + key
-                    n_copied, n_skipped, n_bytes_copied = zarr.copy_store(
-                        source=src_store,
-                        dest=store,
-                        source_path=this_path,
-                        dest_path=this_path,
-                        if_exists=if_exists,
-                    )
-                else:
-                    # copy with recompression
-                    n_copied, n_skipped, n_bytes_copied = zarr.copy(
-                        source=value,
-                        dest=data_group,
-                        name=key,
-                        chunks=cks,
-                        compressor=cpr,
-                        if_exists=if_exists,
-                    )
-        buffer = cls(root=root)
-        return buffer
-
-    @classmethod
-    def copy_from_path(
-        cls,
-        zarr_path,
-        backend=None,
-        store=None,
-        keys=None,
-        chunks: dict[str, tuple] | None = None,
-        compressors: dict | str | numcodecs.abc.Codec | None = None,
-        if_exists="replace",
-        **kwargs,
-    ):
-        """
-        Copy a on-disk zarr to in-memory compressed.
-        Recommended
-        """
-        if chunks is None:
-            chunks = {}
-        if compressors is None:
-            compressors = {}
-        if backend == "numpy":
-            print("backend argument is deprecated!")
-            store = None
-        group = zarr.open(os.path.expanduser(zarr_path), "r")
-        return cls.copy_from_store(
-            src_store=group.store,
-            store=store,
-            keys=keys,
-            chunks=chunks,
-            compressors=compressors,
-            if_exists=if_exists,
-            **kwargs,
-        )
-
-    # ============= save methods ===============
-    def save_to_store(
-        self,
-        store,
-        chunks: dict[str, tuple] | None = None,
-        compressors: str | numcodecs.abc.Codec | dict | None = None,
-        if_exists="replace",
-        **kwargs,
-    ):
-        root = zarr.group(store)
-        if chunks is None:
-            chunks = {}
-        if compressors is None:
-            compressors = {}
-        if self.backend == "zarr":
-            # recompression free copy
-            n_copied, n_skipped, n_bytes_copied = zarr.copy_store(
-                source=self.root.store,
-                dest=store,
-                source_path="/meta",
-                dest_path="/meta",
-                if_exists=if_exists,
-            )
-        else:
-            meta_group = root.create_group("meta", overwrite=True)
-            # save meta, no chunking
-            for key, value in self.root["meta"].items():
-                _ = meta_group.array(name=key, data=value, shape=value.shape, chunks=value.shape)
-
-        # save data, chunk
-        data_group = root.create_group("data", overwrite=True)
-        for key, value in self.root["data"].items():
-            cks = self._resolve_array_chunks(chunks=chunks, key=key, array=value)
-            cpr = self._resolve_array_compressor(compressors=compressors, key=key, array=value)
-            if isinstance(value, zarr.Array):
-                if cks == value.chunks and cpr == value.compressor:
-                    # copy without recompression
-                    this_path = "/data/" + key
-                    n_copied, n_skipped, n_bytes_copied = zarr.copy_store(
-                        source=self.root.store,
-                        dest=store,
-                        source_path=this_path,
-                        dest_path=this_path,
-                        if_exists=if_exists,
-                    )
-                else:
-                    # copy with recompression
-                    n_copied, n_skipped, n_bytes_copied = zarr.copy(
-                        source=value,
-                        dest=data_group,
-                        name=key,
-                        chunks=cks,
-                        compressor=cpr,
-                        if_exists=if_exists,
-                    )
-            else:
-                # numpy
-                _ = data_group.array(name=key, data=value, chunks=cks, compressor=cpr)
-        return store
-
-    def save_to_path(
-        self,
-        zarr_path,
-        chunks: dict[str, tuple] | None = None,
-        compressors: str | numcodecs.abc.Codec | dict | None = None,
-        if_exists="replace",
-        **kwargs,
-    ):
-        if chunks is None:
-            chunks = {}
-        if compressors is None:
-            compressors = {}
-        store = zarr.DirectoryStore(os.path.expanduser(zarr_path))
-        return self.save_to_store(
-            store, chunks=chunks, compressors=compressors, if_exists=if_exists, **kwargs
-        )
-
-    @staticmethod
-    def resolve_compressor(compressor="default"):
-        if compressor == "default":
-            compressor = numcodecs.Blosc(cname="lz4", clevel=5, shuffle=numcodecs.Blosc.NOSHUFFLE)
-        elif compressor == "disk":
-            compressor = numcodecs.Blosc("zstd", clevel=5, shuffle=numcodecs.Blosc.BITSHUFFLE)
-        return compressor
-
-    @classmethod
-    def _resolve_array_compressor(cls, compressors: dict | str | numcodecs.abc.Codec, key, array):
-        # allows compressor to be explicitly set to None
-        cpr = "nil"
-        if isinstance(compressors, dict):
-            if key in compressors:
-                cpr = cls.resolve_compressor(compressors[key])
-            elif isinstance(array, zarr.Array):
-                cpr = array.compressor
-        else:
-            cpr = cls.resolve_compressor(compressors)
-        # backup default
-        if cpr == "nil":
-            cpr = cls.resolve_compressor("default")
-        return cpr
-
-    @classmethod
-    def _resolve_array_chunks(cls, chunks: dict | tuple, key, array):
-        cks = None
-        if isinstance(chunks, dict):
-            if key in chunks:
-                cks = chunks[key]
-            elif isinstance(array, zarr.Array):
-                cks = array.chunks
-        elif isinstance(chunks, tuple):
-            cks = chunks
-        else:
-            raise TypeError(f"Unsupported chunks type {type(chunks)}")
-        # backup default
-        if cks is None:
-            cks = get_optimal_chunks(shape=array.shape, dtype=array.dtype)
-        # check
-        check_chunks_compatible(chunks=cks, shape=array.shape)
-        return cks
-
-    # ============= properties =================
-    @cached_property
-    def data(self):
-        return self.root["data"]
-
-    @cached_property
-    def meta(self):
-        return self.root["meta"]
-
-    def update_meta(self, data):
-        # sanitize data
-        np_data = {}
-        for key, value in data.items():
-            if isinstance(value, np.ndarray):
-                np_data[key] = value
-            else:
-                arr = np.array(value)
-                if arr.dtype == object:
-                    raise TypeError(f"Invalid value type {type(value)}")
-                np_data[key] = arr
-
-        meta_group = self.meta
-        if self.backend == "zarr":
-            for key, value in np_data.items():
-                _ = meta_group.array(
-                    name=key, data=value, shape=value.shape, chunks=value.shape, overwrite=True
-                )
-        else:
-            meta_group.update(np_data)
-
-        return meta_group
-
-    @property
-    def episode_ends(self):
-        return self.meta["episode_ends"]
-
-    def get_episode_idxs(self):
-        import numba
-
-        numba.jit(nopython=True)
-
-        def _get_episode_idxs(episode_ends):
-            result = np.zeros((episode_ends[-1],), dtype=np.int64)
-            for i in range(len(episode_ends)):
-                start = 0
-                if i > 0:
-                    start = episode_ends[i - 1]
-                end = episode_ends[i]
-                for idx in range(start, end):
-                    result[idx] = i
-            return result
-
-        return _get_episode_idxs(self.episode_ends)
-
-    @property
-    def backend(self):
-        backend = "numpy"
-        if isinstance(self.root, zarr.Group):
-            backend = "zarr"
-        return backend
-
-    # =========== dict-like API ==============
-    def __repr__(self) -> str:
-        if self.backend == "zarr":
-            return str(self.root.tree())
-        else:
-            return super().__repr__()
-
-    def keys(self):
-        return self.data.keys()
-
-    def values(self):
-        return self.data.values()
-
-    def items(self):
-        return self.data.items()
-
-    def __getitem__(self, key):
-        return self.data[key]
-
-    def __contains__(self, key):
-        return key in self.data
-
-    # =========== our API ==============
-    @property
-    def n_steps(self):
-        if len(self.episode_ends) == 0:
-            return 0
-        return self.episode_ends[-1]
-
-    @property
-    def n_episodes(self):
-        return len(self.episode_ends)
-
-    @property
-    def chunk_size(self):
-        if self.backend == "zarr":
-            return next(iter(self.data.arrays()))[-1].chunks[0]
-        return None
-
-    @property
-    def episode_lengths(self):
-        ends = self.episode_ends[:]
-        ends = np.insert(ends, 0, 0)
-        lengths = np.diff(ends)
-        return lengths
-
-    def add_episode(
-        self,
-        data: dict[str, np.ndarray],
-        chunks: dict[str, tuple] | None = None,
-        compressors: str | numcodecs.abc.Codec | dict | None = None,
-    ):
-        if chunks is None:
-            chunks = {}
-        if compressors is None:
-            compressors = {}
-        assert len(data) > 0
-        is_zarr = self.backend == "zarr"
-
-        curr_len = self.n_steps
-        episode_length = None
-        for value in data.values():
-            assert len(value.shape) >= 1
-            if episode_length is None:
-                episode_length = len(value)
-            else:
-                assert episode_length == len(value)
-        new_len = curr_len + episode_length
-
-        for key, value in data.items():
-            new_shape = (new_len,) + value.shape[1:]
-            # create array
-            if key not in self.data:
-                if is_zarr:
-                    cks = self._resolve_array_chunks(chunks=chunks, key=key, array=value)
-                    cpr = self._resolve_array_compressor(compressors=compressors, key=key, array=value)
-                    arr = self.data.zeros(
-                        name=key, shape=new_shape, chunks=cks, dtype=value.dtype, compressor=cpr
-                    )
-                else:
-                    # copy data to prevent modify
-                    arr = np.zeros(shape=new_shape, dtype=value.dtype)
-                    self.data[key] = arr
-            else:
-                arr = self.data[key]
-                assert value.shape[1:] == arr.shape[1:]
-                # same method for both zarr and numpy
-                if is_zarr:
-                    arr.resize(new_shape)
-                else:
-                    arr.resize(new_shape, refcheck=False)
-            # copy data
-            arr[-value.shape[0] :] = value
-
-        # append to episode ends
-        episode_ends = self.episode_ends
-        if is_zarr:
-            episode_ends.resize(episode_ends.shape[0] + 1)
-        else:
-            episode_ends.resize(episode_ends.shape[0] + 1, refcheck=False)
-        episode_ends[-1] = new_len
-
-        # rechunk
-        if is_zarr and episode_ends.chunks[0] < episode_ends.shape[0]:
-            rechunk_recompress_array(self.meta, "episode_ends", chunk_length=int(episode_ends.shape[0] * 1.5))
-
-    def drop_episode(self):
-        is_zarr = self.backend == "zarr"
-        episode_ends = self.episode_ends[:].copy()
-        assert len(episode_ends) > 0
-        start_idx = 0
-        if len(episode_ends) > 1:
-            start_idx = episode_ends[-2]
-        for value in self.data.values():
-            new_shape = (start_idx,) + value.shape[1:]
-            if is_zarr:
-                value.resize(new_shape)
-            else:
-                value.resize(new_shape, refcheck=False)
-        if is_zarr:
-            self.episode_ends.resize(len(episode_ends) - 1)
-        else:
-            self.episode_ends.resize(len(episode_ends) - 1, refcheck=False)
-
-    def pop_episode(self):
-        assert self.n_episodes > 0
-        episode = self.get_episode(self.n_episodes - 1, copy=True)
-        self.drop_episode()
-        return episode
-
-    def extend(self, data):
-        self.add_episode(data)
-
-    def get_episode(self, idx, copy=False):
-        idx = list(range(len(self.episode_ends)))[idx]
-        start_idx = 0
-        if idx > 0:
-            start_idx = self.episode_ends[idx - 1]
-        end_idx = self.episode_ends[idx]
-        result = self.get_steps_slice(start_idx, end_idx, copy=copy)
-        return result
-
-    def get_episode_slice(self, idx):
-        start_idx = 0
-        if idx > 0:
-            start_idx = self.episode_ends[idx - 1]
-        end_idx = self.episode_ends[idx]
-        return slice(start_idx, end_idx)
-
-    def get_steps_slice(self, start, stop, step=None, copy=False):
-        _slice = slice(start, stop, step)
-
-        result = {}
-        for key, value in self.data.items():
-            x = value[_slice]
-            if copy and isinstance(value, np.ndarray):
-                x = x.copy()
-            result[key] = x
-        return result
-
-    # =========== chunking =============
-    def get_chunks(self) -> dict:
-        assert self.backend == "zarr"
-        chunks = {}
-        for key, value in self.data.items():
-            chunks[key] = value.chunks
-        return chunks
-
-    def set_chunks(self, chunks: dict):
-        assert self.backend == "zarr"
-        for key, value in chunks.items():
-            if key in self.data:
-                arr = self.data[key]
-                if value != arr.chunks:
-                    check_chunks_compatible(chunks=value, shape=arr.shape)
-                    rechunk_recompress_array(self.data, key, chunks=value)
-
-    def get_compressors(self) -> dict:
-        assert self.backend == "zarr"
-        compressors = {}
-        for key, value in self.data.items():
-            compressors[key] = value.compressor
-        return compressors
-
-    def set_compressors(self, compressors: dict):
-        assert self.backend == "zarr"
-        for key, value in compressors.items():
-            if key in self.data:
-                arr = self.data[key]
-                compressor = self.resolve_compressor(value)
-                if compressor != arr.compressor:
-                    rechunk_recompress_array(self.data, key, compressor=compressor)
--- a/lerobot/common/policies/factory.py
+++ b/lerobot/common/policies/factory.py
@@ -1,6 +1,6 @@
 def make_policy(cfg):
    if cfg.policy.name == "tdmpc":
-        from lerobot.common.policies.tdmpc.policy import TDMPC
+        from lerobot.common.policies.tdmpc import TDMPC

        policy = TDMPC(cfg.policy, cfg.device)
    elif cfg.policy.name == "diffusion":
--- a/lerobot/common/policies/tdmpc/policy.py
+++ b/lerobot/common/policies/tdmpc/policy.py
@@ -8,7 +8,7 @@ import numpy as np
 import torch
 import torch.nn as nn

-import lerobot.common.policies.tdmpc.helper as h
+import lerobot.common.policies.tdmpc_helper as h

 FIRST_FRAME = 0

--- a/lerobot/common/policies/tdmpc_helper.py
+++ b/lerobot/common/policies/tdmpc_helper.py
--- a/lerobot/configs/default.yaml
+++ b/lerobot/configs/default.yaml
@@ -31,4 +31,5 @@ policy: ???
 wandb:
  enable: true
  project: lerobot
+  entity: rcadene  # insert your own
  notes: ""
--- a/lerobot/configs/policy/diffusion.yaml
+++ b/lerobot/configs/policy/diffusion.yaml
@@ -74,6 +74,7 @@ noise_scheduler:
  prediction_type: epsilon # or sample

 obs_encoder:
+  # _target_: diffusion_policy.model.vision.multi_image_obs_encoder.MultiImageObsEncoder
  shape_meta: ${shape_meta}
  # resize_shape: null
  # crop_shape: [76, 76]
@@ -84,12 +85,12 @@ obs_encoder:
  imagenet_norm: True

 rgb_model:
-  _target_: lerobot.common.policies.diffusion.pytorch_utils.get_resnet
+  _target_: diffusion_policy.model.vision.model_getter.get_resnet
  name: resnet18
  weights: null

 ema:
-  _target_: lerobot.common.policies.diffusion.model.ema_model.EMAModel
+  _target_: diffusion_policy.model.diffusion.ema_model.EMAModel
  update_after_step: 0
  inv_gamma: 1.0
  power: 0.75
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -119,7 +119,7 @@ def eval(cfg: dict, out_dir=None):
    offline_buffer = make_offline_buffer(cfg)

    logging.info("make_env")
-    env = make_env(cfg, transform=offline_buffer.transform)
+    env = make_env(cfg, transform=offline_buffer._transform)

    if cfg.policy.pretrained_model_path:
        policy = make_policy(cfg)
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -143,11 +143,11 @@ def train(cfg: dict, out_dir=None, job_name=None):
        online_buffer = TensorDictReplayBuffer(
            storage=LazyMemmapStorage(100_000),
            sampler=online_sampler,
-            transform=offline_buffer.transform,
+            transform=offline_buffer._transform,
        )

    logging.info("make_env")
-    env = make_env(cfg, transform=offline_buffer.transform)
+    env = make_env(cfg, transform=offline_buffer._transform)

    logging.info("make_policy")
    policy = make_policy(cfg)
--- a/poetry.lock
+++ b/poetry.lock
@@ -340,69 +340,69 @@ files = [

 [[package]]
 name = "cython"
-version = "3.0.9"
+version = "3.0.8"
 description = "The Cython compiler for writing C extensions in the Python language."
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 files = [
-    {file = "Cython-3.0.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:296bd30d4445ac61b66c9d766567f6e81a6e262835d261e903c60c891a6729d3"},
-    {file = "Cython-3.0.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f496b52845cb45568a69d6359a2c335135233003e708ea02155c10ce3548aa89"},
-    {file = "Cython-3.0.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:858c3766b9aa3ab8a413392c72bbab1c144a9766b7c7bfdef64e2e414363fa0c"},
-    {file = "Cython-3.0.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c0eb1e6ef036028a52525fd9a012a556f6dd4788a0e8755fe864ba0e70cde2ff"},
-    {file = "Cython-3.0.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c8191941073ea5896321de3c8c958fd66e5f304b0cd1f22c59edd0b86c4dd90d"},
-    {file = "Cython-3.0.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e32b016030bc72a8a22a1f21f470a2f57573761a4f00fbfe8347263f4fbdb9f1"},
-    {file = "Cython-3.0.9-cp310-cp310-win32.whl", hash = "sha256:d6f3ff1cd6123973fe03e0fb8ee936622f976c0c41138969975824d08886572b"},
-    {file = "Cython-3.0.9-cp310-cp310-win_amd64.whl", hash = "sha256:56f3b643dbe14449248bbeb9a63fe3878a24256664bc8c8ef6efd45d102596d8"},
-    {file = "Cython-3.0.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:35e6665a20d6b8a152d72b7fd87dbb2af6bb6b18a235b71add68122d594dbd41"},
-    {file = "Cython-3.0.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92f4960c40ad027bd8c364c50db11104eadc59ffeb9e5b7f605ca2f05946e20"},
-    {file = "Cython-3.0.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38df37d0e732fbd9a2fef898788492e82b770c33d1e4ed12444bbc8a3b3f89c0"},
-    {file = "Cython-3.0.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad7fd88ebaeaf2e76fd729a8919fae80dab3d6ac0005e28494261d52ff347a8f"},
-    {file = "Cython-3.0.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1365d5f76bf4d19df3d19ce932584c9bb76e9fb096185168918ef9b36e06bfa4"},
-    {file = "Cython-3.0.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c232e7f279388ac9625c3e5a5a9f0078a9334959c5d6458052c65bbbba895e1e"},
-    {file = "Cython-3.0.9-cp311-cp311-win32.whl", hash = "sha256:357e2fad46a25030b0c0496487e01a9dc0fdd0c09df0897f554d8ba3c1bc4872"},
-    {file = "Cython-3.0.9-cp311-cp311-win_amd64.whl", hash = "sha256:1315aee506506e8d69cf6631d8769e6b10131fdcc0eb66df2698f2a3ddaeeff2"},
-    {file = "Cython-3.0.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:157973807c2796addbed5fbc4d9c882ab34bbc60dc297ca729504901479d5df7"},
-    {file = "Cython-3.0.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00b105b5d050645dd59e6767bc0f18b48a4aa11c85f42ec7dd8181606f4059e3"},
-    {file = "Cython-3.0.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac5536d09bef240cae0416d5a703d298b74c7bbc397da803ac9d344e732d4369"},
-    {file = "Cython-3.0.9-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09c44501d476d16aaa4cbc29c87f8c0f54fc20e69b650d59cbfa4863426fc70c"},
-    {file = "Cython-3.0.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:cc9c3b9f20d8e298618e5ccd32083ca386e785b08f9893fbec4c50b6b85be772"},
-    {file = "Cython-3.0.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a30d96938c633e3ec37000ac3796525da71254ef109e66bdfd78f29891af6454"},
-    {file = "Cython-3.0.9-cp312-cp312-win32.whl", hash = "sha256:757ca93bdd80702546df4d610d2494ef2e74249cac4d5ba9464589fb464bd8a3"},
-    {file = "Cython-3.0.9-cp312-cp312-win_amd64.whl", hash = "sha256:1dc320a9905ab95414013f6de805efbff9e17bb5fb3b90bbac533f017bec8136"},
-    {file = "Cython-3.0.9-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4ae349960ebe0da0d33724eaa7f1eb866688fe5434cc67ce4dbc06d6a719fbfc"},
-    {file = "Cython-3.0.9-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63d2537bf688247f76ded6dee28ebd26274f019309aef1eb4f2f9c5c482fde2d"},
-    {file = "Cython-3.0.9-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36f5a2dfc724bea1f710b649f02d802d80fc18320c8e6396684ba4a48412445a"},
-    {file = "Cython-3.0.9-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:deaf4197d4b0bcd5714a497158ea96a2bd6d0f9636095437448f7e06453cc83d"},
-    {file = "Cython-3.0.9-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:000af6deb7412eb7ac0c635ff5e637fb8725dd0a7b88cc58dfc2b3de14e701c4"},
-    {file = "Cython-3.0.9-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:15c7f5c2d35bed9aa5f2a51eaac0df23ae72f2dbacf62fc672dd6bfaa75d2d6f"},
-    {file = "Cython-3.0.9-cp36-cp36m-win32.whl", hash = "sha256:f49aa4970cd3bec66ac22e701def16dca2a49c59cceba519898dd7526e0be2c0"},
-    {file = "Cython-3.0.9-cp36-cp36m-win_amd64.whl", hash = "sha256:4558814fa025b193058d42eeee498a53d6b04b2980d01339fc2444b23fd98e58"},
-    {file = "Cython-3.0.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:539cd1d74fd61f6cfc310fa6bbbad5adc144627f2b7486a07075d4e002fd6aad"},
-    {file = "Cython-3.0.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3232926cd406ee02eabb732206f6e882c3aed9d58f0fea764013d9240405bcf"},
-    {file = "Cython-3.0.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33b6ac376538a7fc8c567b85d3c71504308a9318702ec0485dd66c059f3165cb"},
-    {file = "Cython-3.0.9-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2cc92504b5d22ac66031ffb827bd3a967fc75a5f0f76ab48bce62df19be6fdfd"},
-    {file = "Cython-3.0.9-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:22b8fae756c5c0d8968691bed520876de452f216c28ec896a00739a12dba3bd9"},
-    {file = "Cython-3.0.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9cda0d92a09f3520f29bd91009f1194ba9600777c02c30c6d2d4ac65fb63e40d"},
-    {file = "Cython-3.0.9-cp37-cp37m-win32.whl", hash = "sha256:ec612418490941ed16c50c8d3784c7bdc4c4b2a10c361259871790b02ec8c1db"},
-    {file = "Cython-3.0.9-cp37-cp37m-win_amd64.whl", hash = "sha256:976c8d2bedc91ff6493fc973d38b2dc01020324039e2af0e049704a8e1b22936"},
-    {file = "Cython-3.0.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5055988b007c92256b6e9896441c3055556038c3497fcbf8c921a6c1fce90719"},
-    {file = "Cython-3.0.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9360606d964c2d0492a866464efcf9d0a92715644eede3f6a2aa696de54a137"},
-    {file = "Cython-3.0.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c6e809f060bed073dc7cba1648077fe3b68208863d517c8b39f3920eecf9dd"},
-    {file = "Cython-3.0.9-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95ed792c966f969cea7489c32ff90150b415c1f3567db8d5a9d489c7c1602dac"},
-    {file = "Cython-3.0.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8edd59d22950b400b03ca78d27dc694d2836a92ef0cac4f64cb4b2ff902f7e25"},
-    {file = "Cython-3.0.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4cf0ed273bf60e97922fcbbdd380c39693922a597760160b4b4355e6078ca188"},
-    {file = "Cython-3.0.9-cp38-cp38-win32.whl", hash = "sha256:5eb9bd4ae12ebb2bc79a193d95aacf090fbd8d7013e11ed5412711650cb34934"},
-    {file = "Cython-3.0.9-cp38-cp38-win_amd64.whl", hash = "sha256:44457279da56e0f829bb1fc5a5dc0836e5d498dbcf9b2324f32f7cc9d2ec6569"},
-    {file = "Cython-3.0.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4b419a1adc2af43f4660e2f6eaf1e4fac2dbac59490771eb8ac3d6063f22356"},
-    {file = "Cython-3.0.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f836192140f033b2319a0128936367c295c2b32e23df05b03b672a6015757ea"},
-    {file = "Cython-3.0.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fd198c1a7f8e9382904d622cc0efa3c184605881fd5262c64cbb7168c4c1ec5"},
-    {file = "Cython-3.0.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a274fe9ca5c53fafbcf5c8f262f8ad6896206a466f0eeb40aaf36a7951e957c0"},
-    {file = "Cython-3.0.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:158c38360bbc5063341b1e78d3737f1251050f89f58a3df0d10fb171c44262be"},
-    {file = "Cython-3.0.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8bf30b045f7deda0014b042c1b41c1d272facc762ab657529e3b05505888e878"},
-    {file = "Cython-3.0.9-cp39-cp39-win32.whl", hash = "sha256:9a001fd95c140c94d934078544ff60a3c46aca2dc86e75a76e4121d3cd1f4b33"},
-    {file = "Cython-3.0.9-cp39-cp39-win_amd64.whl", hash = "sha256:530c01c4aebba709c0ec9c7ecefe07177d0b9fd7ffee29450a118d92192ccbdf"},
-    {file = "Cython-3.0.9-py2.py3-none-any.whl", hash = "sha256:bf96417714353c5454c2e3238fca9338599330cf51625cdc1ca698684465646f"},
-    {file = "Cython-3.0.9.tar.gz", hash = "sha256:a2d354f059d1f055d34cfaa62c5b68bc78ac2ceab6407148d47fb508cf3ba4f3"},
+    {file = "Cython-3.0.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a846e0a38e2b24e9a5c5dc74b0e54c6e29420d88d1dafabc99e0fc0f3e338636"},
+    {file = "Cython-3.0.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45523fdc2b78d79b32834cc1cc12dc2ca8967af87e22a3ee1bff20e77c7f5520"},
+    {file = "Cython-3.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa0b7f3f841fe087410cab66778e2d3fb20ae2d2078a2be3dffe66c6574be39"},
+    {file = "Cython-3.0.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e87294e33e40c289c77a135f491cd721bd089f193f956f7b8ed5aa2d0b8c558f"},
+    {file = "Cython-3.0.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a1df7a129344b1215c20096d33c00193437df1a8fcca25b71f17c23b1a44f782"},
+    {file = "Cython-3.0.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:13c2a5e57a0358da467d97667297bf820b62a1a87ae47c5f87938b9bb593acbd"},
+    {file = "Cython-3.0.8-cp310-cp310-win32.whl", hash = "sha256:96b028f044f5880e3cb18ecdcfc6c8d3ce9d0af28418d5ab464509f26d8adf12"},
+    {file = "Cython-3.0.8-cp310-cp310-win_amd64.whl", hash = "sha256:8140597a8b5cc4f119a1190f5a2228a84f5ca6d8d9ec386cfce24663f48b2539"},
+    {file = "Cython-3.0.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aae26f9663e50caf9657148403d9874eea41770ecdd6caf381d177c2b1bb82ba"},
+    {file = "Cython-3.0.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:547eb3cdb2f8c6f48e6865d5a741d9dd051c25b3ce076fbca571727977b28ac3"},
+    {file = "Cython-3.0.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a567d4b9ba70b26db89d75b243529de9e649a2f56384287533cf91512705bee"},
+    {file = "Cython-3.0.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:51d1426263b0e82fb22bda8ea60dc77a428581cc19e97741011b938445d383f1"},
+    {file = "Cython-3.0.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c26daaeccda072459b48d211415fd1e5507c06bcd976fa0d5b8b9f1063467d7b"},
+    {file = "Cython-3.0.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:289ce7838208211cd166e975865fd73b0649bf118170b6cebaedfbdaf4a37795"},
+    {file = "Cython-3.0.8-cp311-cp311-win32.whl", hash = "sha256:c8aa05f5e17f8042a3be052c24f2edc013fb8af874b0bf76907d16c51b4e7871"},
+    {file = "Cython-3.0.8-cp311-cp311-win_amd64.whl", hash = "sha256:000dc9e135d0eec6ecb2b40a5b02d0868a2f8d2e027a41b0fe16a908a9e6de02"},
+    {file = "Cython-3.0.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:90d3fe31db55685d8cb97d43b0ec39ef614fcf660f83c77ed06aa670cb0e164f"},
+    {file = "Cython-3.0.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e24791ddae2324e88e3c902a765595c738f19ae34ee66bfb1a6dac54b1833419"},
+    {file = "Cython-3.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f020fa1c0552052e0660790b8153b79e3fc9a15dbd8f1d0b841fe5d204a6ae6"},
+    {file = "Cython-3.0.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18bfa387d7a7f77d7b2526af69a65dbd0b731b8d941aaff5becff8e21f6d7717"},
+    {file = "Cython-3.0.8-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fe81b339cffd87c0069c6049b4d33e28bdd1874625ee515785bf42c9fdff3658"},
+    {file = "Cython-3.0.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:80fd94c076e1e1b1ee40a309be03080b75f413e8997cddcf401a118879863388"},
+    {file = "Cython-3.0.8-cp312-cp312-win32.whl", hash = "sha256:85077915a93e359a9b920280d214dc0cf8a62773e1f3d7d30fab8ea4daed670c"},
+    {file = "Cython-3.0.8-cp312-cp312-win_amd64.whl", hash = "sha256:0cb2dcc565c7851f75d496f724a384a790fab12d1b82461b663e66605bec429a"},
+    {file = "Cython-3.0.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:870d2a0a7e3cbd5efa65aecdb38d715ea337a904ea7bb22324036e78fb7068e7"},
+    {file = "Cython-3.0.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e8f2454128974905258d86534f4fd4f91d2f1343605657ecab779d80c9d6d5e"},
+    {file = "Cython-3.0.8-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1949d6aa7bc792554bee2b67a9fe41008acbfe22f4f8df7b6ec7b799613a4b3"},
+    {file = "Cython-3.0.8-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9f2c6e1b8f3bcd6cb230bac1843f85114780bb8be8614855b1628b36bb510e0"},
+    {file = "Cython-3.0.8-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:05d7eddc668ae7993643f32c7661f25544e791edb745758672ea5b1a82ecffa6"},
+    {file = "Cython-3.0.8-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bfabe115deef4ada5d23c87bddb11289123336dcc14347011832c07db616dd93"},
+    {file = "Cython-3.0.8-cp36-cp36m-win32.whl", hash = "sha256:0c38c9f0bcce2df0c3347285863621be904ac6b64c5792d871130569d893efd7"},
+    {file = "Cython-3.0.8-cp36-cp36m-win_amd64.whl", hash = "sha256:6c46939c3983217d140999de7c238c3141f56b1ea349e47ca49cae899969aa2c"},
+    {file = "Cython-3.0.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:115f0a50f752da6c99941b103b5cb090da63eb206abbc7c2ad33856ffc73f064"},
+    {file = "Cython-3.0.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9c0f29246734561c90f36e70ed0506b61aa3d044e4cc4cba559065a2a741fae"},
+    {file = "Cython-3.0.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ab75242869ff71e5665fe5c96f3378e79e792fa3c11762641b6c5afbbbbe026"},
+    {file = "Cython-3.0.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6717c06e9cfc6c1df18543cd31a21f5d8e378a40f70c851fa2d34f0597037abc"},
+    {file = "Cython-3.0.8-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9d3f74388db378a3c6fd06e79a809ed98df3f56484d317b81ee762dbf3c263e0"},
+    {file = "Cython-3.0.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ae7ac561fd8253a9ae96311e91d12af5f701383564edc11d6338a7b60b285a6f"},
+    {file = "Cython-3.0.8-cp37-cp37m-win32.whl", hash = "sha256:97b2a45845b993304f1799664fa88da676ee19442b15fdcaa31f9da7e1acc434"},
+    {file = "Cython-3.0.8-cp37-cp37m-win_amd64.whl", hash = "sha256:9e2be2b340fea46fb849d378f9b80d3c08ff2e81e2bfbcdb656e2e3cd8c6b2dc"},
+    {file = "Cython-3.0.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2cde23c555470db3f149ede78b518e8274853745289c956a0e06ad8d982e4db9"},
+    {file = "Cython-3.0.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7990ca127e1f1beedaf8fc8bf66541d066ef4723ad7d8d47a7cbf842e0f47580"},
+    {file = "Cython-3.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b983c8e6803f016146c26854d9150ddad5662960c804ea7f0c752c9266752f0"},
+    {file = "Cython-3.0.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a973268d7ca1a2bdf78575e459a94a78e1a0a9bb62a7db0c50041949a73b02ff"},
+    {file = "Cython-3.0.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:61a237bc9dd23c7faef0fcfce88c11c65d0c9bb73c74ccfa408b3a012073c20e"},
+    {file = "Cython-3.0.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3a3d67f079598af49e90ff9655bf85bd358f093d727eb21ca2708f467c489cae"},
+    {file = "Cython-3.0.8-cp38-cp38-win32.whl", hash = "sha256:17a642bb01a693e34c914106566f59844b4461665066613913463a719e0dd15d"},
+    {file = "Cython-3.0.8-cp38-cp38-win_amd64.whl", hash = "sha256:2cdfc32252f3b6dc7c94032ab744dcedb45286733443c294d8f909a4854e7f83"},
+    {file = "Cython-3.0.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa97893d99385386925d00074654aeae3a98867f298d1e12ceaf38a9054a9bae"},
+    {file = "Cython-3.0.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f05c0bf9d085c031df8f583f0d506aa3be1692023de18c45d0aaf78685bbb944"},
+    {file = "Cython-3.0.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de892422582f5758bd8de187e98ac829330ec1007bc42c661f687792999988a7"},
+    {file = "Cython-3.0.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:314f2355a1f1d06e3c431eaad4708cf10037b5e91e4b231d89c913989d0bdafd"},
+    {file = "Cython-3.0.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:78825a3774211e7d5089730f00cdf7f473042acc9ceb8b9eeebe13ed3a5541de"},
+    {file = "Cython-3.0.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:df8093deabc55f37028190cf5e575c26aad23fc673f34b85d5f45076bc37ce39"},
+    {file = "Cython-3.0.8-cp39-cp39-win32.whl", hash = "sha256:1aca1b97e0095b3a9a6c33eada3f661a4ed0d499067d121239b193e5ba3bb4f0"},
+    {file = "Cython-3.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:16873d78be63bd38ffb759da7ab82814b36f56c769ee02b1d5859560e4c3ac3c"},
+    {file = "Cython-3.0.8-py2.py3-none-any.whl", hash = "sha256:171b27051253d3f9108e9759e504ba59ff06e7f7ba944457f94deaf9c21bf0b6"},
+    {file = "Cython-3.0.8.tar.gz", hash = "sha256:8333423d8fd5765e7cceea3a9985dd1e0a5dfeb2734629e1a2ed2d6233d39de6"},
 ]

 [[package]]
@@ -477,6 +477,21 @@ test = ["GitPython (<3.1.19)", "Jinja2", "compel (==0.1.8)", "datasets", "invisi
 torch = ["accelerate (>=0.11.0)", "torch (>=1.4,<2.2.0)"]
 training = ["Jinja2", "accelerate (>=0.11.0)", "datasets", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "tensorboard"]

+[[package]]
+name = "diffusion_policy"
+version = "0.0.0"
+description = ""
+optional = false
+python-versions = "*"
+files = []
+develop = false
+
+[package.source]
+type = "git"
+url = "https://github.com/real-stanford/diffusion_policy"
+reference = "HEAD"
+resolved_reference = "548a52bbb105518058e27bf34dcf90bf6f73681a"
+
 [[package]]
 name = "distlib"
 version = "0.3.8"
@@ -838,93 +853,20 @@ files = [
 [package.dependencies]
 numpy = ">=1.17.3"

-[[package]]
-name = "hf-transfer"
-version = "0.1.6"
-description = ""
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "hf_transfer-0.1.6-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:6fd3d61f9229d27def007e53540412507b74ac2fdb1a29985ae0b6a5137749a2"},
-    {file = "hf_transfer-0.1.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b043bb78df1225de043eb041de9d97783fcca14a0bdc1b1d560fc172fc21b648"},
-    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7db60dd18eae4fa6ea157235fb82196cde5313995b396d1b591aad3b790a7f8f"},
-    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:30d31dbab9b5a558cce407b8728e39d87d7af1ef8745ddb90187e9ae0b9e1e90"},
-    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f6b368bddd757efc7af3126ba81f9ac8f9435e2cc00902cb3d64f2be28d8f719"},
-    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa2086d8aefaaa3e144e167324574882004c0cec49bf2d0638ec4b74732d8da0"},
-    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:45d8985a0940bfe1535cb4ca781f5c11e47c83798ef3373ee1f5d57bbe527a9c"},
-    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f42b89735f1cde22f2a795d1f0915741023235666be7de45879e533c7d6010c"},
-    {file = "hf_transfer-0.1.6-cp310-none-win32.whl", hash = "sha256:2d2c4c4613f3ad45b6ce6291e347b2d3ba1b86816635681436567e461cb3c961"},
-    {file = "hf_transfer-0.1.6-cp310-none-win_amd64.whl", hash = "sha256:78b0eed8d8dce60168a46e584b9742b816af127d7e410a713e12c31249195342"},
-    {file = "hf_transfer-0.1.6-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f1d8c172153f9a6cdaecf137612c42796076f61f6bea1072c90ac2e17c1ab6fa"},
-    {file = "hf_transfer-0.1.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2c601996351f90c514a75a0eeb02bf700b1ad1db2d946cbfe4b60b79e29f0b2f"},
-    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e585c808405557d3f5488f385706abb696997bbae262ea04520757e30836d9d"},
-    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ec51af1e8cf4268c268bd88932ade3d7ca895a3c661b42493503f02610ae906b"},
-    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d106fdf996332f6df3ed3fab6d6332df82e8c1fb4b20fd81a491ca4d2ab5616a"},
-    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e9c2ee9e9fde5a0319cc0e8ddfea10897482bc06d5709b10a238f1bc2ebcbc0b"},
-    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f394ea32bc7802b061e549d3133efc523b4ae4fd19bf4b74b183ca6066eef94e"},
-    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4282f09902114cd67fca98a1a1bad569a44521a8395fedf327e966714f68b977"},
-    {file = "hf_transfer-0.1.6-cp311-none-win32.whl", hash = "sha256:276dbf307d5ab6f1bcbf57b5918bfcf9c59d6848ccb28242349e1bb5985f983b"},
-    {file = "hf_transfer-0.1.6-cp311-none-win_amd64.whl", hash = "sha256:fa475175c51451186bea804471995fa8e7b2a48a61dcca55534911dc25955527"},
-    {file = "hf_transfer-0.1.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:23d157a67acfa00007799323a1c441b2bbacc7dee625b016b7946fe0e25e6c89"},
-    {file = "hf_transfer-0.1.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6067342a2864b988f861cd2d31bd78eb1e84d153a3f6df38485b6696d9ad3013"},
-    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91cfcb3070e205b58fa8dc8bcb6a62ccc40913fcdb9cd1ff7c364c8e3aa85345"},
-    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb76064ac5165d5eeaaf8d0903e8bf55477221ecc2a4a4d69f0baca065ab905b"},
-    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dabd3a177d83028f164984cf4dd859f77ec1e20c97a6f307ff8fcada0785ef1"},
-    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d0bf4254e44f64a26e0a5b73b5d7e8d91bb36870718fb4f8e126ec943ff4c805"},
-    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d32c1b106f38f336ceb21531f4db9b57d777b9a33017dafdb6a5316388ebe50"},
-    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff05aba3c83921e5c7635ba9f07c693cc893350c447644824043aeac27b285f5"},
-    {file = "hf_transfer-0.1.6-cp312-none-win32.whl", hash = "sha256:051ef0c55607652cb5974f59638da035773254b9a07d7ee5b574fe062de4c9d1"},
-    {file = "hf_transfer-0.1.6-cp312-none-win_amd64.whl", hash = "sha256:716fb5c574fcbdd8092ce73f9b6c66f42e3544337490f77c60ec07df02bd081b"},
-    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c0c981134a55965e279cb7be778c1ccaf93f902fc9ebe31da4f30caf824cc4d"},
-    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ef1f145f04c5b573915bcb1eb5db4039c74f6b46fce73fc473c4287e613b623"},
-    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0a7609b004db3347dbb7796df45403eceb171238210d054d93897d6d84c63a4"},
-    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60f0864bf5996773dbd5f8ae4d1649041f773fe9d5769f4c0eeb5553100acef3"},
-    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d01e55d630ffe70a4f5d0ed576a04c6a48d7c65ca9a7d18f2fca385f20685a9"},
-    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d855946c5062b665190de15b2bdbd4c8eddfee35350bfb7564592e23d36fbbd3"},
-    {file = "hf_transfer-0.1.6-cp37-none-win32.whl", hash = "sha256:fd40b2409cfaf3e8aba20169ee09552f69140e029adeec261b988903ff0c8f6f"},
-    {file = "hf_transfer-0.1.6-cp37-none-win_amd64.whl", hash = "sha256:0e0eba49d46d3b5481919aea0794aec625fbc6ecdf13fe7e0e9f3fc5d5ad5971"},
-    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e669fecb29fc454449739f9f53ed9253197e7c19e6a6eaa0f08334207af4287"},
-    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:89f701802892e5eb84f89f402686861f87dc227d6082b05f4e9d9b4e8015a3c3"},
-    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6f2b0c8b95b01409275d789a9b74d5f2e146346f985d384bf50ec727caf1ccc"},
-    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa855a2fa262792a230f9efcdb5da6d431b747d1861d2a69fe7834b19aea077e"},
-    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa8ca349afb2f0713475426946261eb2035e4efb50ebd2c1d5ad04f395f4217"},
-    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01255f043996bc7d1bae62d8afc5033a90c7e36ce308b988eeb84afe0a69562f"},
-    {file = "hf_transfer-0.1.6-cp38-none-win32.whl", hash = "sha256:60b1db183e8a7540cd4f8b2160ff4de55f77cb0c3fc6a10be1e7c30eb1b2bdeb"},
-    {file = "hf_transfer-0.1.6-cp38-none-win_amd64.whl", hash = "sha256:fb8be3cba6aaa50ab2e9dffbd25c8eb2046785eeff642cf0cdd0dd9ae6be3539"},
-    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d09af35e3e3f09b664e6429e9a0dc200f29c5bdfd88bdd9666de51183b1fe202"},
-    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a4505bd707cc14d85c800f961fad8ca76f804a8ad22fbb7b1a217d8d0c15e6a5"},
-    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c453fd8b0be9740faa23cecd1f28ee9ead7d900cefa64ff836960c503a744c9"},
-    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:13cb8884e718a78c3b81a8cdec9c7ac196dd42961fce55c3ccff3dd783e5ad7a"},
-    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39cd39df171a2b5404de69c4e6cd14eee47f6fe91c1692f939bfb9e59a0110d8"},
-    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ff0629ee9f98df57a783599602eb498f9ec3619dc69348b12e4d9d754abf0e9"},
-    {file = "hf_transfer-0.1.6-cp39-none-win32.whl", hash = "sha256:164a6ce445eb0cc7c645f5b6e1042c003d33292520c90052b6325f30c98e4c5f"},
-    {file = "hf_transfer-0.1.6-cp39-none-win_amd64.whl", hash = "sha256:11b8b4b73bf455f13218c5f827698a30ae10998ca31b8264b51052868c7a9f11"},
-    {file = "hf_transfer-0.1.6-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16957ba057376a99ea361074ce1094f61b58e769defa6be2422ae59c0b6a6530"},
-    {file = "hf_transfer-0.1.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7db952112e3b8ee1a5cbf500d2443e9ce4fb893281c5310a3e31469898628005"},
-    {file = "hf_transfer-0.1.6-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d39d826a7344f5e39f438d62632acd00467aa54a083b66496f61ef67a9885a56"},
-    {file = "hf_transfer-0.1.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4e2653fbfa92e7651db73d99b697c8684e7345c479bd6857da80bed6138abb2"},
-    {file = "hf_transfer-0.1.6-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:144277e6a86add10b90ec3b583253aec777130312256bfc8d5ade5377e253807"},
-    {file = "hf_transfer-0.1.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bb53bcd16365313b2aa0dbdc28206f577d70770f31249cdabc387ac5841edcc"},
-    {file = "hf_transfer-0.1.6-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:990d73a5a68d8261980f146c51f4c5f9995314011cb225222021ad7c39f3af2d"},
-    {file = "hf_transfer-0.1.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:652406037029ab9b4097b4c5f29321bad5f64c2b46fbff142509d918aec87c29"},
-    {file = "hf_transfer-0.1.6.tar.gz", hash = "sha256:deb505a7d417d7055fd7b3549eadb91dfe782941261f3344025c486c16d1d2f9"},
-]
-
 [[package]]
 name = "huggingface-hub"
-version = "0.21.4"
+version = "0.21.3"
 description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "huggingface_hub-0.21.4-py3-none-any.whl", hash = "sha256:df37c2c37fc6c82163cdd8a67ede261687d80d1e262526d6c0ce73b6b3630a7b"},
-    {file = "huggingface_hub-0.21.4.tar.gz", hash = "sha256:e1f4968c93726565a80edf6dc309763c7b546d0cfe79aa221206034d50155531"},
+    {file = "huggingface_hub-0.21.3-py3-none-any.whl", hash = "sha256:b183144336fdf2810a8c109822e0bb6ef1fd61c65da6fb60e8c3f658b7144016"},
+    {file = "huggingface_hub-0.21.3.tar.gz", hash = "sha256:26a15b604e4fc7bad37c467b76456543ec849386cbca9cd7e1e135f53e500423"},
 ]

 [package.dependencies]
 filelock = "*"
 fsspec = ">=2023.5.0"
-hf-transfer = {version = ">=0.1.4", optional = true, markers = "extra == \"hf_transfer\""}
 packaging = ">=20.9"
 pyyaml = ">=5.1"
 requests = "*"
@@ -1038,22 +980,22 @@ setuptools = "*"

 [[package]]
 name = "importlib-metadata"
-version = "7.0.2"
+version = "7.0.1"
 description = "Read metadata from Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "importlib_metadata-7.0.2-py3-none-any.whl", hash = "sha256:f4bc4c0c070c490abf4ce96d715f68e95923320370efb66143df00199bb6c100"},
-    {file = "importlib_metadata-7.0.2.tar.gz", hash = "sha256:198f568f3230878cb1b44fbd7975f87906c22336dba2e4a7f05278c281fbd792"},
+    {file = "importlib_metadata-7.0.1-py3-none-any.whl", hash = "sha256:4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e"},
+    {file = "importlib_metadata-7.0.1.tar.gz", hash = "sha256:f238736bb06590ae52ac1fab06a3a9ef1d8dce2b7a35b5ab329371d6c8f5d2cc"},
 ]

 [package.dependencies]
 zipp = ">=0.5"

 [package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
 perf = ["ipython"]
-testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"]
+testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"]

 [[package]]
 name = "iniconfig"
@@ -1706,13 +1648,13 @@ files = [

 [[package]]
 name = "nvidia-nvjitlink-cu12"
-version = "12.4.99"
+version = "12.3.101"
 description = "Nvidia JIT LTO Library"
 optional = false
 python-versions = ">=3"
 files = [
-    {file = "nvidia_nvjitlink_cu12-12.4.99-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c6428836d20fe7e327191c175791d38570e10762edc588fb46749217cd444c74"},
-    {file = "nvidia_nvjitlink_cu12-12.4.99-py3-none-win_amd64.whl", hash = "sha256:991905ffa2144cb603d8ca7962d75c35334ae82bf92820b6ba78157277da1ad2"},
+    {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"},
+    {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-win_amd64.whl", hash = "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1"},
 ]

 [[package]]
@@ -1767,13 +1709,13 @@ numpy = [

 [[package]]
 name = "packaging"
-version = "24.0"
+version = "23.2"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"},
-    {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
+    {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
+    {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
 ]

 [[package]]
@@ -2231,13 +2173,13 @@ files = [

 [[package]]
 name = "pytest"
-version = "8.1.1"
+version = "8.1.0"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pytest-8.1.1-py3-none-any.whl", hash = "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7"},
-    {file = "pytest-8.1.1.tar.gz", hash = "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044"},
+    {file = "pytest-8.1.0-py3-none-any.whl", hash = "sha256:ee32db7af8de4629a455806befa90559f307424c07b8413ccfc30bf5b221dd7e"},
+    {file = "pytest-8.1.0.tar.gz", hash = "sha256:f8fa04ab8f98d185113ae60ea6d79c22f8143b14bc1caeced44a0ab844928323"},
 ]

 [package.dependencies]
@@ -2253,13 +2195,13 @@ testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygm

 [[package]]
 name = "python-dateutil"
-version = "2.9.0.post0"
+version = "2.8.2"
 description = "Extensions to the standard Python datetime module"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
 files = [
-    {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
-    {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
+    {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
+    {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
 ]

 [package.dependencies]
@@ -2684,13 +2626,13 @@ test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov",

 [[package]]
 name = "sentry-sdk"
-version = "1.41.0"
+version = "1.40.6"
 description = "Python client for Sentry (https://sentry.io)"
 optional = false
 python-versions = "*"
 files = [
-    {file = "sentry-sdk-1.41.0.tar.gz", hash = "sha256:4f2d6c43c07925d8cd10dfbd0970ea7cb784f70e79523cca9dbcd72df38e5a46"},
-    {file = "sentry_sdk-1.41.0-py2.py3-none-any.whl", hash = "sha256:be4f8f4b29a80b6a3b71f0f31487beb9e296391da20af8504498a328befed53f"},
+    {file = "sentry-sdk-1.40.6.tar.gz", hash = "sha256:f143f3fb4bb57c90abef6e2ad06b5f6f02b2ca13e4060ec5c0549c7a9ccce3fa"},
+    {file = "sentry_sdk-1.40.6-py2.py3-none-any.whl", hash = "sha256:becda09660df63e55f307570e9817c664392655a7328bbc414b507e9cb874c67"},
 ]

 [package.dependencies]
@@ -2716,7 +2658,7 @@ huey = ["huey (>=2)"]
 loguru = ["loguru (>=0.5)"]
 opentelemetry = ["opentelemetry-distro (>=0.35b0)"]
 opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"]
-pure-eval = ["asttokens", "executing", "pure-eval"]
+pure-eval = ["asttokens", "executing", "pure_eval"]
 pymongo = ["pymongo (>=3.1)"]
 pyspark = ["pyspark (>=2.4.4)"]
 quart = ["blinker (>=1.1)", "quart (>=0.16.1)"]
@@ -2970,7 +2912,7 @@ tests = ["pytest", "pytest-benchmark", "pytest-instafail", "pytest-rerunfailures
 type = "git"
 url = "https://github.com/pytorch/tensordict"
 reference = "HEAD"
-resolved_reference = "ed22554d6860731610df784b2f5d09f31d3dbc7a"
+resolved_reference = "551331d83e2979dd4505db1f49895740e6e5c95f"

 [[package]]
 name = "termcolor"
@@ -3089,13 +3031,13 @@ tensordict = ">=0.4.0"
 torch = ">=2.1.0"

 [package.extras]
-all = ["ale-py", "atari-py", "dm_control", "git", "gym", "gym[accept-rom-license]", "gymnasium", "h5py", "huggingface_hub", "hydra-core (>=1.1)", "hydra-submitit-launcher", "minari", "moviepy", "mujoco", "pandas", "pettingzoo (>=1.24.1)", "pillow", "pygame", "pytest", "pytest-instafail", "pyyaml", "requests", "scikit-learn", "scipy", "tensorboard", "torchsnapshot", "torchvision", "tqdm", "vmas (>=1.2.10)", "wandb"]
+all = ["ale-py", "atari-py", "dm-control", "git", "gym", "gym[accept-rom-license]", "gymnasium", "h5py", "huggingface-hub", "hydra-core (>=1.1)", "hydra-submitit-launcher", "minari", "moviepy", "mujoco", "pandas", "pettingzoo (>=1.24.1)", "pillow", "pygame", "pytest", "pytest-instafail", "pyyaml", "requests", "scikit-learn", "scipy", "tensorboard", "torchsnapshot", "torchvision", "tqdm", "vmas (>=1.2.10)", "wandb"]
 atari = ["ale-py", "atari-py", "gym", "gym[accept-rom-license]", "pygame"]
 checkpointing = ["torchsnapshot"]
-dm-control = ["dm_control"]
+dm-control = ["dm-control"]
 gym-continuous = ["gymnasium", "mujoco"]
 marl = ["pettingzoo (>=1.24.1)", "vmas (>=1.2.10)"]
-offline-data = ["h5py", "huggingface_hub", "minari", "pandas", "pillow", "requests", "scikit-learn", "torchvision", "tqdm"]
+offline-data = ["h5py", "huggingface-hub", "minari", "pandas", "pillow", "requests", "scikit-learn", "torchvision", "tqdm"]
 rendering = ["moviepy"]
 tests = ["pytest", "pytest-instafail", "pyyaml", "scipy"]
 utils = ["git", "hydra-core (>=1.1)", "hydra-submitit-launcher", "tensorboard", "tqdm", "wandb"]
@@ -3252,13 +3194,13 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess

 [[package]]
 name = "wandb"
-version = "0.16.4"
+version = "0.16.3"
 description = "A CLI and library for interacting with the Weights & Biases API."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "wandb-0.16.4-py3-none-any.whl", hash = "sha256:bb9eb5aa2c2c85e11c76040c4271366f54d4975167aa6320ba86c3f2d97fe5fa"},
-    {file = "wandb-0.16.4.tar.gz", hash = "sha256:8752c67d1347a4c29777e64dc1e1a742a66c5ecde03aebadf2b0d62183fa307c"},
+    {file = "wandb-0.16.3-py3-none-any.whl", hash = "sha256:b8907ddd775c27dc6c12687386a86b5d6acf291060f9ae680bbc61cc8fc03237"},
+    {file = "wandb-0.16.3.tar.gz", hash = "sha256:d789acda32053b18b7a160d0595837e45a3c8a79d25e1fe1f051875303f480ec"},
 ]

 [package.dependencies]
@@ -3279,9 +3221,8 @@ async = ["httpx (>=0.23.0)"]
 aws = ["boto3"]
 azure = ["azure-identity", "azure-storage-blob"]
 gcp = ["google-cloud-storage"]
-importers = ["filelock", "mlflow", "polars", "rich", "tenacity"]
 kubeflow = ["google-cloud-storage", "kubernetes", "minio", "sh"]
-launch = ["PyYAML (>=6.0.0)", "awscli", "azure-containerregistry", "azure-identity", "azure-storage-blob", "boto3", "botocore", "chardet", "google-auth", "google-cloud-aiplatform", "google-cloud-artifact-registry", "google-cloud-compute", "google-cloud-storage", "iso8601", "kubernetes", "kubernetes-asyncio", "nbconvert", "nbformat", "optuna", "pydantic", "tomli", "typing-extensions"]
+launch = ["PyYAML (>=6.0.0)", "awscli", "azure-containerregistry", "azure-identity", "azure-storage-blob", "boto3", "botocore", "chardet", "google-auth", "google-cloud-aiplatform", "google-cloud-artifact-registry", "google-cloud-compute", "google-cloud-storage", "iso8601", "kubernetes", "kubernetes-asyncio", "nbconvert", "nbformat", "optuna", "pydantic", "typing-extensions"]
 media = ["bokeh", "moviepy", "numpy", "pillow", "plotly (>=5.18.0)", "rdkit-pypi", "soundfile"]
 models = ["cloudpickle"]
 perf = ["orjson"]
@@ -3290,13 +3231,13 @@ sweeps = ["sweeps (>=0.2.0)"]

 [[package]]
 name = "zarr"
-version = "2.17.1"
+version = "2.17.0"
 description = "An implementation of chunked, compressed, N-dimensional arrays for Python"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "zarr-2.17.1-py3-none-any.whl", hash = "sha256:e25df2741a6e92645f3890f30f3136d5b57a0f8f831094b024bbcab5f2797bc7"},
-    {file = "zarr-2.17.1.tar.gz", hash = "sha256:564b3aa072122546fe69a0fa21736f466b20fad41754334b62619f088ce46261"},
+    {file = "zarr-2.17.0-py3-none-any.whl", hash = "sha256:d287cb61019c4a0a0f386f76eeaa7f0b1160b1cb90cf96173a4b6cbc135df6e1"},
+    {file = "zarr-2.17.0.tar.gz", hash = "sha256:6390a2b8af31babaab4c963efc45bf1da7f9500c9aafac193f84cf019a7c66b0"},
 ]

 [package.dependencies]
@@ -3327,4 +3268,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "ee86b84a795e6a3e9c2d79f244a87b55589adbe46d549ac38adf48be27c04cf9"
+content-hash = "84cda58ab0670dcb1e2429b342f4f1b3c35f261d1201fc17acad5cc1ef2c6aa8"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,6 @@ classifiers=[
 ]
 packages = [{include = "lerobot"}]

-
 [tool.poetry.dependencies]
 python = "^3.10"
 cython = "^3.0.8"
@@ -46,11 +45,11 @@ mujoco = "2.3.7"
 mujoco-py = "^2.1.2.14"
 gym = "^0.26.2"
 opencv-python = "^4.9.0.80"
+diffusion-policy = {git = "https://github.com/real-stanford/diffusion_policy"}
 diffusers = "^0.26.3"
 torchvision = "^0.17.1"
 h5py = "^3.10.0"
 dm-control = "1.0.14"
-huggingface-hub = {extras = ["hf-transfer"], version = "^0.21.4"}


 [tool.poetry.group.dev.dependencies]
@@ -58,6 +57,9 @@ pre-commit = "^3.6.2"
 debugpy = "^1.8.1"
 pytest = "^8.1.0"

+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"

 [tool.ruff]
 line-length = 110
@@ -86,15 +88,5 @@ exclude = [
    "venv",
 ]

-
 [tool.ruff.lint]
 select = ["E4", "E7", "E9", "F", "I", "N", "B", "C4", "SIM"]
-
-
-[tool.poetry-dynamic-versioning]
-enable = true
-
-
-[build-system]
-requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
-build-backend = "poetry_dynamic_versioning.backend"
--- a/tests/data/aloha_sim_insertion_human/action.memmap
+++ b/tests/data/aloha_sim_insertion_human/action.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d789deddb081a9f4b626342391de8f48949d38fb5fdead87b5c0737b46c0877a
-size 2800
--- a/tests/data/aloha_sim_insertion_human/episode.memmap
+++ b/tests/data/aloha_sim_insertion_human/episode.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7a12e561363385e9dfeeab326368731c030ed4b374e7f5897ac819159d2884c5
-size 400
--- a/tests/data/aloha_sim_insertion_human/frame_id.memmap
+++ b/tests/data/aloha_sim_insertion_human/frame_id.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c202d9cfc7858fd49d522047e16948359bbbb2eda2d3825d552e45a78d5f8585
-size 400
--- a/tests/data/aloha_sim_insertion_human/meta.json
+++ b/tests/data/aloha_sim_insertion_human/meta.json
@@ -1 +0,0 @@
-{"action": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "episode": {"device": "cpu", "shape": [50], "dtype": "torch.int64"}, "frame_id": {"device": "cpu", "shape": [50], "dtype": "torch.int64"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_human/next/done.memmap
+++ b/tests/data/aloha_sim_insertion_human/next/done.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cc2786e1f9910a9d811400edcddaf7075195f7a16b216dcbefba3bc7c4f2ae51
-size 50
--- a/tests/data/aloha_sim_insertion_human/next/meta.json
+++ b/tests/data/aloha_sim_insertion_human/next/meta.json
@@ -1 +0,0 @@
-{"done": {"device": "cpu", "shape": [50, 1], "dtype": "torch.bool"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_human/next/observation/image/meta.json
+++ b/tests/data/aloha_sim_insertion_human/next/observation/image/meta.json
@@ -1 +0,0 @@
-{"top": {"device": "cpu", "shape": [50, 3, 480, 640], "dtype": "torch.uint8"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_human/next/observation/image/top.memmap
+++ b/tests/data/aloha_sim_insertion_human/next/observation/image/top.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5c632e3cb06be729e5d673e3ecca1d6f6527b0f48cfe3dc03d7eea4f9eb3bbd7
-size 46080000
--- a/tests/data/aloha_sim_insertion_human/next/observation/meta.json
+++ b/tests/data/aloha_sim_insertion_human/next/observation/meta.json
@@ -1 +0,0 @@
-{"state": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_human/next/observation/state.memmap
+++ b/tests/data/aloha_sim_insertion_human/next/observation/state.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e231f2e07e1cd030137ea2e938b570b112db2c694c6d21b37ceb8f8559e19088
-size 2800
--- a/tests/data/aloha_sim_insertion_human/observation/image/meta.json
+++ b/tests/data/aloha_sim_insertion_human/observation/image/meta.json
@@ -1 +0,0 @@
-{"top": {"device": "cpu", "shape": [50, 3, 480, 640], "dtype": "torch.uint8"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_human/observation/image/top.memmap
+++ b/tests/data/aloha_sim_insertion_human/observation/image/top.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a1ba64c89f4fcf9135fe34c26abf582dd5f0d573506db5c96af3ffe40a52c818
-size 46080000
--- a/tests/data/aloha_sim_insertion_human/observation/meta.json
+++ b/tests/data/aloha_sim_insertion_human/observation/meta.json
@@ -1 +0,0 @@
-{"state": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_human/observation/state.memmap
+++ b/tests/data/aloha_sim_insertion_human/observation/state.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:85405686bc065c6ab6c915907920a0391a57cf097b74de058a8c30be0548ade5
-size 2800
--- a/tests/data/aloha_sim_insertion_human/stats.pth
+++ b/tests/data/aloha_sim_insertion_human/stats.pth
--- a/tests/data/aloha_sim_insertion_scripted/action.memmap
+++ b/tests/data/aloha_sim_insertion_scripted/action.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1f5fe053b760e8471885b82c10f4a6ea40874098036337ae5cc300c4775546be
-size 2800
--- a/tests/data/aloha_sim_insertion_scripted/episode.memmap
+++ b/tests/data/aloha_sim_insertion_scripted/episode.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7a12e561363385e9dfeeab326368731c030ed4b374e7f5897ac819159d2884c5
-size 400
--- a/tests/data/aloha_sim_insertion_scripted/frame_id.memmap
+++ b/tests/data/aloha_sim_insertion_scripted/frame_id.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c202d9cfc7858fd49d522047e16948359bbbb2eda2d3825d552e45a78d5f8585
-size 400
--- a/tests/data/aloha_sim_insertion_scripted/meta.json
+++ b/tests/data/aloha_sim_insertion_scripted/meta.json
@@ -1 +0,0 @@
-{"action": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "episode": {"device": "cpu", "shape": [50], "dtype": "torch.int64"}, "frame_id": {"device": "cpu", "shape": [50], "dtype": "torch.int64"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_scripted/next/done.memmap
+++ b/tests/data/aloha_sim_insertion_scripted/next/done.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cc2786e1f9910a9d811400edcddaf7075195f7a16b216dcbefba3bc7c4f2ae51
-size 50
--- a/tests/data/aloha_sim_insertion_scripted/next/meta.json
+++ b/tests/data/aloha_sim_insertion_scripted/next/meta.json
@@ -1 +0,0 @@
-{"done": {"device": "cpu", "shape": [50, 1], "dtype": "torch.bool"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_scripted/next/observation/image/meta.json
+++ b/tests/data/aloha_sim_insertion_scripted/next/observation/image/meta.json
@@ -1 +0,0 @@
-{"top": {"device": "cpu", "shape": [50, 3, 480, 640], "dtype": "torch.uint8"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_scripted/next/observation/image/top.memmap
+++ b/tests/data/aloha_sim_insertion_scripted/next/observation/image/top.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:daed2bb10498ba2557983d0d7e89399882fea7585e7ceff910e23c621bfdbf88
-size 46080000
--- a/tests/data/aloha_sim_insertion_scripted/next/observation/meta.json
+++ b/tests/data/aloha_sim_insertion_scripted/next/observation/meta.json
@@ -1 +0,0 @@
-{"state": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_scripted/next/observation/state.memmap
+++ b/tests/data/aloha_sim_insertion_scripted/next/observation/state.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:bbad0302af70112ee312efe0eb0f44a2f1c8f6c5ef82ea4fb34625cdafbef057
-size 2800
--- a/tests/data/aloha_sim_insertion_scripted/observation/image/meta.json
+++ b/tests/data/aloha_sim_insertion_scripted/observation/image/meta.json
@@ -1 +0,0 @@
-{"top": {"device": "cpu", "shape": [50, 3, 480, 640], "dtype": "torch.uint8"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_scripted/observation/image/top.memmap
+++ b/tests/data/aloha_sim_insertion_scripted/observation/image/top.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:aba55ebb9dd004bf68444b9ebf024ed7713436099c06a0b8e541100ecbc69290
-size 46080000
--- a/tests/data/aloha_sim_insertion_scripted/observation/meta.json
+++ b/tests/data/aloha_sim_insertion_scripted/observation/meta.json
@@ -1 +0,0 @@
-{"state": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_insertion_scripted/observation/state.memmap
+++ b/tests/data/aloha_sim_insertion_scripted/observation/state.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dd4e7e14abf57561ca9839c910581266be90956e41bfb3bb21362ea0c321e77d
-size 2800
--- a/tests/data/aloha_sim_insertion_scripted/stats.pth
+++ b/tests/data/aloha_sim_insertion_scripted/stats.pth
--- a/tests/data/aloha_sim_transfer_cube_human/action.memmap
+++ b/tests/data/aloha_sim_transfer_cube_human/action.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:14fed0eed3d529a8ac0dd25a6d41585020772d02f9137fc9d604713b2f0f7076
-size 2800
--- a/tests/data/aloha_sim_transfer_cube_human/episode.memmap
+++ b/tests/data/aloha_sim_transfer_cube_human/episode.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7a12e561363385e9dfeeab326368731c030ed4b374e7f5897ac819159d2884c5
-size 400
--- a/tests/data/aloha_sim_transfer_cube_human/frame_id.memmap
+++ b/tests/data/aloha_sim_transfer_cube_human/frame_id.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c202d9cfc7858fd49d522047e16948359bbbb2eda2d3825d552e45a78d5f8585
-size 400
--- a/tests/data/aloha_sim_transfer_cube_human/meta.json
+++ b/tests/data/aloha_sim_transfer_cube_human/meta.json
@@ -1 +0,0 @@
-{"action": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "episode": {"device": "cpu", "shape": [50], "dtype": "torch.int64"}, "frame_id": {"device": "cpu", "shape": [50], "dtype": "torch.int64"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_transfer_cube_human/next/done.memmap
+++ b/tests/data/aloha_sim_transfer_cube_human/next/done.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cc2786e1f9910a9d811400edcddaf7075195f7a16b216dcbefba3bc7c4f2ae51
-size 50
--- a/tests/data/aloha_sim_transfer_cube_human/next/meta.json
+++ b/tests/data/aloha_sim_transfer_cube_human/next/meta.json
@@ -1 +0,0 @@
-{"done": {"device": "cpu", "shape": [50, 1], "dtype": "torch.bool"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_transfer_cube_human/next/observation/image/meta.json
+++ b/tests/data/aloha_sim_transfer_cube_human/next/observation/image/meta.json
@@ -1 +0,0 @@
-{"top": {"device": "cpu", "shape": [50, 3, 480, 640], "dtype": "torch.uint8"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_transfer_cube_human/next/observation/image/top.memmap
+++ b/tests/data/aloha_sim_transfer_cube_human/next/observation/image/top.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2f713ea7fc19e592ea409a5e0bdfde403e5b86f834cbabe3463b791e8437fafc
-size 46080000
--- a/tests/data/aloha_sim_transfer_cube_human/next/observation/meta.json
+++ b/tests/data/aloha_sim_transfer_cube_human/next/observation/meta.json
@@ -1 +0,0 @@
-{"state": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_transfer_cube_human/next/observation/state.memmap
+++ b/tests/data/aloha_sim_transfer_cube_human/next/observation/state.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8c103e2c9d63c9f7cf9645bd24d9a2c4e8e08825dc75e230ebc793b8f9c213b0
-size 2800
--- a/tests/data/aloha_sim_transfer_cube_human/observation/image/meta.json
+++ b/tests/data/aloha_sim_transfer_cube_human/observation/image/meta.json
@@ -1 +0,0 @@
-{"top": {"device": "cpu", "shape": [50, 3, 480, 640], "dtype": "torch.uint8"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_transfer_cube_human/observation/image/top.memmap
+++ b/tests/data/aloha_sim_transfer_cube_human/observation/image/top.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7dbf4aa01b184d0eaa21ea999078d7cff86e1ca484a109614176fdc49f1ee05c
-size 46080000
--- a/tests/data/aloha_sim_transfer_cube_human/observation/meta.json
+++ b/tests/data/aloha_sim_transfer_cube_human/observation/meta.json
@@ -1 +0,0 @@
-{"state": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}
--- a/tests/data/aloha_sim_transfer_cube_human/observation/state.memmap
+++ b/tests/data/aloha_sim_transfer_cube_human/observation/state.memmap
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4fa0b9c870d4615037b6fee9e9e85e54d84352e173f2c7c1035232272fe2a3dd
-size 2800
--- a/tests/data/aloha_sim_transfer_cube_human/stats.pth
+++ b/tests/data/aloha_sim_transfer_cube_human/stats.pth
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Cadene	7bf36cd413	Add AbstractEnv, Refactor AlohaEnv, Add rendering_hook in env, Minor modifications, (TODO: Refactor Pusht and Simxarm)	2024-03-10 22:00:48 +00:00
Cadene	b49f7b70e2	Add tasks without end_effector that are compatible with dataset, Eval can run (TODO: training and pretrained model)	2024-03-10 10:52:12 +00:00
Cadene	f1230cdac0	Training can runs (TODO: eval)	2024-03-09 16:52:08 +00:00
Cadene	5395829596	Add act yaml (TODO: try train.py)	2024-03-08 18:08:28 +00:00
Cadene	a45802c281	Remove download.py add a WIP for Simxarm	2024-03-08 18:07:49 +00:00
Cadene	167a51cb69	Remove download.py add a WIP for Simxarm	2024-03-08 18:07:33 +00:00
Cadene	fbc66a082b	Copy past from act repo	2024-03-08 16:54:43 +00:00
Cadene	603455e313	Update README	2024-03-08 16:15:56 +00:00
Cadene	6500945be5	Rendering works (fps look fast tho? TODO action bounding is too wide [-1,1])	2024-03-08 15:33:35 +00:00
Cadene	ebbcad8c05	WIP Aloha env tests pass	2024-03-08 14:37:23 +00:00
Remi Cadene	d98b435b4c	WIP	2024-03-08 12:08:16 +00:00
				`@@ -1 +0,0 @@`
				`*.memmap filter=lfs diff=lfs merge=lfs -text`
				`@@ -1 +0,0 @@`
				`from lerobot.__version__ import __version__ # noqa: F401`
				`@@ -1 +0,0 @@`
				`{"action": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "episode": {"device": "cpu", "shape": [50], "dtype": "torch.int64"}, "frame_id": {"device": "cpu", "shape": [50], "dtype": "torch.int64"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}`
				`@@ -1 +0,0 @@`
				`{"done": {"device": "cpu", "shape": [50, 1], "dtype": "torch.bool"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}`
				`@@ -1 +0,0 @@`
				`{"top": {"device": "cpu", "shape": [50, 3, 480, 640], "dtype": "torch.uint8"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}`
				`@@ -1 +0,0 @@`
				`{"state": {"device": "cpu", "shape": [50, 14], "dtype": "torch.float32"}, "shape": [50], "device": "cpu", "_type": "<class 'tensordict._td.TensorDict'>"}`