added tdmpc2 to policy factory; shape fixes in tdmpc2

fixes and updated comments
config comments
2024-11-26 11:58:29 +00:00 · 2024-11-26 09:46:59 +00:00 · 2024-11-25 09:51:33 +00:00 · 2024-11-22 17:11:47 +00:00 · 2024-11-21 17:03:30 +00:00 · 2024-11-21 15:00:03 +00:00
831 changed files with 35383 additions and 33993 deletions
--- a/.cache/calibration/aloha_default/left_follower.json
+++ b/.cache/calibration/aloha_default/left_follower.json
@@ -0,0 +1,68 @@
+{
+    "homing_offset": [
+        2048,
+        3072,
+        3072,
+        -1024,
+        -1024,
+        2048,
+        -2048,
+        2048,
+        -2048
+    ],
+    "drive_mode": [
+        1,
+        1,
+        1,
+        0,
+        0,
+        1,
+        0,
+        1,
+        0
+    ],
+    "start_pos": [
+        2015,
+        3058,
+        3061,
+        1071,
+        1071,
+        2035,
+        2152,
+        2029,
+        2499
+    ],
+    "end_pos": [
+        -1008,
+        -1963,
+        -1966,
+        2141,
+        2143,
+        -971,
+        3043,
+        -1077,
+        3144
+    ],
+    "calib_mode": [
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "LINEAR"
+    ],
+    "motor_names": [
+        "waist",
+        "shoulder",
+        "shoulder_shadow",
+        "elbow",
+        "elbow_shadow",
+        "forearm_roll",
+        "wrist_angle",
+        "wrist_rotate",
+        "gripper"
+    ]
+}
--- a/.cache/calibration/aloha_default/left_leader.json
+++ b/.cache/calibration/aloha_default/left_leader.json
@@ -0,0 +1,68 @@
+{
+    "homing_offset": [
+        2048,
+        3072,
+        3072,
+        -1024,
+        -1024,
+        2048,
+        -2048,
+        2048,
+        -1024
+    ],
+    "drive_mode": [
+        1,
+        1,
+        1,
+        0,
+        0,
+        1,
+        0,
+        1,
+        0
+    ],
+    "start_pos": [
+        2035,
+        3024,
+        3019,
+        979,
+        981,
+        1982,
+        2166,
+        2124,
+        1968
+    ],
+    "end_pos": [
+        -990,
+        -2017,
+        -2015,
+        2078,
+        2076,
+        -1030,
+        3117,
+        -1016,
+        2556
+    ],
+    "calib_mode": [
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "LINEAR"
+    ],
+    "motor_names": [
+        "waist",
+        "shoulder",
+        "shoulder_shadow",
+        "elbow",
+        "elbow_shadow",
+        "forearm_roll",
+        "wrist_angle",
+        "wrist_rotate",
+        "gripper"
+    ]
+}
--- a/.cache/calibration/aloha_default/right_follower.json
+++ b/.cache/calibration/aloha_default/right_follower.json
@@ -0,0 +1,68 @@
+{
+    "homing_offset": [
+        2048,
+        3072,
+        3072,
+        -1024,
+        -1024,
+        2048,
+        -2048,
+        2048,
+        -2048
+    ],
+    "drive_mode": [
+        1,
+        1,
+        1,
+        0,
+        0,
+        1,
+        0,
+        1,
+        0
+    ],
+    "start_pos": [
+        2056,
+        2895,
+        2896,
+        1191,
+        1190,
+        2018,
+        2051,
+        2056,
+        2509
+    ],
+    "end_pos": [
+        -1040,
+        -2004,
+        -2006,
+        2126,
+        2127,
+        -1010,
+        3050,
+        -1117,
+        3143
+    ],
+    "calib_mode": [
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "LINEAR"
+    ],
+    "motor_names": [
+        "waist",
+        "shoulder",
+        "shoulder_shadow",
+        "elbow",
+        "elbow_shadow",
+        "forearm_roll",
+        "wrist_angle",
+        "wrist_rotate",
+        "gripper"
+    ]
+}
--- a/.cache/calibration/aloha_default/right_leader.json
+++ b/.cache/calibration/aloha_default/right_leader.json
@@ -0,0 +1,68 @@
+{
+    "homing_offset": [
+        2048,
+        3072,
+        3072,
+        -1024,
+        -1024,
+        2048,
+        -2048,
+        2048,
+        -2048
+    ],
+    "drive_mode": [
+        1,
+        1,
+        1,
+        0,
+        0,
+        1,
+        0,
+        1,
+        0
+    ],
+    "start_pos": [
+        2068,
+        3034,
+        3030,
+        1038,
+        1041,
+        1991,
+        1948,
+        2090,
+        1985
+    ],
+    "end_pos": [
+        -1025,
+        -2014,
+        -2015,
+        2058,
+        2060,
+        -955,
+        3091,
+        -940,
+        2576
+    ],
+    "calib_mode": [
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "DEGREE",
+        "LINEAR"
+    ],
+    "motor_names": [
+        "waist",
+        "shoulder",
+        "shoulder_shadow",
+        "elbow",
+        "elbow_shadow",
+        "forearm_roll",
+        "wrist_angle",
+        "wrist_rotate",
+        "gripper"
+    ]
+}
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,17 +1,3 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 # Misc
 .git
 tmp
@@ -73,7 +59,7 @@ pip-log.txt
 pip-delete-this-directory.txt

 # Unit test / coverage reports
-!tests/artifacts
+!tests/data
 htmlcov/
 .tox/
 .nox/
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,21 +1,6 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 *.memmap filter=lfs diff=lfs merge=lfs -text
 *.stl filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 *.mp4 filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.json !text !filter !merge !diff
-tests/artifacts/cameras/*.png filter=lfs diff=lfs merge=lfs -text
-*.bag filter=lfs diff=lfs merge=lfs -text
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -1,17 +1,3 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 name: "\U0001F41B Bug Report"
 description: Submit a bug report to help us improve LeRobot
 body:
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -21,7 +21,7 @@ Provide a simple way for the reviewer to try out your changes.

 Examples:
 ```bash
-pytest -sx tests/test_stuff.py::test_something
+DATA_DIR=tests/data pytest -sx tests/test_stuff.py::test_something
 ```
 ```bash
 python lerobot/scripts/train.py --some.option=true
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -1,17 +1,3 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 # Inspired by
 # https://github.com/huggingface/peft/blob/main/.github/workflows/build_docker_images.yml
 name: Builds
@@ -22,8 +8,6 @@ on:
  schedule:
    - cron: "0 1 * * *"

-permissions: {}
-
 env:
  PYTHON_VERSION: "3.10"

@@ -40,24 +24,21 @@ jobs:
          git lfs install

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
-        with:
-          cache-binary: false
+        uses: docker/setup-buildx-action@v3

      - name: Check out code
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@v4
        with:
          lfs: true
-          persist-credentials: false

      - name: Login to DockerHub
-        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Build and Push CPU
-        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
+        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./docker/lerobot-cpu/Dockerfile
@@ -78,24 +59,21 @@ jobs:
          git lfs install

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
-        with:
-          cache-binary: false
+        uses: docker/setup-buildx-action@v3

      - name: Check out code
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@v4
        with:
          lfs: true
-          persist-credentials: false

      - name: Login to DockerHub
-        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Build and Push GPU
-        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
+        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./docker/lerobot-gpu/Dockerfile
@@ -110,23 +88,19 @@ jobs:
      group: aws-general-8-plus
    steps:
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
-        with:
-          cache-binary: false
+        uses: docker/setup-buildx-action@v3

      - name: Check out code
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          persist-credentials: false
+        uses: actions/checkout@v4

      - name: Login to DockerHub
-        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Build and Push GPU dev
-        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
+        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./docker/lerobot-gpu-dev/Dockerfile
--- a/.github/workflows/build_documentation.yml
+++ b/.github/workflows/build_documentation.yml
@@ -1,23 +0,0 @@
-name: Build documentation
-
-on:
-  workflow_dispatch:
-  push:
-    paths:
-      - "docs/**"
-    branches:
-    - main
-    - doc-builder*
-    - v*-release
-
-
-jobs:
-  build:  # zizmor: ignore[excessive-permissions] We follow the same pattern as in Transformers
-    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
-    with:
-      commit_sha: ${{ github.sha }}
-      package: lerobot
-      additional_args: --not_python_module
-    secrets:
-      token: ${{ secrets.HUGGINGFACE_PUSH }}
-      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@@ -1,19 +0,0 @@
-name: Build PR Documentation
-
-on:
-  pull_request:
-    paths:
-      - "docs/**"
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  build:  # zizmor: ignore[excessive-permissions] We follow the same pattern as in Transformers
-    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
-    with:
-      commit_sha: ${{ github.event.pull_request.head.sha }}
-      pr_number: ${{ github.event.number }}
-      package: lerobot
-      additional_args: --not_python_module
--- a/.github/workflows/nightly-tests.yml
+++ b/.github/workflows/nightly-tests.yml
@@ -1,17 +1,3 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 # Inspired by
 # https://github.com/huggingface/peft/blob/main/.github/workflows/nightly.yml
 name: Nightly
@@ -21,10 +7,10 @@ on:
  schedule:
    - cron: "0 2 * * *"

-permissions: {}
-
-# env:
+env:
+  DATA_DIR: tests/data
  # SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}
+
 jobs:
  run_all_tests_cpu:
    name: CPU
@@ -33,7 +19,7 @@ jobs:
    runs-on:
      group: aws-general-8-plus
    container:
-      image: huggingface/lerobot-cpu:latest  # zizmor: ignore[unpinned-images]
+      image: huggingface/lerobot-cpu:latest
      options: --shm-size "16gb"
      credentials:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -44,9 +30,13 @@ jobs:
        working-directory: /lerobot
    steps:
      - name: Tests
+        env:
+          DATA_DIR: tests/data
        run: pytest -v --cov=./lerobot --disable-warnings tests

      - name: Tests end-to-end
+        env:
+          DATA_DIR: tests/data
        run: make test-end-to-end


@@ -60,7 +50,7 @@ jobs:
      CUDA_VISIBLE_DEVICES: "0"
      TEST_TYPE: "single_gpu"
    container:
-      image: huggingface/lerobot-gpu:latest  # zizmor: ignore[unpinned-images]
+      image: huggingface/lerobot-gpu:latest
      options: --gpus all --shm-size "16gb"
      credentials:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
--- a/.github/workflows/quality.yml
+++ b/.github/workflows/quality.yml
@@ -1,29 +1,15 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 name: Quality

 on:
  workflow_dispatch:
  workflow_call:
  pull_request:
+    branches:
+      - main
  push:
    branches:
      - main

-permissions: {}
-
 env:
  PYTHON_VERSION: "3.10"

@@ -33,12 +19,10 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          persist-credentials: false
+        uses: actions/checkout@v3

      - name: Set up Python
-        uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # v4.9.1
+        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

@@ -46,27 +30,55 @@ jobs:
        id: get-ruff-version
        run: |
          RUFF_VERSION=$(awk '/repo: https:\/\/github.com\/astral-sh\/ruff-pre-commit/{flag=1;next}/rev:/{if(flag){print $2;exit}}' .pre-commit-config.yaml)
-          echo "ruff_version=${RUFF_VERSION}" >> $GITHUB_OUTPUT
+          echo "RUFF_VERSION=${RUFF_VERSION}" >> $GITHUB_ENV

      - name: Install Ruff
-        env:
-          RUFF_VERSION: ${{ steps.get-ruff-version.outputs.ruff_version }}
-        run: python -m pip install "ruff==${RUFF_VERSION}"
+        run: python -m pip install "ruff==${{ env.RUFF_VERSION }}"

      - name: Ruff check
-        run: ruff check --output-format=github
+        run: ruff check

      - name: Ruff format
        run: ruff format --diff

-  typos:
-    name: Typos
+
+  poetry_check:
+    name: Poetry check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          persist-credentials: false
+        uses: actions/checkout@v3

-      - name: typos-action
-        uses: crate-ci/typos@db35ee91e80fbb447f33b0e5fbddb24d2a1a884f # v1.29.10
+      - name: Install poetry
+        run: pipx install poetry
+
+      - name: Poetry check
+        run: poetry check
+
+
+  poetry_relax:
+    name: Poetry relax
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v3
+
+      - name: Install poetry
+        run: pipx install poetry
+
+      - name: Install poetry-relax
+        run: poetry self add poetry-relax
+
+      - name: Poetry relax
+        id: poetry_relax
+        run: |
+          output=$(poetry relax --check 2>&1)
+          if echo "$output" | grep -q "Proposing updates"; then
+            echo "$output"
+            echo ""
+            echo "Some dependencies have caret '^' version requirement added by poetry by default."
+            echo "Please replace them with '>='. You can do this by hand or use poetry-relax to do this."
+            exit 1
+          else
+            echo "$output"
+          fi
--- a/.github/workflows/test-docker-build.yml
+++ b/.github/workflows/test-docker-build.yml
@@ -1,29 +1,15 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 # Inspired by
 # https://github.com/huggingface/peft/blob/main/.github/workflows/test-docker-build.yml
 name: Test Dockerfiles

 on:
  pull_request:
+    branches:
+      - main
    paths:
      # Run only when DockerFile files are modified
      - "docker/**"

-permissions: {}
-
 env:
  PYTHON_VERSION: "3.10"

@@ -35,46 +21,43 @@ jobs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Check out code
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          persist-credentials: false
+        uses: actions/checkout@v4

      - name: Get changed files
        id: changed-files
-        uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42
+        uses: tj-actions/changed-files@v44
        with:
          files: docker/**
          json: "true"

-      - name: Run step if only the files listed above change  # zizmor: ignore[template-injection]
+      - name: Run step if only the files listed above change
        if: steps.changed-files.outputs.any_changed == 'true'
        id: set-matrix
+        env:
+          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          echo "matrix=${{ steps.changed-files.outputs.all_changed_files}}" >> $GITHUB_OUTPUT

+
  build_modified_dockerfiles:
    name: Build modified Docker images
    needs: get_changed_files
    runs-on:
      group: aws-general-8-plus
-    if: needs.get_changed_files.outputs.matrix != ''
+    if: ${{ needs.get_changed_files.outputs.matrix }} != ''
    strategy:
      fail-fast: false
      matrix:
        docker-file: ${{ fromJson(needs.get_changed_files.outputs.matrix) }}
    steps:
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
-        with:
-          cache-binary: false
+        uses: docker/setup-buildx-action@v3

      - name: Check out code
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          persist-credentials: false
+        uses: actions/checkout@v4

      - name: Build Docker image
-        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
+        uses: docker/build-push-action@v5
        with:
          file: ${{ matrix.docker-file }}
          context: .
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,28 +1,15 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 name: Tests

 on:
  pull_request:
+    branches:
+      - main
    paths:
      - "lerobot/**"
      - "tests/**"
      - "examples/**"
      - ".github/**"
-      - "pyproject.toml"
-      - ".pre-commit-config.yaml"
+      - "poetry.lock"
      - "Makefile"
      - ".cache/**"
  push:
@@ -33,27 +20,21 @@ on:
      - "tests/**"
      - "examples/**"
      - ".github/**"
-      - "pyproject.toml"
-      - ".pre-commit-config.yaml"
+      - "poetry.lock"
      - "Makefile"
      - ".cache/**"

-permissions: {}
-
-env:
-  UV_VERSION: "0.6.0"
-
 jobs:
  pytest:
    name: Pytest
    runs-on: ubuntu-latest
    env:
+      DATA_DIR: tests/data
      MUJOCO_GL: egl
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: actions/checkout@v4
        with:
          lfs: true  # Ensure LFS files are pulled
-          persist-credentials: false

      - name: Install apt dependencies
      # portaudio19-dev is needed to install pyaudio
@@ -61,19 +42,25 @@ jobs:
          sudo apt-get update && \
          sudo apt-get install -y libegl1-mesa-dev ffmpeg portaudio19-dev

-      - name: Install uv and python
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
-        with:
-          enable-cache: true
-          version: ${{ env.UV_VERSION }}
-          python-version: "3.10"
+      - name: Install poetry
+        run: |
+          pipx install poetry && poetry config virtualenvs.in-project true
+          echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH

-      - name: Install lerobot (all extras)
-        run: uv sync --all-extras
+      # TODO(rcadene, aliberts): python 3.12 seems to be used in the tests, not python 3.10
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          cache: "poetry"
+
+      - name: Install poetry dependencies
+        run: |
+          poetry install --all-extras

      - name: Test with pytest
        run: |
-          uv run pytest tests -v --cov=./lerobot --durations=0 \
+          pytest tests -v --cov=./lerobot --durations=0 \
            -W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
            -W ignore::UserWarning:torch.utils.data.dataloader:558 \
            -W ignore::UserWarning:gymnasium.utils.env_checker:247 \
@@ -83,66 +70,71 @@ jobs:
    name: Pytest (minimal install)
    runs-on: ubuntu-latest
    env:
+      DATA_DIR: tests/data
      MUJOCO_GL: egl
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: actions/checkout@v4
        with:
          lfs: true  # Ensure LFS files are pulled
-          persist-credentials: false

      - name: Install apt dependencies
        run: sudo apt-get update && sudo apt-get install -y ffmpeg

-      - name: Install uv and python
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
+      - name: Install poetry
+        run: |
+          pipx install poetry && poetry config virtualenvs.in-project true
+          echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
+
+      # TODO(rcadene, aliberts): python 3.12 seems to be used in the tests, not python 3.10
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
        with:
-          enable-cache: true
-          version: ${{ env.UV_VERSION }}
          python-version: "3.10"

-      - name: Install lerobot
-        run: uv sync --extra "test"
+      - name: Install poetry dependencies
+        run: |
+          poetry install --extras "test"

      - name: Test with pytest
        run: |
-          uv run pytest tests -v --cov=./lerobot --durations=0 \
+          pytest tests -v --cov=./lerobot --durations=0 \
            -W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
            -W ignore::UserWarning:torch.utils.data.dataloader:558 \
            -W ignore::UserWarning:gymnasium.utils.env_checker:247 \
            && rm -rf tests/outputs outputs

+
  end-to-end:
    name: End-to-end
    runs-on: ubuntu-latest
    env:
+      DATA_DIR: tests/data
      MUJOCO_GL: egl
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: actions/checkout@v4
        with:
          lfs: true  # Ensure LFS files are pulled
-          persist-credentials: false

      - name: Install apt dependencies
      # portaudio19-dev is needed to install pyaudio
        run: |
          sudo apt-get update && \
-          sudo apt-get install -y libegl1-mesa-dev ffmpeg portaudio19-dev
+          sudo apt-get install -y libegl1-mesa-dev portaudio19-dev

-      - name: Install uv and python
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
+      - name: Install poetry
+        run: |
+          pipx install poetry && poetry config virtualenvs.in-project true
+          echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
+
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
        with:
-          enable-cache: true
-          version: ${{ env.UV_VERSION }}
          python-version: "3.10"
+          cache: "poetry"

-      - name: Install lerobot (all extras)
+      - name: Install poetry dependencies
        run: |
-          uv venv
-          uv sync --all-extras
-
-      - name: venv
-        run: |
-          echo "PYTHON_PATH=${{ github.workspace }}/.venv/bin/python" >> $GITHUB_ENV
+          poetry install --all-extras

      - name: Test end-to-end
        run: |
--- a/.github/workflows/trufflehog.yml
+++ b/.github/workflows/trufflehog.yml
@@ -1,35 +1,20 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 on:
  push:

 name: Secret Leaks

-permissions: {}
+permissions:
+  contents: read

 jobs:
  trufflehog:
    runs-on: ubuntu-latest
    steps:
    - name: Checkout code
-      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      uses: actions/checkout@v4
      with:
        fetch-depth: 0
-        persist-credentials: false
-
    - name: Secret Scanning
-      uses: trufflesecurity/trufflehog@90694bf9af66e7536abc5824e7a87246dbf933cb # v3.88.35
+      uses: trufflesecurity/trufflehog@main
      with:
        extra_args: --only-verified
--- a/.github/workflows/upload_pr_documentation.yml
+++ b/.github/workflows/upload_pr_documentation.yml
@@ -1,16 +0,0 @@
-name: Upload PR Documentation
-
-on: # zizmor: ignore[dangerous-triggers] We follow the same pattern as in Transformers
-  workflow_run:
-    workflows: [ "Build PR Documentation" ]
-    types:
-    - completed
-
-jobs:
-  build:  # zizmor: ignore[excessive-permissions] We follow the same pattern as in Transformers
-    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
-    with:
-      package_name: lerobot
-    secrets:
-      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
-      comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@@ -1,20 +1,3 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Dev scripts
-.dev
-
 # Logging
 logs
 tmp
@@ -66,10 +49,6 @@ share/python-wheels/
 *.egg
 MANIFEST

-# uv/poetry lock files
-poetry.lock
-uv.lock
-
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
@@ -81,7 +60,7 @@ pip-log.txt
 pip-delete-this-directory.txt

 # Unit test / coverage reports
-!tests/artifacts
+!tests/data
 htmlcov/
 .tox/
 .nox/
@@ -94,8 +73,10 @@ coverage.xml
 .hypothesis/
 .pytest_cache/

-# Ignore .cache
+# Ignore .cache except calibration
 .cache/*
+!.cache/calibration/
+!.cache/calibration/**

 # Translations
 *.mo
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,31 +1,9 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-exclude: "tests/artifacts/.*\\.safetensors$"
+exclude: ^(tests/data)
 default_language_version:
    python: python3.10
 repos:
-  ##### Meta #####
-  - repo: meta
-    hooks:
-      - id: check-useless-excludes
-      - id: check-hooks-apply
-
-
-  ##### Style / Misc. #####
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
+    rev: v4.6.0
    hooks:
      - id: check-added-large-files
      - id: debug-statements
@@ -35,40 +13,25 @@ repos:
      - id: check-toml
      - id: end-of-file-fixer
      - id: trailing-whitespace
-
-  - repo: https://github.com/adhtruong/mirrors-typos
-    rev: v1.32.0
-    hooks:
-      - id: typos
-        args: [--force-exclude]
-
  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.20.0
+    rev: v3.16.0
    hooks:
    -   id: pyupgrade
-
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.11
+    rev: v0.5.2
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format
-
-
-  ##### Security #####
+  - repo: https://github.com/python-poetry/poetry
+    rev: 1.8.0
+    hooks:
+      - id: poetry-check
+      - id: poetry-lock
+        args:
+          - "--check"
+          - "--no-update"
  - repo: https://github.com/gitleaks/gitleaks
-    rev: v8.26.0
+    rev: v8.18.4
    hooks:
      - id: gitleaks
-
-  - repo: https://github.com/woodruffw/zizmor-pre-commit
-    rev: v1.8.0
-    hooks:
-      - id: zizmor
-
-  - repo: https://github.com/PyCQA/bandit
-    rev: 1.8.3
-    hooks:
-    - id: bandit
-      args: ["-c", "pyproject.toml"]
-      additional_dependencies: ["bandit[toml]"]
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -129,71 +129,38 @@ Follow these steps to start contributing:

   🚨 **Do not** work on the `main` branch.

-4. for development, we advise to use a tool like `poetry` or `uv` instead of just `pip` to easily track our dependencies.
-   Follow the instructions to [install poetry](https://python-poetry.org/docs/#installation) (use a version >=2.1.0) or to [install uv](https://docs.astral.sh/uv/getting-started/installation/#installation-methods) if you don't have one of them already.
+4. for development, we use `poetry` instead of just `pip` to easily track our dependencies.
+   If you don't have it already, follow the [instructions](https://python-poetry.org/docs/#installation) to install it.

   Set up a development environment with conda or miniconda:
   ```bash
   conda create -y -n lerobot-dev python=3.10 && conda activate lerobot-dev
   ```

-   If you're using `uv`, it can manage python versions so you can instead do:
-   ```bash
-   uv venv --python 3.10 && source .venv/bin/activate
-   ```
-
   To develop on 🤗 LeRobot, you will at least need to install the `dev` and `test` extras dependencies along with the core library:
-
-   using `poetry`
   ```bash
-   poetry sync --extras "dev test"
-   ```
-
-   using `uv`
-   ```bash
-   uv sync --extra dev --extra test
+   poetry install --sync --extras "dev test"
   ```

   You can also install the project with all its dependencies (including environments):
-
-   using `poetry`
   ```bash
-   poetry sync --all-extras
-   ```
-
-   using `uv`
-   ```bash
-   uv sync --all-extras
+   poetry install --sync --all-extras
   ```

   > **Note:** If you don't install simulation environments with `--all-extras`, the tests that require them will be skipped when running the pytest suite locally. However, they *will* be tested in the CI. In general, we advise you to install everything and test locally before pushing.

-   Whichever command you chose to install the project (e.g. `poetry sync --all-extras`), you should run it again when pulling code with an updated version of `pyproject.toml` and `poetry.lock` in order to synchronize your virtual environment with the new dependencies.
+   Whichever command you chose to install the project (e.g. `poetry install --sync --all-extras`), you should run it again when pulling code with an updated version of `pyproject.toml` and `poetry.lock` in order to synchronize your virtual environment with the new dependencies.

   The equivalent of `pip install some-package`, would just be:
-
-   using `poetry`
   ```bash
   poetry add some-package
   ```

-   using `uv`
-   ```bash
-   uv add some-package
-   ```
-
   When making changes to the poetry sections of the `pyproject.toml`, you should run the following command to lock dependencies.
-   using `poetry`
   ```bash
-   poetry lock
+   poetry lock --no-update
   ```

-   using `uv`
-   ```bash
-   uv lock
-   ```
-
-
 5. Develop the features on your branch.

   As you work on the features, you should make sure that the test suite
@@ -228,7 +195,7 @@ Follow these steps to start contributing:
   git commit
   ```

-   Note, if you already committed some changes that have a wrong formatting, you can use:
+   Note, if you already committed some changes that have a wrong formatting, you can use:
   ```bash
   pre-commit run --all-files
   ```
@@ -269,6 +236,9 @@ Follow these steps to start contributing:
   the PR as a draft PR. These are useful to avoid duplicated work, and to differentiate
   it from PRs ready to be merged;
 4. Make sure existing tests pass;
+<!-- 5. Add high-coverage tests. No quality testing = no merge.
+
+See an example of a good PR here: https://github.com/huggingface/lerobot/pull/ -->

 ### Tests

@@ -288,7 +258,7 @@ sudo apt-get install git-lfs
 git lfs install
 ```

-Pull artifacts if they're not in [tests/artifacts](tests/artifacts)
+Pull artifacts if they're not in [tests/data](tests/data)
 ```bash
 git lfs pull
 ```
@@ -297,7 +267,7 @@ We use `pytest` in order to run the tests. From the root of the
 repository, here's how to run tests with `pytest` for the library:

 ```bash
-python -m pytest -sv ./tests
+DATA_DIR="tests/data" python -m pytest -sv ./tests
 ```


--- a/250
+++ b/250
@@ -1,25 +1,11 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 .PHONY: tests

 PYTHON_PATH := $(shell which python)

-# If uv is installed and a virtual environment exists, use it
-UV_CHECK := $(shell command -v uv)
-ifneq ($(UV_CHECK),)
-	PYTHON_PATH := $(shell .venv/bin/python)
+# If Poetry is installed, redefine PYTHON_PATH to use the Poetry-managed Python
+POETRY_CHECK := $(shell command -v poetry)
+ifneq ($(POETRY_CHECK),)
+	PYTHON_PATH := $(shell poetry run which python)
 endif

 export PATH := $(dir $(PYTHON_PATH)):$(PATH)
@@ -34,109 +20,171 @@ build-gpu:

 test-end-to-end:
 	${MAKE} DEVICE=$(DEVICE) test-act-ete-train
-	${MAKE} DEVICE=$(DEVICE) test-act-ete-train-resume
 	${MAKE} DEVICE=$(DEVICE) test-act-ete-eval
+	${MAKE} DEVICE=$(DEVICE) test-act-ete-train-amp
+	${MAKE} DEVICE=$(DEVICE) test-act-ete-eval-amp
 	${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-train
 	${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval
 	${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train
+	${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train-with-online
 	${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval
+	${MAKE} DEVICE=$(DEVICE) test-default-ete-eval
+	${MAKE} DEVICE=$(DEVICE) test-act-pusht-tutorial

 test-act-ete-train:
 	python lerobot/scripts/train.py \
-		--policy.type=act \
-		--policy.dim_model=64 \
-		--policy.n_action_steps=20 \
-		--policy.chunk_size=20 \
-		--policy.device=$(DEVICE) \
-		--env.type=aloha \
-		--env.episode_length=5 \
-		--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
-		--dataset.image_transforms.enable=true \
-		--dataset.episodes="[0]" \
-		--batch_size=2 \
-		--steps=4 \
-		--eval_freq=2 \
-		--eval.n_episodes=1 \
-		--eval.batch_size=1 \
-		--save_freq=2 \
-		--save_checkpoint=true \
-		--log_freq=1 \
-		--wandb.enable=false \
-		--output_dir=tests/outputs/act/
-
-test-act-ete-train-resume:
-	python lerobot/scripts/train.py \
-		--config_path=tests/outputs/act/checkpoints/000002/pretrained_model/train_config.json \
-		--resume=true
+		policy=act \
+		policy.dim_model=64 \
+		env=aloha \
+		wandb.enable=False \
+		training.offline_steps=2 \
+		training.online_steps=0 \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		device=$(DEVICE) \
+		training.save_checkpoint=true \
+		training.save_freq=2 \
+		policy.n_action_steps=20 \
+		policy.chunk_size=20 \
+		training.batch_size=2 \
+		training.image_transforms.enable=true \
+		hydra.run.dir=tests/outputs/act/

 test-act-ete-eval:
 	python lerobot/scripts/eval.py \
-		--policy.path=tests/outputs/act/checkpoints/000004/pretrained_model \
-		--policy.device=$(DEVICE) \
-		--env.type=aloha \
-		--env.episode_length=5 \
-		--eval.n_episodes=1 \
-		--eval.batch_size=1
+		-p tests/outputs/act/checkpoints/000002/pretrained_model \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		env.episode_length=8 \
+		device=$(DEVICE) \
+
+test-act-ete-train-amp:
+	python lerobot/scripts/train.py \
+		policy=act \
+		policy.dim_model=64 \
+		env=aloha \
+		wandb.enable=False \
+		training.offline_steps=2 \
+		training.online_steps=0 \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		device=$(DEVICE) \
+		training.save_checkpoint=true \
+		training.save_freq=2 \
+		policy.n_action_steps=20 \
+		policy.chunk_size=20 \
+		training.batch_size=2 \
+		hydra.run.dir=tests/outputs/act_amp/ \
+		training.image_transforms.enable=true \
+		use_amp=true
+
+test-act-ete-eval-amp:
+	python lerobot/scripts/eval.py \
+		-p tests/outputs/act_amp/checkpoints/000002/pretrained_model \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		env.episode_length=8 \
+		device=$(DEVICE) \
+		use_amp=true

 test-diffusion-ete-train:
 	python lerobot/scripts/train.py \
-		--policy.type=diffusion \
-		--policy.down_dims='[64,128,256]' \
-		--policy.diffusion_step_embed_dim=32 \
-		--policy.num_inference_steps=10 \
-		--policy.device=$(DEVICE) \
-		--env.type=pusht \
-		--env.episode_length=5 \
-		--dataset.repo_id=lerobot/pusht \
-		--dataset.image_transforms.enable=true \
-		--dataset.episodes="[0]" \
-		--batch_size=2 \
-		--steps=2 \
-		--eval_freq=2 \
-		--eval.n_episodes=1 \
-		--eval.batch_size=1 \
-		--save_checkpoint=true \
-		--save_freq=2 \
-		--log_freq=1 \
-		--wandb.enable=false \
-		--output_dir=tests/outputs/diffusion/
+		policy=diffusion \
+		policy.down_dims=\[64,128,256\] \
+		policy.diffusion_step_embed_dim=32 \
+		policy.num_inference_steps=10 \
+		env=pusht \
+		wandb.enable=False \
+		training.offline_steps=2 \
+		training.online_steps=0 \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		device=$(DEVICE) \
+		training.save_checkpoint=true \
+		training.save_freq=2 \
+		training.batch_size=2 \
+		training.image_transforms.enable=true \
+		hydra.run.dir=tests/outputs/diffusion/

 test-diffusion-ete-eval:
 	python lerobot/scripts/eval.py \
-		--policy.path=tests/outputs/diffusion/checkpoints/000002/pretrained_model \
-		--policy.device=$(DEVICE) \
-		--env.type=pusht \
-		--env.episode_length=5 \
-		--eval.n_episodes=1 \
-		--eval.batch_size=1
+		-p tests/outputs/diffusion/checkpoints/000002/pretrained_model \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		env.episode_length=8 \
+		device=$(DEVICE) \

 test-tdmpc-ete-train:
 	python lerobot/scripts/train.py \
-		--policy.type=tdmpc \
-		--policy.device=$(DEVICE) \
-		--env.type=xarm \
-		--env.task=XarmLift-v0 \
-		--env.episode_length=5 \
-		--dataset.repo_id=lerobot/xarm_lift_medium \
-		--dataset.image_transforms.enable=true \
-		--dataset.episodes="[0]" \
-		--batch_size=2 \
-		--steps=2 \
-		--eval_freq=2 \
-		--eval.n_episodes=1 \
-		--eval.batch_size=1 \
-		--save_checkpoint=true \
-		--save_freq=2 \
-		--log_freq=1 \
-		--wandb.enable=false \
-		--output_dir=tests/outputs/tdmpc/
+		policy=tdmpc \
+		env=xarm \
+		env.task=XarmLift-v0 \
+		dataset_repo_id=lerobot/xarm_lift_medium \
+		wandb.enable=False \
+		training.offline_steps=2 \
+		training.online_steps=0 \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		env.episode_length=2 \
+		device=$(DEVICE) \
+		training.save_checkpoint=true \
+		training.save_freq=2 \
+		training.batch_size=2 \
+		training.image_transforms.enable=true \
+		hydra.run.dir=tests/outputs/tdmpc/
+
+test-tdmpc-ete-train-with-online:
+	python lerobot/scripts/train.py \
+		env=pusht \
+		env.gym.obs_type=environment_state_agent_pos \
+		policy=tdmpc_pusht_keypoints \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		env.episode_length=10 \
+		device=$(DEVICE) \
+		training.offline_steps=2 \
+		training.online_steps=20 \
+		training.save_checkpoint=false \
+		training.save_freq=10 \
+		training.batch_size=2 \
+		training.online_rollout_n_episodes=2 \
+		training.online_rollout_batch_size=2 \
+		training.online_steps_between_rollouts=10 \
+		training.online_buffer_capacity=15 \
+		eval.use_async_envs=true \
+		hydra.run.dir=tests/outputs/tdmpc_online/
+

 test-tdmpc-ete-eval:
 	python lerobot/scripts/eval.py \
-		--policy.path=tests/outputs/tdmpc/checkpoints/000002/pretrained_model \
-		--policy.device=$(DEVICE) \
-		--env.type=xarm \
-		--env.episode_length=5 \
-		--env.task=XarmLift-v0 \
-		--eval.n_episodes=1 \
-		--eval.batch_size=1
+		-p tests/outputs/tdmpc/checkpoints/000002/pretrained_model \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		env.episode_length=8 \
+		device=$(DEVICE) \
+
+test-default-ete-eval:
+	python lerobot/scripts/eval.py \
+		--config lerobot/configs/default.yaml \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		env.episode_length=8 \
+		device=$(DEVICE) \
+
+test-act-pusht-tutorial:
+	cp examples/advanced/1_train_act_pusht/act_pusht.yaml lerobot/configs/policy/created_by_Makefile.yaml
+	python lerobot/scripts/train.py \
+		policy=created_by_Makefile.yaml \
+		env=pusht \
+		wandb.enable=False \
+		training.offline_steps=2 \
+		eval.n_episodes=1 \
+		eval.batch_size=1 \
+		env.episode_length=2 \
+		device=$(DEVICE) \
+		training.save_model=true \
+		training.save_freq=2 \
+		training.batch_size=2 \
+		training.image_transforms.enable=true \
+		hydra.run.dir=tests/outputs/act_pusht/
+	rm lerobot/configs/policy/created_by_Makefile.yaml
--- a/README.md
+++ b/README.md
@@ -23,38 +23,15 @@
 </div>

 <h2 align="center">
-    <p><a href="https://github.com/huggingface/lerobot/blob/main/examples/12_use_so101.md">
-        Build Your Own SO-101 Robot!</a></p>
+    <p><a href="https://github.com/huggingface/lerobot/blob/main/examples/10_use_so100.md">New robot in town: SO-100</a></p>
 </h2>

 <div align="center">
-  <div style="display: flex; gap: 1rem; justify-content: center; align-items: center;" >
-    <img
-      src="media/so101/so101.webp?raw=true"
-      alt="SO-101 follower arm"
-      title="SO-101 follower arm"
-      style="width: 40%;"
-    />
-    <img
-      src="media/so101/so101-leader.webp?raw=true"
-      alt="SO-101 leader arm"
-      title="SO-101 leader arm"
-      style="width: 40%;"
-    />
-  </div>
-
-
-  <p><strong>Meet the updated SO100, the SO-101 – Just €114 per arm!</strong></p>
-  <p>Train it in minutes with a few simple moves on your laptop.</p>
-  <p>Then sit back and watch your creation act autonomously! 🤯</p>
-
-  <p><a href="https://github.com/huggingface/lerobot/blob/main/examples/12_use_so101.md">
-      See the full SO-101 tutorial here.</a></p>
-
-  <p>Want to take it to the next level? Make your SO-101 mobile by building LeKiwi!</p>
-  <p>Check out the <a href="https://github.com/huggingface/lerobot/blob/main/examples/11_use_lekiwi.md">LeKiwi tutorial</a> and bring your robot to life on wheels.</p>
-
-  <img src="media/lekiwi/kiwi.webp?raw=true" alt="LeKiwi mobile robot" title="LeKiwi mobile robot" width="50%">
+    <img src="media/so100/leader_follower.webp?raw=true" alt="SO-100 leader and follower arms" title="SO-100 leader and follower arms" width="50%">
+    <p>We just added a new tutorial on how to build a more affordable robot, at the price of $110 per arm!</p>
+    <p>Teach it new skills by showing it a few moves with just a laptop.</p>
+    <p>Then watch your homemade robot act autonomously 🤯</p>
+    <p>Follow the link to the <a href="https://github.com/huggingface/lerobot/blob/main/examples/10_use_so100.md">full tutorial for SO-100</a>.</p>
 </div>

 <br/>
@@ -65,6 +42,7 @@

 ---

+
 🤗 LeRobot aims to provide models, datasets, and tools for real-world robotics in PyTorch. The goal is to lower the barrier to entry to robotics so that everyone can contribute and benefit from sharing datasets and pretrained models.

 🤗 LeRobot contains state-of-the-art approaches that have been shown to transfer to the real-world with a focus on imitation learning and reinforcement learning.
@@ -90,7 +68,7 @@

 ### Acknowledgment

- Thanks to Tony Zhao, Zipeng Fu and colleagues for open sourcing ACT policy, ALOHA environments and datasets. Ours are adapted from [ALOHA](https://tonyzhaozh.github.io/aloha) and [Mobile ALOHA](https://mobile-aloha.github.io).
+- Thanks to Tony Zhao, Zipeng Fu and colleagues for open sourcing ACT policy, ALOHA environments and datasets. Ours are adapted from [ALOHA](https://tonyzhaozh.github.io/aloha) and [Mobile ALOHA](https://mobile-aloha.github.io).
 - Thanks to Cheng Chi, Zhenjia Xu and colleagues for open sourcing Diffusion policy, Pusht environment and datasets, as well as UMI datasets. Ours are adapted from [Diffusion Policy](https://diffusion-policy.cs.columbia.edu) and [UMI Gripper](https://umi-gripper.github.io).
 - Thanks to Nicklas Hansen, Yunhai Feng and colleagues for open sourcing TDMPC policy, Simxarm environments and datasets. Ours are adapted from [TDMPC](https://github.com/nicklashansen/tdmpc) and [FOWM](https://www.yunhaifeng.com/FOWM).
 - Thanks to Antonio Loquercio and Ashish Kumar for their early support.
@@ -111,25 +89,14 @@ conda create -y -n lerobot python=3.10
 conda activate lerobot
 ```

-When using `miniconda`, install `ffmpeg` in your environment:
-```bash
-conda install ffmpeg -c conda-forge
-```
-
-> **NOTE:** This usually installs `ffmpeg 7.X` for your platform compiled with the `libsvtav1` encoder. If `libsvtav1` is not supported (check supported encoders with `ffmpeg -encoders`), you can:
->  - _[On any platform]_ Explicitly install `ffmpeg 7.X` using:
->  ```bash
->  conda install ffmpeg=7.1.1 -c conda-forge
->  ```
->  - _[On Linux only]_ Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`.
-
 Install 🤗 LeRobot:
 ```bash
 pip install -e .
 ```

-> **NOTE:** If you encounter build errors, you may need to install additional dependencies (`cmake`, `build-essential`, and `ffmpeg libs`). On Linux, run:
-`sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev pkg-config`. For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg)
+> **NOTE:** Depending on your platform, if you encounter any build errors during this step
+you may need to install `cmake` and `build-essential` for building some of our dependencies.
+On Linux: `sudo apt-get install cmake build-essential`

 For simulations, 🤗 LeRobot comes with gymnasium environments that can be installed as extras:
 - [aloha](https://github.com/huggingface/gym-aloha)
@@ -155,7 +122,10 @@ wandb login
 ├── examples             # contains demonstration examples, start here to learn about LeRobot
 |   └── advanced         # contains even more examples for those who have mastered the basics
 ├── lerobot
-|   ├── configs          # contains config classes with all options that you can override in the command line
+|   ├── configs          # contains hydra yaml files with all options that you can override in the command line
+|   |   ├── default.yaml   # selected by default, it loads pusht environment and diffusion policy
+|   |   ├── env            # various sim environments and their datasets: aloha.yaml, pusht.yaml, xarm.yaml
+|   |   └── policy         # various policies: act.yaml, diffusion.yaml, tdmpc.yaml
 |   ├── common           # contains classes and utilities
 |   |   ├── datasets       # various datasets of human demonstrations: aloha, pusht, xarm
 |   |   ├── envs           # various sim environments: aloha, pusht, xarm
@@ -183,12 +153,10 @@ python lerobot/scripts/visualize_dataset.py \
    --episode-index 0
 ```

-or from a dataset in a local folder with the `root` option and the `--local-files-only` (in the following case the dataset will be searched for in `./my_local_data_dir/lerobot/pusht`)
+or from a dataset in a local folder by setting the `DATA_DIR` environment variable to its root folder (in the following case the dataset will be searched for in `./my_local_data_dir/lerobot/pusht`)
 ```bash
-python lerobot/scripts/visualize_dataset.py \
+DATA_DIR='./my_local_data_dir' python lerobot/scripts/visualize_dataset.py \
    --repo-id lerobot/pusht \
-    --root ./my_local_data_dir \
-    --local-files-only 1 \
    --episode-index 0
 ```

@@ -221,7 +189,7 @@ dataset attributes:
  │  ├ episode_index (int64): index of the episode for this sample
  │  ├ frame_index (int64): index of the frame for this sample in the episode ; starts at 0 for each episode
  │  ├ timestamp (float32): timestamp in the episode
-  │  ├ next.done (bool): indicates the end of an episode ; True for the last frame in each episode
+  │  ├ next.done (bool): indicates the end of an episode ; True for the last frame in each episode
  │  └ index (int64): general index in the whole dataset
  ├ episode_data_index: contains 2 tensors with the start and end indices of each episode
  │  ├ from (1D int64 tensor): first frame index for each episode — shape (num episodes,) starts with 0
@@ -240,10 +208,12 @@ dataset attributes:

 A `LeRobotDataset` is serialised using several widespread file formats for each of its parts, namely:
 - hf_dataset stored using Hugging Face datasets library serialization to parquet
- videos are stored in mp4 format to save space
- metadata are stored in plain json/jsonl files
+- videos are stored in mp4 format to save space, or as png files
+- episode_data_index saved using `safetensors` tensor serialization format
+- stats saved using `safetensors` tensor serialization format
+- info is saved using JSON

-Dataset can be uploaded/downloaded from the HuggingFace hub seamlessly. To work on a local dataset, you can specify its location with the `root` argument if it's not in the default `~/.cache/huggingface/lerobot` location.
+Datasets can be uploaded/downloaded from the HuggingFace hub seamlessly. To work on a local dataset, you can set the `DATA_DIR` environment variable to your root dataset folder as illustrated in the above section on dataset visualization.

 ### Evaluate a pretrained policy

@@ -252,18 +222,15 @@ Check out [example 2](./examples/2_evaluate_pretrained_policy.py) that illustrat
 We also provide a more capable script to parallelize the evaluation over multiple environments during the same rollout. Here is an example with a pretrained model hosted on [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht):
 ```bash
 python lerobot/scripts/eval.py \
-    --policy.path=lerobot/diffusion_pusht \
-    --env.type=pusht \
-    --eval.batch_size=10 \
-    --eval.n_episodes=10 \
-    --policy.use_amp=false \
-    --policy.device=cuda
+    -p lerobot/diffusion_pusht \
+    eval.n_episodes=10 \
+    eval.batch_size=10
 ```

 Note: After training your own policy, you can re-evaluate the checkpoints with:

 ```bash
-python lerobot/scripts/eval.py --policy.path={OUTPUT_DIR}/checkpoints/last/pretrained_model
+python lerobot/scripts/eval.py -p {OUTPUT_DIR}/checkpoints/last/pretrained_model
 ```

 See `python lerobot/scripts/eval.py --help` for more instructions.
@@ -272,28 +239,70 @@ See `python lerobot/scripts/eval.py --help` for more instructions.

 Check out [example 3](./examples/3_train_policy.py) that illustrates how to train a model using our core library in python, and [example 4](./examples/4_train_policy_with_script.md) that shows how to use our training script from command line.

-To use wandb for logging training and evaluation curves, make sure you've run `wandb login` as a one-time setup step. Then, when running the training command above, enable WandB in the configuration by adding `--wandb.enable=true`.
+In general, you can use our training script to easily train any policy. Here is an example of training the ACT policy on trajectories collected by humans on the Aloha simulation environment for the insertion task:

-A link to the wandb logs for the run will also show up in yellow in your terminal. Here is an example of what they look like in your browser. Please also check [here](./examples/4_train_policy_with_script.md#typical-logs-and-metrics) for the explanation of some commonly used metrics in logs.
+```bash
+python lerobot/scripts/train.py \
+    policy=act \
+    env=aloha \
+    env.task=AlohaInsertion-v0 \
+    dataset_repo_id=lerobot/aloha_sim_insertion_human \
+```
+
+The experiment directory is automatically generated and will show up in yellow in your terminal. It looks like `outputs/train/2024-05-05/20-21-12_aloha_act_default`. You can manually specify an experiment directory by adding this argument to the `train.py` python command:
+```bash
+    hydra.run.dir=your/new/experiment/dir
+```
+
+In the experiment directory there will be a folder called `checkpoints` which will have the following structure:
+
+```bash
+checkpoints
+├── 000250  # checkpoint_dir for training step 250
+│   ├── pretrained_model  # Hugging Face pretrained model dir
+│   │   ├── config.json  # Hugging Face pretrained model config
+│   │   ├── config.yaml  # consolidated Hydra config
+│   │   ├── model.safetensors  # model weights
+│   │   └── README.md  # Hugging Face model card
+│   └── training_state.pth  # optimizer/scheduler/rng state and training step
+```
+
+To resume training from a checkpoint, you can add these to the `train.py` python command:
+```bash
+    hydra.run.dir=your/original/experiment/dir resume=true
+```
+
+It will load the pretrained model, optimizer and scheduler states for training. For more information please see our tutorial on training resumption [here](https://github.com/huggingface/lerobot/blob/main/examples/5_resume_training.md).
+
+To use wandb for logging training and evaluation curves, make sure you've run `wandb login` as a one-time setup step. Then, when running the training command above, enable WandB in the configuration by adding:
+
+```bash
+    wandb.enable=true
+```
+
+A link to the wandb logs for the run will also show up in yellow in your terminal. Here is an example of what they look like in your browser. Please also check [here](https://github.com/huggingface/lerobot/blob/main/examples/4_train_policy_with_script.md#typical-logs-and-metrics) for the explanation of some commonly used metrics in logs.

 ![](media/wandb.png)

-Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `--eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python lerobot/scripts/eval.py --help` for more instructions.
+Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python lerobot/scripts/eval.py --help` for more instructions.

 #### Reproduce state-of-the-art (SOTA)

-We provide some pretrained policies on our [hub page](https://huggingface.co/lerobot) that can achieve state-of-the-art performances.
-You can reproduce their training by loading the config from their run. Simply running:
+We have organized our configuration files (found under [`lerobot/configs`](./lerobot/configs)) such that they reproduce SOTA results from a given model variant in their respective original works. Simply running:
+
 ```bash
-python lerobot/scripts/train.py --config_path=lerobot/diffusion_pusht
+python lerobot/scripts/train.py policy=diffusion env=pusht
 ```
+
 reproduces SOTA results for Diffusion Policy on the PushT task.

+Pretrained policies, along with reproduction details, can be found under the "Models" section of https://huggingface.co/lerobot.
+
 ## Contribute

 If you would like to contribute to 🤗 LeRobot, please check out our [contribution guide](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md).

-<!-- ### Add a new dataset
+### Add a new dataset

 To add a dataset to the hub, you need to login using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
 ```bash
@@ -311,7 +320,7 @@ python lerobot/scripts/push_dataset_to_hub.py \

 See `python lerobot/scripts/push_dataset_to_hub.py --help` for more instructions.

-If your dataset format is not supported, implement your own in `lerobot/common/datasets/push_dataset_to_hub/${raw_format}_format.py` by copying examples like [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py), or [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py). -->
+If your dataset format is not supported, implement your own in `lerobot/common/datasets/push_dataset_to_hub/${raw_format}_format.py` by copying examples like [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py), or [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py).


 ### Add a pretrained policy
@@ -321,7 +330,7 @@ Once you have trained a policy you may upload it to the Hugging Face hub using a
 You first need to find the checkpoint folder located inside your experiment directory (e.g. `outputs/train/2024-05-05/20-21-12_aloha_act_default/checkpoints/002500`). Within that there is a `pretrained_model` directory which should contain:
 - `config.json`: A serialized version of the policy configuration (following the policy's dataclass config).
 - `model.safetensors`: A set of `torch.nn.Module` parameters, saved in [Hugging Face Safetensors](https://huggingface.co/docs/safetensors/index) format.
- `train_config.json`: A consolidated configuration containing all parameters used for training. The policy configuration should match `config.json` exactly. This is useful for anyone who wants to evaluate your policy or for reproducibility.
+- `config.yaml`: A consolidated Hydra training configuration containing the policy, environment, and dataset configs. The policy configuration should match `config.json` exactly. The environment config is useful for anyone who wants to evaluate your policy. The dataset config just serves as a paper trail for reproducibility.

 To upload these to the hub, run the following:
 ```bash
@@ -360,7 +369,7 @@ with profile(
 If you want, you can cite this work with:
 ```bibtex
@misc{cadene2024lerobot,
-    author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Gallouedec, Quentin and Zouitine, Adil and Palma, Steven and Kooijmans, Pepijn and Aractingi, Michel and Shukor, Mustafa and Aubakirova, Dana and Russi, Martino and Capuano, Francesco and Pascale, Caroline and Choghari, Jade and Moss, Jess and Wolf, Thomas},
+    author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Gallouedec, Quentin and Zouitine, Adil and Wolf, Thomas},
    title = {LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch},
    howpublished = "\url{https://github.com/huggingface/lerobot}",
    year = {2024}
@@ -408,6 +417,3 @@ Additionally, if you are using any of the particular policy architecture, pretra
  year={2024}
 }
 ```
-## Star History
-
-[![Star History Chart](https://api.star-history.com/svg?repos=huggingface/lerobot&type=Timeline)](https://star-history.com/#huggingface/lerobot&Timeline)
--- a/benchmarks/video/README.md
+++ b/benchmarks/video/README.md
@@ -21,7 +21,7 @@ How to decode videos?

 ## Variables
 **Image content & size**
-We don't expect the same optimal settings for a dataset of images from a simulation, or from real-world in an apartment, or in a factory, or outdoor, or with lots of moving objects in the scene, etc. Similarly, loading times might not vary linearly with the image size (resolution).
+We don't expect the same optimal settings for a dataset of images from a simulation, or from real-world in an apartment, or in a factory, or outdoor, or with lots of moving objects in the scene, etc. Similarly, loading times might not vary linearly with the image size (resolution).
 For these reasons, we run this benchmark on four representative datasets:
 - `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera.
 - `aliberts/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
@@ -51,7 +51,7 @@ For a comprehensive list and documentation of these parameters, see the ffmpeg d
 ### Decoding parameters
 **Decoder**
 We tested two video decoding backends from torchvision:
- `pyav`
+- `pyav` (default)
 - `video_reader` (requires to build torchvision from source)

 **Requested timestamps**
@@ -63,7 +63,7 @@ This of course is affected by the `-g` parameter during encoding, which specifie

 Note that this differs significantly from a typical use case like watching a movie, in which every frame is loaded sequentially from the beginning to the end and it's acceptable to have big values for `-g`.

-Additionally, because some policies might request single timestamps that are a few frames apart, we also have the following scenario:
+Additionally, because some policies might request single timestamps that are a few frames apart, we also have the following scenario:
 - `2_frames_4_space`: 2 frames with 4 consecutive frames of spacing in between (e.g `[t, t + 5 / fps]`),

 However, due to how video decoding is implemented with `pyav`, we don't have access to an accurate seek so in practice this scenario is essentially the same as `6_frames` since all 6 frames between `t` and `t + 5 / fps` will be decoded.
@@ -85,8 +85,8 @@ However, due to how video decoding is implemented with `pyav`, we don't have acc
 **Average Structural Similarity Index Measure (higher is better)**
 `avg_ssim` evaluates the perceived quality of images by comparing luminance, contrast, and structure. SSIM values range from -1 to 1, where 1 indicates perfect similarity.

-One aspect that can't be measured here with those metrics is the compatibility of the encoding across platforms, in particular on web browser, for visualization purposes.
-h264, h265 and AV1 are all commonly used codecs and should not pose an issue. However, the chroma subsampling (`pix_fmt`) format might affect compatibility:
+One aspect that can't be measured here with those metrics is the compatibility of the encoding across platforms, in particular on web browser, for visualization purposes.
+h264, h265 and AV1 are all commonly used codecs and should not pose an issue. However, the chroma subsampling (`pix_fmt`) format might affect compatibility:
 - `yuv420p` is more widely supported across various platforms, including web browsers.
 - `yuv444p` offers higher color fidelity but might not be supported as broadly.

@@ -114,9 +114,9 @@ We tried to measure the most impactful parameters for both encoding and decoding

 Additional encoding parameters exist that are not included in this benchmark. In particular:
 - `-preset` which allows for selecting encoding presets. This represents a collection of options that will provide a certain encoding speed to compression ratio. By leaving this parameter unspecified, it is considered to be `medium` for libx264 and libx265 and `8` for libsvtav1.
- `-tune` which allows to optimize the encoding for certain aspects (e.g. film quality, fast decoding, etc.).
+- `-tune` which allows to optimize the encoding for certain aspects (e.g. film quality, fast decoding, etc.).

-See the documentation mentioned above for more detailed info on these settings and for a more comprehensive list of other parameters.
+See the documentation mentioned above for more detailed info on these settings and for a more comprehensive list of other parameters.

 Similarly on the decoding side, other decoders exist but are not implemented in our current benchmark. To name a few:
 - `torchaudio`
--- a/benchmarks/video/capture_camera_feed.py
+++ b/benchmarks/video/capture_camera_feed.py
@@ -17,21 +17,12 @@

 import argparse
 import datetime as dt
-import os
-import time
 from pathlib import Path

 import cv2
-import rerun as rr
-
-# see https://rerun.io/docs/howto/visualization/limit-ram
-RERUN_MEMORY_LIMIT = os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "5%")


-def display_and_save_video_stream(output_dir: Path, fps: int, width: int, height: int, duration: int):
-    rr.init("lerobot_capture_camera_feed")
-    rr.spawn(memory_limit=RERUN_MEMORY_LIMIT)
-
+def display_and_save_video_stream(output_dir: Path, fps: int, width: int, height: int):
    now = dt.datetime.now()
    capture_dir = output_dir / f"{now:%Y-%m-%d}" / f"{now:%H-%M-%S}"
    if not capture_dir.exists():
@@ -48,21 +39,24 @@ def display_and_save_video_stream(output_dir: Path, fps: int, width: int, height
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

    frame_index = 0
-    start_time = time.time()
-    while time.time() - start_time < duration:
+    while True:
        ret, frame = cap.read()

        if not ret:
            print("Error: Could not read frame.")
            break
-        rr.log("video/stream", rr.Image(frame.numpy()), static=True)
+
+        cv2.imshow("Video Stream", frame)
        cv2.imwrite(str(capture_dir / f"frame_{frame_index:06d}.png"), frame)
        frame_index += 1

-    # Release the capture
-    cap.release()
+        # Break the loop on 'q' key press
+        if cv2.waitKey(1) & 0xFF == ord("q"):
+            break

-    # TODO(Steven): Add a graceful shutdown via a close() method for the Viewer context, though not currently supported in the Rerun API.
+    # Release the capture and destroy all windows
+    cap.release()
+    cv2.destroyAllWindows()


 if __name__ == "__main__":
@@ -92,11 +86,5 @@ if __name__ == "__main__":
        default=720,
        help="Height of the captured images.",
    )
-    parser.add_argument(
-        "--duration",
-        type=int,
-        default=20,
-        help="Duration in seconds for which the video stream should be captured.",
-    )
    args = parser.parse_args()
    display_and_save_video_stream(**vars(args))
--- a/benchmarks/video/run_video_benchmark.py
+++ b/benchmarks/video/run_video_benchmark.py
@@ -67,7 +67,7 @@ def parse_int_or_none(value) -> int | None:
 def check_datasets_formats(repo_ids: list) -> None:
    for repo_id in repo_ids:
        dataset = LeRobotDataset(repo_id)
-        if len(dataset.meta.video_keys) > 0:
+        if dataset.video:
            raise ValueError(
                f"Use only image dataset for running this benchmark. Video dataset provided: {repo_id}"
            )
@@ -266,7 +266,7 @@ def benchmark_encoding_decoding(
        )

    ep_num_images = dataset.episode_data_index["to"][0].item()
-    width, height = tuple(dataset[0][dataset.meta.camera_keys[0]].shape[-2:])
+    width, height = tuple(dataset[0][dataset.camera_keys[0]].shape[-2:])
    num_pixels = width * height
    video_size_bytes = video_path.stat().st_size
    images_size_bytes = get_directory_size(imgs_dir)
@@ -416,7 +416,7 @@ if __name__ == "__main__":
        "--vcodec",
        type=str,
        nargs="*",
-        default=["libx264", "hevc", "libsvtav1"],
+        default=["libx264", "libx265", "libsvtav1"],
        help="Video codecs to be tested",
    )
    parser.add_argument(
@@ -446,7 +446,7 @@ if __name__ == "__main__":
    #     nargs="*",
    #     default=[0, 1],
    #     help="Use the fastdecode tuning option. 0 disables it. "
-    #         "For libx264 and libx265/hevc, only 1 is possible. "
+    #         "For libx264 and libx265, only 1 is possible. "
    #         "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization",
    # )
    parser.add_argument(
--- a/docker/lerobot-cpu/Dockerfile
+++ b/docker/lerobot-cpu/Dockerfile
@@ -1,29 +1,32 @@
 # Configure image
 ARG PYTHON_VERSION=3.10
+
 FROM python:${PYTHON_VERSION}-slim
-
-# Configure environment variables
 ARG PYTHON_VERSION
-ENV DEBIAN_FRONTEND=noninteractive
-ENV MUJOCO_GL="egl"
-ENV PATH="/opt/venv/bin:$PATH"
+ARG DEBIAN_FRONTEND=noninteractive

-# Install dependencies and set up Python in a single layer
+# Install apt dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential cmake git \
+    build-essential cmake \
    libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
-    speech-dispatcher libgeos-dev \
-    && ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
-    && python -m venv /opt/venv \
-    && apt-get clean && rm -rf /var/lib/apt/lists/* \
-    && echo "source /opt/venv/bin/activate" >> /root/.bashrc
+    speech-dispatcher \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*

-# Clone repository and install LeRobot in a single layer
+# Create virtual environment
+RUN ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+RUN echo "source /opt/venv/bin/activate" >> /root/.bashrc
+
+# Install LeRobot
 COPY . /lerobot
 WORKDIR /lerobot
-RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \
-    && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht]" \
-        --extra-index-url https://download.pytorch.org/whl/cpu
+RUN pip install --upgrade --no-cache-dir pip
+RUN pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]" \
+    --extra-index-url https://download.pytorch.org/whl/cpu
+
+# Set EGL as the rendering backend for MuJoCo
+ENV MUJOCO_GL="egl"

 # Execute in bash shell rather than python
 CMD ["/bin/bash"]
--- a/docker/lerobot-gpu-dev/Dockerfile
+++ b/docker/lerobot-gpu-dev/Dockerfile
@@ -13,8 +13,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    sed gawk grep curl wget zip unzip \
    tcpdump sysstat screen tmux \
    libglib2.0-0 libgl1-mesa-glx libegl1-mesa \
-    speech-dispatcher portaudio19-dev libgeos-dev \
-    python${PYTHON_VERSION} python${PYTHON_VERSION}-venv python${PYTHON_VERSION}-dev \
+    speech-dispatcher \
+    python${PYTHON_VERSION} python${PYTHON_VERSION}-venv \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

 # Install ffmpeg build dependencies. See:
--- a/docker/lerobot-gpu/Dockerfile
+++ b/docker/lerobot-gpu/Dockerfile
@@ -1,24 +1,30 @@
 FROM nvidia/cuda:12.4.1-base-ubuntu22.04

-# Configure environment variables
+# Configure image
 ARG PYTHON_VERSION=3.10
-ENV DEBIAN_FRONTEND=noninteractive
-ENV MUJOCO_GL="egl"
-ENV PATH="/opt/venv/bin:$PATH"
+ARG DEBIAN_FRONTEND=noninteractive

-# Install dependencies and set up Python in a single layer
+
+# Install apt dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential cmake git \
+    build-essential cmake \
    libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
-    speech-dispatcher libgeos-dev \
+    speech-dispatcher \
    python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
-    && ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
-    && python -m venv /opt/venv \
-    && apt-get clean && rm -rf /var/lib/apt/lists/* \
-    && echo "source /opt/venv/bin/activate" >> /root/.bashrc
+    && apt-get clean && rm -rf /var/lib/apt/lists/*

-# Clone repository and install LeRobot in a single layer
+
+# Create virtual environment
+RUN ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+RUN echo "source /opt/venv/bin/activate" >> /root/.bashrc
+
+# Install LeRobot
 COPY . /lerobot
 WORKDIR /lerobot
-RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \
-    && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]"
+RUN pip install --upgrade --no-cache-dir pip
+RUN pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]"
+
+# Set EGL as the rendering backend for MuJoCo
+ENV MUJOCO_GL="egl"
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,137 +0,0 @@
-<!---
-Copyright 2020 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-->
-
-# Generating the documentation
-
-To generate the documentation, you first have to build it. Several packages are necessary to build the doc,
-you can install them with the following command, at the root of the code repository:
-
-```bash
-pip install -e ".[docs]"
-```
-
-You will also need `nodejs`. Please refer to their [installation page](https://nodejs.org/en/download)
-
---
-**NOTE**
-
-You only need to generate the documentation to inspect it locally (if you're planning changes and want to
-check how they look before committing for instance). You don't have to `git commit` the built documentation.
-
---
-
-## Building the documentation
-
-Once you have setup the `doc-builder` and additional packages, you can generate the documentation by
-typing the following command:
-
-```bash
-doc-builder build lerobot docs/source/ --build_dir ~/tmp/test-build
-```
-
-You can adapt the `--build_dir` to set any temporary folder that you prefer. This command will create it and generate
-the MDX files that will be rendered as the documentation on the main website. You can inspect them in your favorite
-Markdown editor.
-
-## Previewing the documentation
-
-To preview the docs, first install the `watchdog` module with:
-
-```bash
-pip install watchdog
-```
-
-Then run the following command:
-
-```bash
-doc-builder preview lerobot docs/source/
-```
-
-The docs will be viewable at [http://localhost:3000](http://localhost:3000). You can also preview the docs once you have opened a PR. You will see a bot add a comment to a link where the documentation with your changes lives.
-
---
-**NOTE**
-
-The `preview` command only works with existing doc files. When you add a completely new file, you need to update `_toctree.yml` & restart `preview` command (`ctrl-c` to stop it & call `doc-builder preview ...` again).
-
---
-
-## Adding a new element to the navigation bar
-
-Accepted files are Markdown (.md).
-
-Create a file with its extension and put it in the source directory. You can then link it to the toc-tree by putting
-the filename without the extension in the [`_toctree.yml`](https://github.com/huggingface/lerobot/blob/main/docs/source/_toctree.yml) file.
-
-## Renaming section headers and moving sections
-
-It helps to keep the old links working when renaming the section header and/or moving sections from one document to another. This is because the old links are likely to be used in Issues, Forums, and Social media and it'd make for a much more superior user experience if users reading those months later could still easily navigate to the originally intended information.
-
-Therefore, we simply keep a little map of moved sections at the end of the document where the original section was. The key is to preserve the original anchor.
-
-So if you renamed a section from: "Section A" to "Section B", then you can add at the end of the file:
-
-```
-Sections that were moved:
-
-[ <a href="#section-b">Section A</a><a id="section-a"></a> ]
-```
-and of course, if you moved it to another file, then:
-
-```
-Sections that were moved:
-
-[ <a href="../new-file#section-b">Section A</a><a id="section-a"></a> ]
-```
-
-Use the relative style to link to the new file so that the versioned docs continue to work.
-
-For an example of a rich moved sections set please see the very end of [the transformers Trainer doc](https://github.com/huggingface/transformers/blob/main/docs/source/en/main_classes/trainer.md).
-
-### Adding a new tutorial
-
-Adding a new tutorial or section is done in two steps:
-
- Add a new file under `./source`. This file can either be ReStructuredText (.rst) or Markdown (.md).
- Link that file in `./source/_toctree.yml` on the correct toc-tree.
-
-Make sure to put your new file under the proper section. If you have a doubt, feel free to ask in a Github Issue or PR.
-
-### Writing source documentation
-
-Values that should be put in `code` should either be surrounded by backticks: \`like so\`. Note that argument names
-and objects like True, None or any strings should usually be put in `code`.
-
-#### Writing a multi-line code block
-
-Multi-line code blocks can be useful for displaying examples. They are done between two lines of three backticks as usual in Markdown:
-
-
-````
-```
-# first line of code
-# second line
-# etc
-```
-````
-
-#### Adding an image
-
-Due to the rapidly growing repository, it is important to make sure that no files that would significantly weigh down the repository are added. This includes images, videos, and other non-text files. We prefer to leverage a hf.co hosted `dataset` like
-the ones hosted on [`hf-internal-testing`](https://huggingface.co/hf-internal-testing) in which to place these files and reference
-them by URL. We recommend putting them in the following dataset: [huggingface/documentation-images](https://huggingface.co/datasets/huggingface/documentation-images).
-If an external contribution, feel free to add the images to your PR and ask a Hugging Face member to migrate your images
-to this dataset.
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -1,26 +0,0 @@
- sections:
-  - local: index
-    title: LeRobot
-  - local: installation
-    title: Installation
-  title: Get started
- sections:
-  - local: getting_started_real_world_robot
-    title: Getting Started with Real-World Robots
-  - local: cameras
-    title: Cameras
-  title: "Tutorials"
- sections:
-  - local: so101
-    title: SO-101
-  - local: so100
-    title: SO-100
-  - local: koch
-    title: Koch v1.1
-  - local: lekiwi
-    title: LeKiwi
-  title: "Robots"
- sections:
-  - local: contributing
-    title: Contribute to LeRobot
-  title: "Contribute"
--- a/docs/source/cameras.mdx
+++ b/docs/source/cameras.mdx
@@ -1,173 +0,0 @@
-# Cameras
-
-LeRobot offers multiple options for video capture, including phone cameras, built-in laptop cameras, external webcams, and Intel RealSense cameras. To efficiently record frames from most cameras, you can use either the `OpenCVCamera` or `RealSenseCamera` class. For additional compatibility details on the `OpenCVCamera` class, refer to the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
-
-### Finding your camera
-
-To instantiate a camera, you need a camera identifier. This identifier might change if you reboot your computer or re-plug your camera, a behavior mostly dependant on your operating system.
-
-To find the camera indices of the cameras plugged into your system, run the following script:
-```bash
-python lerobot/find_cameras.py opencv # or realsense for Intel Realsense cameras
-```
-
-The output will look something like this if you have two cameras connected:
-```
--- Detected Cameras ---
-Camera #0:
-  Name: OpenCV Camera @ 0
-  Type: OpenCV
-  Id: 0
-  Backend api: AVFOUNDATION
-  Default stream profile:
-    Format: 16.0
-    Width: 1920
-    Height: 1080
-    Fps: 15.0
--------------------
-(more cameras ...)
-```
-
-> [!WARNING]
-> When using Intel RealSense cameras in `macOS`, you could get this [error](https://github.com/IntelRealSense/librealsense/issues/12307): `Error finding RealSense cameras: failed to set power state`, this can be solved by running the same command with `sudo` permissions. Note that using RealSense cameras in `macOS` is unstable.
-
-
-## Use Cameras
-
-Below are two examples, demonstrating how to work with the API.
-
- **Asynchronous frame capture** using an OpenCV-based camera
- **Color and depth capture** using an Intel RealSense camera
-
-
-<hfoptions id="shell_restart">
-<hfoption id="Open CV Camera">
-
-```python
-from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig
-from lerobot.common.cameras.opencv.camera_opencv import OpenCVCamera
-from lerobot.common.cameras.configs import ColorMode, Cv2Rotation
-
-# Construct an `OpenCVCameraConfig` with your desired FPS, resolution, color mode, and rotation.
-config = OpenCVCameraConfig(
-    index_or_path=0,
-    fps=15,
-    width=1920,
-    height=1080,
-    color_mode=ColorMode.RGB,
-    rotation=Cv2Rotation.NO_ROTATION
-)
-
-# Instantiate and connect an `OpenCVCamera`, performing a warm-up read (default).
-camera = OpenCVCamera(config)
-camera.connect()
-
-# Read frames asynchronously in a loop via `async_read(timeout_ms)`
-try:
-    for i in range(10):
-        frame = camera.async_read(timeout_ms=200)
-        print(f"Async frame {i} shape:", frame.shape)
-finally:
-    camera.disconnect()
-```
-
-</hfoption>
-<hfoption id="Intel Realsense Camera">
-
-```python
-from lerobot.common.cameras.intel.configuration_realsense import RealSenseCameraConfig
-from lerobot.common.cameras.intel.camera_realsense import RealSenseCamera
-from lerobot.common.cameras.configs import ColorMode, Cv2Rotation
-
-# Create a `RealSenseCameraConfig` specifying your camera’s serial number and enabling depth.
-config = RealSenseCameraConfig(
-    serial_number="233522074606",
-    fps=15,
-    width=640,
-    height=480,
-    color_mode=ColorMode.RGB,
-    use_depth=True,
-    rotation=Cv2Rotation.NO_ROTATION
-)
-
-# Instantiate and connect a `RealSenseCamera` with warm-up read (default).
-camera = RealSenseCamera(config)
-camera.connect()
-
-# Capture a color frame via `read()` and a depth map via `read_depth()`.
-try:
-    color_frame = camera.read()
-    depth_map = camera.read_depth()
-    print("Color frame shape:", color_frame.shape)
-    print("Depth map shape:", depth_map.shape)
-finally:
-    camera.disconnect()
-```
-</hfoption>
-</hfoptions>
-
-
-## Use your phone
-<hfoptions id="use phone">
-<hfoption id="Mac">
-
-To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
- Ensure your Mac is running macOS 13 or later, and your iPhone is on iOS 16 or later.
- Sign in both devices with the same Apple ID.
- Connect your devices with a USB cable or turn on Wi-Fi and Bluetooth for a wireless connection.
-
-For more details, visit [Apple support](https://support.apple.com/en-gb/guide/mac-help/mchl77879b8a/mac).
-
-Your iPhone should be detected automatically when running the camera setup script in the next section.
-
-</hfoption>
-<hfoption id="Linux">
-
-If you want to use your phone as a camera on Linux, follow these steps to set up a virtual camera
-
-1. *Install `v4l2loopback-dkms` and `v4l-utils`*. Those packages are required to create virtual camera devices (`v4l2loopback`) and verify their settings with the `v4l2-ctl` utility from `v4l-utils`. Install them using:
-```python
-sudo apt install v4l2loopback-dkms v4l-utils
-```
-2. *Install [DroidCam](https://droidcam.app) on your phone*. This app is available for both iOS and Android.
-3. *Install [OBS Studio](https://obsproject.com)*. This software will help you manage the camera feed. Install it using [Flatpak](https://flatpak.org):
-```python
-flatpak install flathub com.obsproject.Studio
-```
-4. *Install the DroidCam OBS plugin*. This plugin integrates DroidCam with OBS Studio. Install it with:
-```python
-flatpak install flathub com.obsproject.Studio.Plugin.DroidCam
-```
-5. *Start OBS Studio*. Launch with:
-```python
-flatpak run com.obsproject.Studio
-```
-6. *Add your phone as a source*. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480`.
-7. *Adjust resolution settings*. In OBS Studio, go to `File > Settings > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it in.
-8. *Start virtual camera*. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide).
-9. *Verify the virtual camera setup*. Use `v4l2-ctl` to list the devices:
-```python
-v4l2-ctl --list-devices
-```
-You should see an entry like:
-```
-VirtualCam (platform:v4l2loopback-000):
-/dev/video1
-```
-10. *Check the camera resolution*. Use `v4l2-ctl` to ensure that the virtual camera output resolution is `640x480`. Change `/dev/video1` to the port of your virtual camera from the output of `v4l2-ctl --list-devices`.
-```python
-v4l2-ctl -d /dev/video1 --get-fmt-video
-```
-You should see an entry like:
-```
->>> Format Video Capture:
->>>	Width/Height      : 640/480
->>>	Pixel Format      : 'YUYV' (YUYV 4:2:2)
-```
-
-Troubleshooting: If the resolution is not correct you will have to delete the Virtual Camera port and try again as it cannot be changed.
-
-If everything is set up correctly, you can proceed with the rest of the tutorial.
-
-</hfoption>
-</hfoptions>
--- a/docs/source/contributing.md
+++ b/docs/source/contributing.md
@@ -1 +0,0 @@
-../../CONTRIBUTING.md
--- a/docs/source/getting_started_real_world_robot.mdx
+++ b/docs/source/getting_started_real_world_robot.mdx
@@ -1,311 +0,0 @@
-# Getting Started with Real-World Robots
-
-This tutorial will explain how to train a neural network to control a real robot autonomously.
-
-**You'll learn:**
-1. How to record and visualize your dataset.
-2. How to train a policy using your data and prepare it for evaluation.
-3. How to evaluate your policy and visualize the results.
-
-By following these steps, you'll be able to replicate tasks, such as picking up a Lego block and placing it in a bin with a high success rate, as shown in the video below.
-
-<details>
-<summary><strong>Video: pickup lego block task</strong></summary>
-
-<div class="video-container">
-  <video controls width="600">
-    <source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/lerobot_task.mp4" type="video/mp4" />
-  </video>
-</div>
-
-</details>
-
-This tutorial isn’t tied to a specific robot: we walk you through the commands and API snippets you can adapt for any supported platform.
-
-During data collection, you’ll use a “teleoperation” device, such as a leader arm or keyboard, to teleoperate the robot and record its motion trajectories.
-
-Once you’ve gathered enough trajectories, you’ll train a neural network to imitate these trajectories and deploy the trained model so your robot can perform the task autonomously.
-
-If you run into any issues at any point, jump into our [Discord community](https://discord.com/invite/s3KuuzsPFb) for support.
-
-## Set up and Calibrate
-
-If you haven't yet set up and calibrated your robot and teleop device, please do so by following the robot-specific tutorial.
-
-## Teleoperate
-
-In this example, we’ll demonstrate how to teleoperate the SO101 robot. For each command, we also provide a corresponding API example.
-
-Note that the `id` associated with a robot is used to store the calibration file. It's important to use the same `id` when teleoperating, recording, and evaluating when using the same setup.
-
-<hfoptions id="teleoperate_so101">
-<hfoption id="Command">
-```bash
-python -m lerobot.teleoperate \
-    --robot.type=so101_follower \
-    --robot.port=/dev/tty.usbmodem58760431541 \
-    --robot.id=my_awesome_follower_arm \
-    --teleop.type=so101_leader \
-    --teleop.port=/dev/tty.usbmodem58760431551 \
-    --teleop.id=my_awesome_leader_arm
-```
-</hfoption>
-<hfoption id="API example">
-```python
-from lerobot.common.teleoperators.so101_leader import SO101LeaderConfig, SO101Leader
-from lerobot.common.robots.so101_follower import SO101FollowerConfig, SO101Follower
-
-robot_config = SO101FollowerConfig(
-    port="/dev/tty.usbmodem58760431541",
-    id="my_red_robot_arm",
-)
-
-teleop_config = SO101LeaderConfig(
-    port="/dev/tty.usbmodem58760431551",
-    id="my_blue_leader_arm",
-)
-
-robot = SO101Follower(robot_config)
-teleop_device = SO101Leader(teleop_config)
-robot.connect()
-teleop_device.connect()
-
-while True:
-    action = teleop_device.get_action()
-    robot.send_action(action)
-```
-</hfoption>
-</hfoptions>
-
-The teleoperate command will automatically:
-1. Identify any missing calibrations and initiate the calibration procedure.
-2. Connect the robot and teleop device and start teleoperation.
-
-## Cameras
-
-To add cameras to your setup, follow this [Guide](./cameras#setup-cameras).
-
-## Teleoperate with cameras
-
-With `rerun`, you can teleoperate again while simultaneously visualizing the camera feeds and joint positions. In this example, we’re using the Koch arm.
-
-<hfoptions id="teleoperate_koch_camera">
-<hfoption id="Command">
-```bash
-python -m lerobot.teleoperate \
-    --robot.type=koch_follower \
-    --robot.port=/dev/tty.usbmodem58760431541 \
-    --robot.id=my_awesome_follower_arm \
-    --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \
-    --teleop.type=koch_leader \
-    --teleop.port=/dev/tty.usbmodem58760431551 \
-    --teleop.id=my_awesome_leader_arm \
-    --display_data=true
-```
-</hfoption>
-<hfoption id="API example">
-```python
-from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig
-from lerobot.common.teleoperators.koch_leader import KochLeaderConfig, KochLeader
-from lerobot.common.robots.koch_follower import KochFollowerConfig, KochFollower
-
-camera_config = {
-    "front": OpenCVCameraConfig(index_or_path=0, width=1920, height=1080, fps=30)
-}
-
-robot_config = KochFollowerConfig(
-    port="/dev/tty.usbmodem585A0076841",
-    id="my_red_robot_arm",
-    cameras=camera_config
-)
-
-teleop_config = KochLeaderConfig(
-    port="/dev/tty.usbmodem58760431551",
-    id="my_blue_leader_arm",
-)
-
-robot = KochFollower(robot_config)
-teleop_device = KochLeader(teleop_config)
-robot.connect()
-teleop_device.connect()
-
-while True:
-    observation = robot.get_observation()
-    action = teleop_device.get_action()
-    robot.send_action(action)
-```
-</hfoption>
-</hfoptions>
-
-## Record a dataset
-
-Once you're familiar with teleoperation, you can record your first dataset.
-
-We use the Hugging Face hub features for uploading your dataset. If you haven't previously used the Hub, make sure you can log in via the CLI using a write-access token; this token can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens).
-
-Add your token to the CLI by running this command:
-```bash
-huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
-```
-
-Then store your Hugging Face repository name in a variable:
-```bash
-HF_USER=$(huggingface-cli whoami | head -n 1)
-echo $HF_USER
-```
-
-Now you can record a dataset. To record 2 episodes and upload your dataset to the hub, execute this command tailored to the SO101.
-```bash
-python -m lerobot.record \
-    --robot.type=so101_follower \
-    --robot.port=/dev/tty.usbmodem585A0076841 \
-    --robot.id=my_awesome_follower_arm \
-    --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \
-    --teleop.type=so101_leader \
-    --teleop.port=/dev/tty.usbmodem58760431551 \
-    --teleop.id=my_awesome_leader_arm \
-    --display_data=true \
-    --dataset.repo_id=${HF_USER}/record-test \
-    --dataset.num_episodes=2 \
-    --dataset.single_task="Grab the black cube"
-```
-
-#### Dataset upload
-Locally, your dataset is stored in this folder: `~/.cache/huggingface/lerobot/{repo-id}`. At the end of data recording, your dataset will be uploaded on your Hugging Face page (e.g. https://huggingface.co/datasets/cadene/so101_test) that you can obtain by running:
-```bash
-echo https://huggingface.co/datasets/${HF_USER}/so101_test
-```
-Your dataset will be automatically tagged with `LeRobot` for the community to find it easily, and you can also add custom tags (in this case `tutorial` for example).
-
-You can look for other LeRobot datasets on the hub by searching for `LeRobot` [tags](https://huggingface.co/datasets?other=LeRobot).
-
-#### Record function
-
-The `record` function provides a suite of tools for capturing and managing data during robot operation:
-
-##### 1. Data Storage
- Data is stored using the `LeRobotDataset` format and is stored on disk during recording.
- By default, the dataset is pushed to your Hugging Face page after recording.
-  - To disable uploading, use `--dataset.push_to_hub=False`.
-
-##### 2. Checkpointing and Resuming
- Checkpoints are automatically created during recording.
- If an issue occurs, you can resume by re-running the same command with `--control.resume=true`.
- To start recording from scratch, **manually delete** the dataset directory.
-
-##### 3. Recording Parameters
-Set the flow of data recording using command-line arguments:
- `--dataset.episode_time_s=60`
-  Duration of each data recording episode (default: **60 seconds**).
- `--dataset.reset_time_s=60`
-  Duration for resetting the environment after each episode (default: **60 seconds**).
- `--dataset.num_episodes=50`
-  Total number of episodes to record (default: **50**).
-
-##### 4. Keyboard Controls During Recording
-Control the data recording flow using keyboard shortcuts:
- Press **Right Arrow (`→`)**: Early stop the current episode or reset time and move to the next.
- Press **Left Arrow (`←`)**: Cancel the current episode and re-record it.
- Press **Escape (`ESC`)**: Immediately stop the session, encode videos, and upload the dataset.
-
-#### Tips for gathering data
-
-Once you're comfortable with data recording, you can create a larger dataset for training. A good starting task is grasping an object at different locations and placing it in a bin. We suggest recording at least 50 episodes, with 10 episodes per location. Keep the cameras fixed and maintain consistent grasping behavior throughout the recordings. Also make sure the object you are manipulating is visible on the cameras. A good rule of thumb is you should be able to do the task yourself by only looking at the camera images.
-
-In the following sections, you’ll train your neural network. After achieving reliable grasping performance, you can start introducing more variations during data collection, such as additional grasp locations, different grasping techniques, and altering camera positions.
-
-Avoid adding too much variation too quickly, as it may hinder your results.
-
-If you want to dive deeper into this important topic, you can check out the [blog post](https://huggingface.co/blog/lerobot-datasets#what-makes-a-good-dataset) we wrote on what makes a good dataset.
-
-
-#### Troubleshooting:
- On Linux, if the left and right arrow keys and escape key don't have any effect during data recording, make sure you've set the `$DISPLAY` environment variable. See [pynput limitations](https://pynput.readthedocs.io/en/latest/limitations.html#linux).
-
-## Visualize a dataset
-
-If you uploaded your dataset to the hub with `--dataset.push_to_hub=true`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id given by:
-```bash
-echo ${HF_USER}/so101_test
-```
-
-## Replay an episode
-
-A useful feature is the `replay` function, which allows you to replay any episode that you've recorded or episodes from any dataset out there. This function helps you test the repeatability of your robot's actions and assess transferability across robots of the same model.
-
-You can replay the first episode on your robot with:
-```bash
-python -m lerobot.replay \
-    --robot.type=so101_follower \
-    --robot.port=/dev/tty.usbmodem58760431541 \
-    --robot.id=my_awesome_follower_arm \
-    --dataset.repo_id=${HF_USER}/record-test \
-    --dataset.episode=0 # choose the episode you want to replay
-```
-
-Your robot should replicate movements similar to those you recorded. For example, check out [this video](https://x.com/RemiCadene/status/1793654950905680090) where we use `replay` on an Aloha robot from [Trossen Robotics](https://www.trossenrobotics.com).
-
-## Train a policy
-
-To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
-```bash
-python lerobot/scripts/train.py \
-  --dataset.repo_id=${HF_USER}/so101_test \
-  --policy.type=act \
-  --output_dir=outputs/train/act_so101_test \
-  --job_name=act_so101_test \
-  --policy.device=cuda \
-  --wandb.enable=true
-```
-
-Let's explain the command:
-1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/so101_test`.
-2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
-3. We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon.
-4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
-
-Training should take several hours. You will find checkpoints in `outputs/train/act_so101_test/checkpoints`.
-
-To resume training from a checkpoint, below is an example command to resume from `last` checkpoint of the `act_so101_test` policy:
-```bash
-python lerobot/scripts/train.py \
-  --config_path=outputs/train/act_so101_test/checkpoints/last/pretrained_model/train_config.json \
-  --resume=true
-```
-
-#### Upload policy checkpoints
-
-Once training is done, upload the latest checkpoint with:
-```bash
-huggingface-cli upload ${HF_USER}/act_so101_test \
-  outputs/train/act_so101_test/checkpoints/last/pretrained_model
-```
-
-You can also upload intermediate checkpoints with:
-```bash
-CKPT=010000
-huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \
-  outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model
-```
-
-## Evaluate your policy
-
-You can use the `record` script from [`lerobot/record.py`](https://github.com/huggingface/lerobot/blob/main/lerobot/record.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
-```bash
-python -m lerobot.record  \
-  --robot.type=so100_follower \
-  --robot.port=/dev/ttyACM1 \
-  --robot.cameras="{ up: {type: opencv, index_or_path: /dev/video10, width: 640, height: 480, fps: 30}, side: {type: intelrealsense, serial_number_or_name: 233522074606, width: 640, height: 480, fps: 30}}" \
-  --robot.id=my_awesome_follower_arm \
-  --teleop.type=so100_leader \
-  --teleop.port=/dev/ttyACM0 \
-  --teleop.id=my_awesome_leader_arm \
-  --display_data=false \
-  --dataset.repo_id=$HF_USER/eval_so100 \
-  --dataset.single_task="Put lego brick into the transparent box" \
-  --policy.path=${HF_USER}/my_policy
-```
-
-As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
-1. There is an additional `--policy.path` argument which indicates the path to your policy checkpoint (e.g. `outputs/train/eval_act_so101_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_so101_test`).
-2. The name of the dataset begins with `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_so101_test`).
--- a/docs/source/index.mdx
+++ b/docs/source/index.mdx
@@ -1,19 +0,0 @@
-<div class="flex justify-center">
-  <a target="_blank" href="https://huggingface.co/lerobot">
-      <img alt="HuggingFace Expert Acceleration Program" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/lerobot-logo-thumbnail.png" style="width: 100%"></img>
-  </a>
-</div>
-
-# LeRobot
-
-**State-of-the-art machine learning for real-world robotics**
-
-🤗 LeRobot aims to provide models, datasets, and tools for real-world robotics in PyTorch. The goal is to lower the barrier for entry to robotics so that everyone can contribute and benefit from sharing datasets and pretrained models.
-
-🤗 LeRobot contains state-of-the-art approaches that have been shown to transfer to the real-world with a focus on imitation learning and reinforcement learning.
-
-🤗 LeRobot already provides a set of pretrained models, datasets with human collected demonstrations, and simulated environments so that everyone can get started.
-
-🤗 LeRobot hosts pretrained models and datasets on the LeRobot HuggingFace page.
-
-Join the LeRobot community on [Discord](https://discord.gg/s3KuuzsPFb)
--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -1,70 +0,0 @@
-# Installation
-
-## Install LeRobot
-
-Currently only available from source.
-
-Download our source code:
-```bash
-git clone https://github.com/huggingface/lerobot.git
-cd lerobot
-```
-
-Create a virtual environment with Python 3.10, using [`Miniconda`](https://docs.anaconda.com/miniconda/install/#quick-command-line-install)
-```bash
-conda create -y -n lerobot python=3.10
-```
-
-Then activate your conda environment, you have to do this each time you open a shell to use lerobot:
-```bash
-conda activate lerobot
-```
-
-When using `miniconda`, install `ffmpeg` in your environment:
-```bash
-conda install ffmpeg -c conda-forge
-```
-
-> [!TIP]
-> This usually installs `ffmpeg 7.X` for your platform compiled with the `libsvtav1` encoder. If `libsvtav1` is not supported (check supported encoders with `ffmpeg -encoders`), you can:
->  - _[On any platform]_ Explicitly install `ffmpeg 7.X` using:
->  ```bash
->  conda install ffmpeg=7.1.1 -c conda-forge
->  ```
->  - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`.
-
-Install 🤗 LeRobot:
-```bash
-pip install -e .
-```
-
-### Troubleshooting
-If you encounter build errors, you may need to install additional dependencies: `cmake`, `build-essential`, and `ffmpeg libs`.
-To install these for linux run:
-```bash
-sudo apt-get install cmake build-essential python-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev pkg-config
-```
-For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg)
-
-## Optional dependencies
-
-LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`.
-
-### Simulations
-Install environment packages: `aloha` ([gym-aloha](https://github.com/huggingface/gym-aloha)), `xarm` ([gym-xarm](https://github.com/huggingface/gym-xarm)), or `pusht` ([gym-pusht](https://github.com/huggingface/gym-pusht))
-Example:
-```bash
-pip install -e ".[aloha]" # or "[pusht]" for example
-```
-
-### Motor Control
-For Koch v1.1 install the Dynamixel SDK, for SO100/SO101/Moss install the Feetech SDK.
-```bash
-pip install -e ".[feetech]" # or "[dynamixel]" for example
-```
-
-### Experiment Tracking
-To use [Weights and Biases](https://docs.wandb.ai/quickstart) for experiment tracking, log in with
-```bash
-wandb login
-```
--- a/docs/source/koch.mdx
+++ b/docs/source/koch.mdx
@@ -1 +0,0 @@
-../../lerobot/common/robots/koch_follower/koch.mdx
--- a/docs/source/lekiwi.mdx
+++ b/docs/source/lekiwi.mdx
@@ -1 +0,0 @@
-../../lerobot/common/robots/lekiwi/lekiwi.mdx
--- a/docs/source/so100.mdx
+++ b/docs/source/so100.mdx
@@ -1 +0,0 @@
-../../lerobot/common/robots/so100_follower/so100.mdx
--- a/docs/source/so101.mdx
+++ b/docs/source/so101.mdx
@@ -1 +0,0 @@
-../../lerobot/common/robots/so101_follower/so101.mdx
--- a/examples/10_use_so100.md
+++ b/examples/10_use_so100.md
@@ -0,0 +1,280 @@
+This tutorial explains how to use [SO-100](https://github.com/TheRobotStudio/SO-ARM100) with LeRobot.
+
+## Source the parts
+
+Follow this [README](https://github.com/TheRobotStudio/SO-ARM100). It contains the bill of materials, with links to source the parts, as well as the instructions to 3D print the parts, and advice if it's your first time printing or if you don't own a 3D printer already.
+
+**Important**: Before assembling, you will first need to configure your motors. To this end, we provide a nice script, so let's first install LeRobot. After configuration, we will also guide you through assembly.
+
+## Install LeRobot
+
+On your computer:
+
+1. [Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install):
+```bash
+mkdir -p ~/miniconda3
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
+bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
+rm ~/miniconda3/miniconda.sh
+~/miniconda3/bin/conda init bash
+```
+
+2. Restart shell or `source ~/.bashrc`
+
+3. Create and activate a fresh conda environment for lerobot
+```bash
+conda create -y -n lerobot python=3.10 && conda activate lerobot
+```
+
+4. Clone LeRobot:
+```bash
+git clone https://github.com/huggingface/lerobot.git ~/lerobot
+```
+
+5. Install LeRobot with dependencies for the feetech motors:
+```bash
+cd ~/lerobot && pip install -e ".[feetech]"
+```
+
+For Linux only (not Mac), install extra dependencies for recording datasets:
+```bash
+conda install -y -c conda-forge ffmpeg
+pip uninstall -y opencv-python
+conda install -y -c conda-forge "opencv>=4.10.0"
+```
+
+## Configure the motors
+
+Follow step 1 of the [assembly video](https://www.youtube.com/watch?v=FioA2oeFZ5I) which illustrates the use of our scripts below.
+
+**Find USB ports associated to your arms**
+To find the correct ports for each arm, run the utility script twice:
+```bash
+python lerobot/scripts/find_motors_bus_port.py
+```
+
+Example output when identifying the leader arm's port (e.g., `/dev/tty.usbmodem575E0031751` on Mac, or possibly `/dev/ttyACM0` on Linux):
+```
+Finding all available ports for the MotorBus.
+['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
+Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
+
+[...Disconnect leader arm and press Enter...]
+
+The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0031751
+Reconnect the usb cable.
+```
+
+Example output when identifying the follower arm's port (e.g., `/dev/tty.usbmodem575E0032081`, or possibly `/dev/ttyACM1` on Linux):
+```
+Finding all available ports for the MotorBus.
+['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
+Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
+
+[...Disconnect follower arm and press Enter...]
+
+The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0032081
+Reconnect the usb cable.
+```
+
+Troubleshooting: On Linux, you might need to give access to the USB ports by running:
+```bash
+sudo chmod 666 /dev/ttyACM0
+sudo chmod 666 /dev/ttyACM1
+```
+
+**Configure your motors**
+Plug your first motor and run this script to set its ID to 1. It will also set its present position to 2048, so expect your motor to rotate:
+```bash
+python lerobot/scripts/configure_motor.py \
+  --port /dev/tty.usbmodem58760432961 \
+  --brand feetech \
+  --model sts3215 \
+  --baudrate 1000000 \
+  --ID 1
+```
+
+Note: These motors are currently limited. They can take values between 0 and 4096 only, which corresponds to a full turn. They can't turn more than that. 2048 is at the middle of this range, so we can take -2048 steps (180 degrees anticlockwise) and reach the maximum range, or take +2048 steps (180 degrees clockwise) and reach the maximum range. The configuration step also sets the homing offset to 0, so that if you misassembled the arm, you can always update the homing offset to account for a shift up to ± 2048 steps (± 180 degrees).
+
+Then unplug your motor and plug the second motor and set its ID to 2.
+```bash
+python lerobot/scripts/configure_motor.py \
+  --port /dev/tty.usbmodem58760432961 \
+  --brand feetech \
+  --model sts3215 \
+  --baudrate 1000000 \
+  --ID 2
+```
+
+Redo the process for all your motors until ID 6. Do the same for the 6 motors of the leader arm.
+
+**Remove the gears of the 6 leader motors**
+Follow step 2 of the [assembly video](https://www.youtube.com/watch?v=FioA2oeFZ5I). You need to remove the gear for the motors of the leader arm. As a result, you will only use the position encoding of the motor and reduce friction to more easily operate the leader arm.
+
+**Add motor horn to the motors**
+Follow step 3 of the [assembly video](https://www.youtube.com/watch?v=FioA2oeFZ5I). For SO-100, you need to align the holes on the motor horn to the motor spline to be approximately 1:30, 4:30, 7:30 and 10:30.
+Try to avoid rotating the motor while doing so to keep position 2048 set during configuration. It is especially tricky for the leader motors as they are more sensitive without the gears, but it's ok if it's a bit rotated.
+
+## Assemble the arms
+
+Follow step 4 of the [assembly video](https://www.youtube.com/watch?v=FioA2oeFZ5I). The first arm should take a bit more than 1 hour to assemble, but once you get used to it, you can do it in under 1 hour for the second arm.
+
+## Calibrate
+
+Next, you'll need to calibrate your SO-100 robot to ensure that the leader and follower arms have the same position values when they are in the same physical position. This calibration is essential because it allows a neural network trained on one SO-100 robot to work on another.
+
+**Manual calibration of follower arm**
+/!\ Contrary to step 6 of the [assembly video](https://www.youtube.com/watch?v=FioA2oeFZ5I) which illustrates the auto calibration, we will actually do manual calibration of the follower for now.
+
+You will need to move the follower arm to these positions sequentially:
+
+| 1. Zero position | 2. Rotated position | 3. Rest position |
+|---|---|---|
+| <img src="../media/so100/follower_zero.webp?raw=true" alt="SO-100 follower arm zero position" title="SO-100 follower arm zero position" style="width:100%;"> | <img src="../media/so100/follower_rotated.webp?raw=true" alt="SO-100 follower arm rotated position" title="SO-100 follower arm rotated position" style="width:100%;"> | <img src="../media/so100/follower_rest.webp?raw=true" alt="SO-100 follower arm rest position" title="SO-100 follower arm rest position" style="width:100%;"> |
+
+Make sure both arms are connected and run this script to launch manual calibration:
+```bash
+python lerobot/scripts/control_robot.py calibrate \
+    --robot-path lerobot/configs/robot/so100.yaml \
+    --robot-overrides '~cameras' --arms main_follower
+```
+
+**Manual calibration of leader arm**
+Follow step 6 of the [assembly video](https://www.youtube.com/watch?v=FioA2oeFZ5I) which illustrates the manual calibration. You will need to move the leader arm to these positions sequentially:
+
+| 1. Zero position | 2. Rotated position | 3. Rest position |
+|---|---|---|
+| <img src="../media/so100/leader_zero.webp?raw=true" alt="SO-100 leader arm zero position" title="SO-100 leader arm zero position" style="width:100%;"> | <img src="../media/so100/leader_rotated.webp?raw=true" alt="SO-100 leader arm rotated position" title="SO-100 leader arm rotated position" style="width:100%;"> | <img src="../media/so100/leader_rest.webp?raw=true" alt="SO-100 leader arm rest position" title="SO-100 leader arm rest position" style="width:100%;"> |
+
+Run this script to launch manual calibration:
+```bash
+python lerobot/scripts/control_robot.py calibrate \
+    --robot-path lerobot/configs/robot/so100.yaml \
+    --robot-overrides '~cameras' --arms main_leader
+```
+
+## Teleoperate
+
+**Simple teleop**
+Then you are ready to teleoperate your robot! Run this simple script (it won't connect and display the cameras):
+```bash
+python lerobot/scripts/control_robot.py teleoperate \
+    --robot-path lerobot/configs/robot/so100.yaml \
+    --robot-overrides '~cameras' \
+    --display-cameras 0
+```
+
+
+**Teleop with displaying cameras**
+Follow [this guide to setup your cameras](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#c-add-your-cameras-with-opencvcamera). Then you will be able to display the cameras on your computer while you are teleoperating by running the following code. This is useful to prepare your setup before recording your first dataset.
+```bash
+python lerobot/scripts/control_robot.py teleoperate \
+    --robot-path lerobot/configs/robot/so100.yaml
+```
+
+## Record a dataset
+
+Once you're familiar with teleoperation, you can record your first dataset with SO-100.
+
+If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
+```bash
+huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
+```
+
+Store your Hugging Face repository name in a variable to run these commands:
+```bash
+HF_USER=$(huggingface-cli whoami | head -n 1)
+echo $HF_USER
+```
+
+Record 2 episodes and upload your dataset to the hub:
+```bash
+python lerobot/scripts/control_robot.py record \
+    --robot-path lerobot/configs/robot/so100.yaml \
+    --fps 30 \
+    --root data \
+    --repo-id ${HF_USER}/so100_test \
+    --tags so100 tutorial \
+    --warmup-time-s 5 \
+    --episode-time-s 40 \
+    --reset-time-s 10 \
+    --num-episodes 2 \
+    --push-to-hub 1
+```
+
+## Visualize a dataset
+
+If you uploaded your dataset to the hub with `--push-to-hub 1`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id given by:
+```bash
+echo ${HF_USER}/so100_test
+```
+
+If you didn't upload your dataset (i.e. you used `--push-to-hub 0`), you can also visualize it locally with:
+```bash
+python lerobot/scripts/visualize_dataset_html.py \
+  --root data \
+  --repo-id ${HF_USER}/so100_test
+```
+
+## Replay an episode
+
+Now try to replay the first episode on your robot:
+```bash
+DATA_DIR=data python lerobot/scripts/control_robot.py replay \
+    --robot-path lerobot/configs/robot/so100.yaml \
+    --fps 30 \
+    --root data \
+    --repo-id ${HF_USER}/so100_test \
+    --episode 0
+```
+
+## Train a policy
+
+To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
+```bash
+DATA_DIR=data python lerobot/scripts/train.py \
+  dataset_repo_id=${HF_USER}/so100_test \
+  policy=act_so100_real \
+  env=so100_real \
+  hydra.run.dir=outputs/train/act_so100_test \
+  hydra.job.name=act_so100_test \
+  device=cuda \
+  wandb.enable=true
+```
+
+Let's explain it:
+1. We provided the dataset as argument with `dataset_repo_id=${HF_USER}/so100_test`.
+2. We provided the policy with `policy=act_so100_real`. This loads configurations from [`lerobot/configs/policy/act_so100_real.yaml`](../lerobot/configs/policy/act_so100_real.yaml). Importantly, this policy uses 2 cameras as input `laptop`, `phone`.
+3. We provided an environment as argument with `env=so100_real`. This loads configurations from [`lerobot/configs/env/so100_real.yaml`](../lerobot/configs/env/so100_real.yaml).
+4. We provided `device=cuda` since we are training on a Nvidia GPU, but you can also use `device=mps` if you are using a Mac with Apple silicon, or `device=cpu` otherwise.
+5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
+6. We added `DATA_DIR=data` to access your dataset stored in your local `data` directory. If you don't provide `DATA_DIR`, your dataset will be downloaded from Hugging Face hub to your cache folder `$HOME/.cache/huggingface`. In future versions of `lerobot`, both directories will be in sync.
+
+Training should take several hours. You will find checkpoints in `outputs/train/act_so100_test/checkpoints`.
+
+## Evaluate your policy
+
+You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
+```bash
+python lerobot/scripts/control_robot.py record \
+  --robot-path lerobot/configs/robot/so100.yaml \
+  --fps 30 \
+  --root data \
+  --repo-id ${HF_USER}/eval_act_so100_test \
+  --tags so100 tutorial eval \
+  --warmup-time-s 5 \
+  --episode-time-s 40 \
+  --reset-time-s 10 \
+  --num-episodes 10 \
+  -p outputs/train/act_so100_test/checkpoints/last/pretrained_model
+```
+
+As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
+1. There is an additional `-p` argument which indicates the path to your policy checkpoint (e.g. `-p outputs/train/eval_so100_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `-p ${HF_USER}/act_so100_test`).
+2. The name of the dataset begins with `eval` to reflect that you are running inference (e.g. `--repo-id ${HF_USER}/eval_act_so100_test`).
+
+## More
+
+Follow this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth tutorial on controlling real robots with LeRobot.
+
+If you have any questions or need help, please reach out on Discord in the channel [`#so100-arm`](https://discord.com/channels/1216765309076115607/1237741463832363039).
--- a/examples/11_use_moss.md
+++ b/examples/11_use_moss.md
@@ -0,0 +1,280 @@
+This tutorial explains how to use [Moss v1](https://github.com/jess-moss/moss-robot-arms) with LeRobot.
+
+## Source the parts
+
+Follow this [README](https://github.com/jess-moss/moss-robot-arms). It contains the bill of materials with links to source the parts, instructions to 3D print the parts, and advice if it's your first time printing or if you don't already own a 3D printer.
+
+**Important**: Before assembling, you will first need to configure your motors. To this end, we provide a nice script, so let's first install LeRobot. After configuration, we will also guide you through assembly.
+
+## Install LeRobot
+
+On your computer:
+
+1. [Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install):
+```bash
+mkdir -p ~/miniconda3
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
+bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
+rm ~/miniconda3/miniconda.sh
+~/miniconda3/bin/conda init bash
+```
+
+2. Restart shell or `source ~/.bashrc`
+
+3. Create and activate a fresh conda environment for lerobot
+```bash
+conda create -y -n lerobot python=3.10 && conda activate lerobot
+```
+
+4. Clone LeRobot:
+```bash
+git clone https://github.com/huggingface/lerobot.git ~/lerobot
+```
+
+5. Install LeRobot with dependencies for the feetech motors:
+```bash
+cd ~/lerobot && pip install -e ".[feetech]"
+```
+
+For Linux only (not Mac), install extra dependencies for recording datasets:
+```bash
+conda install -y -c conda-forge ffmpeg
+pip uninstall -y opencv-python
+conda install -y -c conda-forge "opencv>=4.10.0"
+```
+
+## Configure the motors
+
+Follow step 1 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic) which illustrates the use of our scripts below.
+
+**Find USB ports associated to your arms**
+To find the correct ports for each arm, run the utility script twice:
+```bash
+python lerobot/scripts/find_motors_bus_port.py
+```
+
+Example output when identifying the leader arm's port (e.g., `/dev/tty.usbmodem575E0031751` on Mac, or possibly `/dev/ttyACM0` on Linux):
+```
+Finding all available ports for the MotorBus.
+['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
+Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
+
+[...Disconnect leader arm and press Enter...]
+
+The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0031751
+Reconnect the usb cable.
+```
+
+Example output when identifying the follower arm's port (e.g., `/dev/tty.usbmodem575E0032081`, or possibly `/dev/ttyACM1` on Linux):
+```
+Finding all available ports for the MotorBus.
+['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
+Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
+
+[...Disconnect follower arm and press Enter...]
+
+The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0032081
+Reconnect the usb cable.
+```
+
+Troubleshooting: On Linux, you might need to give access to the USB ports by running:
+```bash
+sudo chmod 666 /dev/ttyACM0
+sudo chmod 666 /dev/ttyACM1
+```
+
+**Configure your motors**
+Plug your first motor and run this script to set its ID to 1. It will also set its present position to 2048, so expect your motor to rotate:
+```bash
+python lerobot/scripts/configure_motor.py \
+  --port /dev/tty.usbmodem58760432961 \
+  --brand feetech \
+  --model sts3215 \
+  --baudrate 1000000 \
+  --ID 1
+```
+
+Note: These motors are currently limited. They can take values between 0 and 4096 only, which corresponds to a full turn. They can't turn more than that. 2048 is at the middle of this range, so we can take -2048 steps (180 degrees anticlockwise) and reach the maximum range, or take +2048 steps (180 degrees clockwise) and reach the maximum range. The configuration step also sets the homing offset to 0, so that if you misassembled the arm, you can always update the homing offset to account for a shift up to ± 2048 steps (± 180 degrees).
+
+Then unplug your motor and plug the second motor and set its ID to 2.
+```bash
+python lerobot/scripts/configure_motor.py \
+  --port /dev/tty.usbmodem58760432961 \
+  --brand feetech \
+  --model sts3215 \
+  --baudrate 1000000 \
+  --ID 2
+```
+
+Redo the process for all your motors until ID 6. Do the same for the 6 motors of the leader arm.
+
+**Remove the gears of the 6 leader motors**
+Follow step 2 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic). You need to remove the gear for the motors of the leader arm. As a result, you will only use the position encoding of the motor and reduce friction to more easily operate the leader arm.
+
+**Add motor horn to the motors**
+Follow step 3 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic). For Moss v1, you need to align the holes on the motor horn to the motor spline to be approximately 3, 6, 9 and 12 o'clock.
+Try to avoid rotating the motor while doing so, to keep position 2048 set during configuration. It is especially tricky for the leader motors as they are more sensitive without the gears, but it's ok if a motor ends up slightly rotated.
+
+## Assemble the arms
+
+Follow step 4 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic). The first arm should take a bit more than 1 hour to assemble, but once you get used to it, you can do it in under 1 hour for the second arm.
+
+## Calibrate
+
+Next, you'll need to calibrate your Moss v1 robot to ensure that the leader and follower arms have the same position values when they are in the same physical position. This calibration is essential because it allows a neural network trained on one Moss v1 robot to work on another.
+
+**Manual calibration of follower arm**
+/!\ Contrary to step 6 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic) which illustrates the auto calibration, we will actually do manual calibration of the follower arm for now.
+
+You will need to move the follower arm to these positions sequentially:
+
+| 1. Zero position | 2. Rotated position | 3. Rest position |
+|---|---|---|
+| <img src="../media/moss/follower_zero.webp?raw=true" alt="Moss v1 follower arm zero position" title="Moss v1 follower arm zero position" style="width:100%;"> | <img src="../media/moss/follower_rotated.webp?raw=true" alt="Moss v1 follower arm rotated position" title="Moss v1 follower arm rotated position" style="width:100%;"> | <img src="../media/moss/follower_rest.webp?raw=true" alt="Moss v1 follower arm rest position" title="Moss v1 follower arm rest position" style="width:100%;"> |
+
+Make sure both arms are connected and run this script to launch manual calibration:
+```bash
+python lerobot/scripts/control_robot.py calibrate \
+    --robot-path lerobot/configs/robot/moss.yaml \
+    --robot-overrides '~cameras' --arms main_follower
+```
+
+**Manual calibration of leader arm**
+Follow step 6 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic) which illustrates the manual calibration. You will need to move the leader arm to these positions sequentially:
+
+| 1. Zero position | 2. Rotated position | 3. Rest position |
+|---|---|---|
+| <img src="../media/moss/leader_zero.webp?raw=true" alt="Moss v1 leader arm zero position" title="Moss v1 leader arm zero position" style="width:100%;"> | <img src="../media/moss/leader_rotated.webp?raw=true" alt="Moss v1 leader arm rotated position" title="Moss v1 leader arm rotated position" style="width:100%;"> | <img src="../media/moss/leader_rest.webp?raw=true" alt="Moss v1 leader arm rest position" title="Moss v1 leader arm rest position" style="width:100%;"> |
+
+Run this script to launch manual calibration:
+```bash
+python lerobot/scripts/control_robot.py calibrate \
+    --robot-path lerobot/configs/robot/moss.yaml \
+    --robot-overrides '~cameras' --arms main_leader
+```
+
+## Teleoperate
+
+**Simple teleop**
+Then you are ready to teleoperate your robot! Run this simple script (it won't connect and display the cameras):
+```bash
+python lerobot/scripts/control_robot.py teleoperate \
+    --robot-path lerobot/configs/robot/moss.yaml \
+    --robot-overrides '~cameras' \
+    --display-cameras 0
+```
+
+
+**Teleop with displaying cameras**
+Follow [this guide to setup your cameras](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#c-add-your-cameras-with-opencvcamera). Then you will be able to display the cameras on your computer while you are teleoperating by running the following code. This is useful to prepare your setup before recording your first dataset.
+```bash
+python lerobot/scripts/control_robot.py teleoperate \
+    --robot-path lerobot/configs/robot/moss.yaml
+```
+
+## Record a dataset
+
+Once you're familiar with teleoperation, you can record your first dataset with Moss v1.
+
+If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
+```bash
+huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
+```
+
+Store your Hugging Face repository name in a variable to run these commands:
+```bash
+HF_USER=$(huggingface-cli whoami | head -n 1)
+echo $HF_USER
+```
+
+Record 2 episodes and upload your dataset to the hub:
+```bash
+python lerobot/scripts/control_robot.py record \
+    --robot-path lerobot/configs/robot/moss.yaml \
+    --fps 30 \
+    --root data \
+    --repo-id ${HF_USER}/moss_test \
+    --tags moss tutorial \
+    --warmup-time-s 5 \
+    --episode-time-s 40 \
+    --reset-time-s 10 \
+    --num-episodes 2 \
+    --push-to-hub 1
+```
+
+## Visualize a dataset
+
+If you uploaded your dataset to the hub with `--push-to-hub 1`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id given by:
+```bash
+echo ${HF_USER}/moss_test
+```
+
+If you didn't upload your dataset (i.e. you used `--push-to-hub 0`), you can also visualize it locally with:
+```bash
+python lerobot/scripts/visualize_dataset_html.py \
+  --root data \
+  --repo-id ${HF_USER}/moss_test
+```
+
+## Replay an episode
+
+Now try to replay the first episode on your robot:
+```bash
+DATA_DIR=data python lerobot/scripts/control_robot.py replay \
+    --robot-path lerobot/configs/robot/moss.yaml \
+    --fps 30 \
+    --root data \
+    --repo-id ${HF_USER}/moss_test \
+    --episode 0
+```
+
+## Train a policy
+
+To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
+```bash
+DATA_DIR=data python lerobot/scripts/train.py \
+  dataset_repo_id=${HF_USER}/moss_test \
+  policy=act_moss_real \
+  env=moss_real \
+  hydra.run.dir=outputs/train/act_moss_test \
+  hydra.job.name=act_moss_test \
+  device=cuda \
+  wandb.enable=true
+```
+
+Let's explain it:
+1. We provided the dataset as argument with `dataset_repo_id=${HF_USER}/moss_test`.
+2. We provided the policy with `policy=act_moss_real`. This loads configurations from [`lerobot/configs/policy/act_moss_real.yaml`](../lerobot/configs/policy/act_moss_real.yaml). Importantly, this policy uses 2 cameras as input `laptop`, `phone`.
+3. We provided an environment as argument with `env=moss_real`. This loads configurations from [`lerobot/configs/env/moss_real.yaml`](../lerobot/configs/env/moss_real.yaml).
+4. We provided `device=cuda` since we are training on a Nvidia GPU, but you can also use `device=mps` if you are using a Mac with Apple silicon, or `device=cpu` otherwise.
+5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
+6. We added `DATA_DIR=data` to access your dataset stored in your local `data` directory. If you don't provide `DATA_DIR`, your dataset will be downloaded from the Hugging Face hub to your cache folder `$HOME/.cache/huggingface`. In future versions of `lerobot`, both directories will be in sync.
+
+Training should take several hours. You will find checkpoints in `outputs/train/act_moss_test/checkpoints`.
+
+## Evaluate your policy
+
+You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
+```bash
+python lerobot/scripts/control_robot.py record \
+  --robot-path lerobot/configs/robot/moss.yaml \
+  --fps 30 \
+  --root data \
+  --repo-id ${HF_USER}/eval_act_moss_test \
+  --tags moss tutorial eval \
+  --warmup-time-s 5 \
+  --episode-time-s 40 \
+  --reset-time-s 10 \
+  --num-episodes 10 \
+  -p outputs/train/act_moss_test/checkpoints/last/pretrained_model
+```
+
+As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
+1. There is an additional `-p` argument which indicates the path to your policy checkpoint (e.g. `-p outputs/train/act_moss_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `-p ${HF_USER}/act_moss_test`).
+2. The name of the dataset begins with `eval` to reflect that you are running inference (e.g. `--repo-id ${HF_USER}/eval_act_moss_test`).
+
+## More
+
+Follow this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth tutorial on controlling real robots with LeRobot.
+
+If you have any questions or need help, please reach out on Discord in the channel [`#moss-arm`](https://discord.com/channels/1216765309076115607/1275374638985252925).
--- a/examples/1_load_lerobot_dataset.py
+++ b/examples/1_load_lerobot_dataset.py
@@ -1,136 +1,80 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 """
 This script demonstrates the use of `LeRobotDataset` class for handling and processing robotic datasets from Hugging Face.
 It illustrates how to load datasets, manipulate them, and apply transformations suitable for machine learning tasks in PyTorch.

 Features included in this script:
- Viewing a dataset's metadata and exploring its properties.
- Loading an existing dataset from the hub or a subset of it.
- Accessing frames by episode number.
+- Loading a dataset and accessing its properties.
+- Filtering data by episode number.
+- Converting tensor data for visualization.
+- Saving video files from dataset frames.
 - Using advanced dataset features like timestamp-based frame selection.
 - Demonstrating compatibility with PyTorch DataLoader for batch processing.

 The script ends with examples of how to batch process data using PyTorch's DataLoader.
 """

+from pathlib import Path
 from pprint import pprint

+import imageio
 import torch
-from huggingface_hub import HfApi

 import lerobot
-from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

-# We ported a number of existing datasets ourselves, use this to see the list:
 print("List of available datasets:")
 pprint(lerobot.available_datasets)

-# You can also browse through the datasets created/ported by the community on the hub using the hub api:
-hub_api = HfApi()
-repo_ids = [info.id for info in hub_api.list_datasets(task_categories="robotics", tags=["LeRobot"])]
-pprint(repo_ids)
+# Let's take one for this example
+repo_id = "lerobot/pusht"

-# Or simply explore them in your web browser directly at:
-# https://huggingface.co/datasets?other=LeRobot
-
-# Let's take this one for this example
-repo_id = "lerobot/aloha_mobile_cabinet"
-# We can have a look and fetch its metadata to know more about it:
-ds_meta = LeRobotDatasetMetadata(repo_id)
-
-# By instantiating just this class, you can quickly access useful information about the content and the
-# structure of the dataset without downloading the actual data yet (only metadata files — which are
-# lightweight).
-print(f"Total number of episodes: {ds_meta.total_episodes}")
-print(f"Average number of frames per episode: {ds_meta.total_frames / ds_meta.total_episodes:.3f}")
-print(f"Frames per second used during data collection: {ds_meta.fps}")
-print(f"Robot type: {ds_meta.robot_type}")
-print(f"keys to access images from cameras: {ds_meta.camera_keys=}\n")
-
-print("Tasks:")
-print(ds_meta.tasks)
-print("Features:")
-pprint(ds_meta.features)
-
-# You can also get a short summary by simply printing the object:
-print(ds_meta)
-
-# You can then load the actual dataset from the hub.
-# Either load any subset of episodes:
-dataset = LeRobotDataset(repo_id, episodes=[0, 10, 11, 23])
-
-# And see how many frames you have:
-print(f"Selected episodes: {dataset.episodes}")
-print(f"Number of episodes selected: {dataset.num_episodes}")
-print(f"Number of frames selected: {dataset.num_frames}")
-
-# Or simply load the entire dataset:
+# You can easily load a dataset from a Hugging Face repository
 dataset = LeRobotDataset(repo_id)
-print(f"Number of episodes selected: {dataset.num_episodes}")
-print(f"Number of frames selected: {dataset.num_frames}")

-# The previous metadata class is contained in the 'meta' attribute of the dataset:
-print(dataset.meta)
-
-# LeRobotDataset actually wraps an underlying Hugging Face dataset
-# (see https://huggingface.co/docs/datasets for more information).
+# LeRobotDataset is actually a thin wrapper around an underlying Hugging Face dataset
+# (see https://huggingface.co/docs/datasets/index for more information).
+print(dataset)
 print(dataset.hf_dataset)

-# LeRobot datasets also subclasses PyTorch datasets so you can do everything you know and love from working
-# with the latter, like iterating through the dataset.
-# The __getitem__ iterates over the frames of the dataset. Since our datasets are also structured by
-# episodes, you can access the frame indices of any episode using the episode_data_index. Here, we access
-# frame indices associated to the first episode:
+# And provides additional utilities for robotics and compatibility with Pytorch
+print(f"\naverage number of frames per episode: {dataset.num_samples / dataset.num_episodes:.3f}")
+print(f"frames per second used during data collection: {dataset.fps=}")
+print(f"keys to access images from cameras: {dataset.camera_keys=}\n")
+
+# Access frame indexes associated to first episode
 episode_index = 0
 from_idx = dataset.episode_data_index["from"][episode_index].item()
 to_idx = dataset.episode_data_index["to"][episode_index].item()

-# Then we grab all the image frames from the first camera:
-camera_key = dataset.meta.camera_keys[0]
-frames = [dataset[idx][camera_key] for idx in range(from_idx, to_idx)]
+# LeRobot datasets actually subclass PyTorch datasets so you can do everything you know and love from working
+# with the latter, like iterating through the dataset. Here we grab all the image frames.
+frames = [dataset[idx]["observation.image"] for idx in range(from_idx, to_idx)]

-# The objects returned by the dataset are all torch.Tensors
-print(type(frames[0]))
-print(frames[0].shape)
+# Video frames are now float32 in range [0,1] channel first (c,h,w) to follow pytorch convention. To visualize
+# them, we convert to uint8 in range [0,255]
+frames = [(frame * 255).type(torch.uint8) for frame in frames]
+# and to channel last (h,w,c).
+frames = [frame.permute((1, 2, 0)).numpy() for frame in frames]

-# Since we're using pytorch, the shape is in pytorch, channel-first convention (c, h, w).
-# We can compare this shape with the information available for that feature
-pprint(dataset.features[camera_key])
-# In particular:
-print(dataset.features[camera_key]["shape"])
-# The shape is in (h, w, c) which is a more universal format.
+# Finally, we save the frames to a mp4 video for visualization.
+Path("outputs/examples/1_load_lerobot_dataset").mkdir(parents=True, exist_ok=True)
+imageio.mimsave("outputs/examples/1_load_lerobot_dataset/episode_0.mp4", frames, fps=dataset.fps)

 # For many machine learning applications we need to load the history of past observations or trajectories of
 # future actions. Our datasets can load previous and future frames for each key/modality, using timestamps
 # differences with the current loaded frame. For instance:
 delta_timestamps = {
    # loads 4 images: 1 second before current frame, 500 ms before, 200 ms before, and current frame
-    camera_key: [-1, -0.5, -0.20, 0],
-    # loads 6 state vectors: 1.5 seconds before, 1 second before, ... 200 ms, 100 ms, and current frame
-    "observation.state": [-1.5, -1, -0.5, -0.20, -0.10, 0],
+    "observation.image": [-1, -0.5, -0.20, 0],
+    # loads 8 state vectors: 1.5 seconds before, 1 second before, ... 20 ms, 10 ms, and current frame
+    "observation.state": [-1.5, -1, -0.5, -0.20, -0.10, -0.02, -0.01, 0],
    # loads 64 action vectors: current frame, 1 frame in the future, 2 frames, ... 63 frames in the future
    "action": [t / dataset.fps for t in range(64)],
 }
-# Note that in any case, these delta_timestamps values need to be multiples of (1/fps) so that added to any
-# timestamp, you still get a valid timestamp.
-
 dataset = LeRobotDataset(repo_id, delta_timestamps=delta_timestamps)
-print(f"\n{dataset[0][camera_key].shape=}")  # (4, c, h, w)
-print(f"{dataset[0]['observation.state'].shape=}")  # (6, c)
-print(f"{dataset[0]['action'].shape=}\n")  # (64, c)
+print(f"\n{dataset[0]['observation.image'].shape=}")  # (4,c,h,w)
+print(f"{dataset[0]['observation.state'].shape=}")  # (8,c)
+print(f"{dataset[0]['action'].shape=}\n")  # (64,c)

 # Finally, our datasets are fully compatible with PyTorch dataloaders and samplers because they are just
 # PyTorch datasets.
@@ -140,9 +84,8 @@ dataloader = torch.utils.data.DataLoader(
    batch_size=32,
    shuffle=True,
 )
-
 for batch in dataloader:
-    print(f"{batch[camera_key].shape=}")  # (32, 4, c, h, w)
-    print(f"{batch['observation.state'].shape=}")  # (32, 6, c)
-    print(f"{batch['action'].shape=}")  # (32, 64, c)
+    print(f"{batch['observation.image'].shape=}")  # (32,4,c,h,w)
+    print(f"{batch['observation.state'].shape=}")  # (32,8,c)
+    print(f"{batch['action'].shape=}")  # (32,64,c)
    break
--- a/examples/2_evaluate_pretrained_policy.py
+++ b/examples/2_evaluate_pretrained_policy.py
@@ -1,25 +1,6 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 """
-This script demonstrates how to evaluate a pretrained policy from the HuggingFace Hub or from your local
+This script demonstrates how to evaluate a pretrained policy from the HuggingFace Hub or from your local
 training outputs directory. In the latter case, you might want to run examples/3_train_policy.py first.
-
-It requires the installation of the 'gym_pusht' simulation environment. Install it by running:
-```bash
-pip install -e ".[pusht]"
-```
 """

 from pathlib import Path
@@ -29,6 +10,7 @@ import gymnasium as gym
 import imageio
 import numpy
 import torch
+from huggingface_hub import snapshot_download

 from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy

@@ -36,15 +18,25 @@ from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
 output_directory = Path("outputs/eval/example_pusht_diffusion")
 output_directory.mkdir(parents=True, exist_ok=True)

-# Select your device
-device = "cuda"
-
-# Provide the [hugging face repo id](https://huggingface.co/lerobot/diffusion_pusht):
-pretrained_policy_path = "lerobot/diffusion_pusht"
-# OR a path to a local outputs/train folder.
+# Download the diffusion policy for pusht environment
+pretrained_policy_path = Path(snapshot_download("lerobot/diffusion_pusht"))
+# OR uncomment the following to evaluate a policy from the local outputs/train folder.
 # pretrained_policy_path = Path("outputs/train/example_pusht_diffusion")

 policy = DiffusionPolicy.from_pretrained(pretrained_policy_path)
+policy.eval()
+
+# Check if GPU is available
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    print("GPU is available. Device set to:", device)
+else:
+    device = torch.device("cpu")
+    print(f"GPU is not available. Device set to: {device}. Inference will be slower than on GPU.")
+    # Decrease the number of reverse-diffusion steps (trades off a bit of quality for 10x speed)
+    policy.diffusion.num_inference_steps = 10
+
+policy.to(device)

 # Initialize evaluation environment to render two observation types:
 # an image of the scene and state/position of the agent. The environment
@@ -55,17 +47,7 @@ env = gym.make(
    max_episode_steps=300,
 )

-# We can verify that the shapes of the features expected by the policy match the ones from the observations
-# produced by the environment
-print(policy.config.input_features)
-print(env.observation_space)
-
-# Similarly, we can check that the actions produced by the policy will match the actions expected by the
-# environment
-print(policy.config.output_features)
-print(env.action_space)
-
-# Reset the policy and environments to prepare for rollout
+# Reset the policy and environments to prepare for rollout
 policy.reset()
 numpy_observation, info = env.reset(seed=42)

@@ -119,7 +101,7 @@ while not done:
    rewards.append(reward)
    frames.append(env.render())

-    # The rollout is considered done when the success state is reached (i.e. terminated is True),
+    # The rollout is considered done when the success state is reached (i.e. terminated is True),
    # or the maximum number of iterations is reached (i.e. truncated is True)
    done = terminated | truncated | done
    step += 1
--- a/examples/3_train_policy.py
+++ b/examples/3_train_policy.py
@@ -1,18 +1,4 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""This script demonstrates how to train Diffusion Policy on the PushT environment.
+"""This script demonstrates how to train Diffusion Policy on the PushT environment.

 Once you have trained a model with this script, you can try to evaluate it on
 examples/2_evaluate_pretrained_policy.py
@@ -22,99 +8,72 @@ from pathlib import Path

 import torch

-from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
-from lerobot.common.datasets.utils import dataset_to_policy_features
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
 from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
-from lerobot.configs.types import FeatureType

+# Create a directory to store the training checkpoint.
+output_directory = Path("outputs/train/example_pusht_diffusion")
+output_directory.mkdir(parents=True, exist_ok=True)

-def main():
-    # Create a directory to store the training checkpoint.
-    output_directory = Path("outputs/train/example_pusht_diffusion")
-    output_directory.mkdir(parents=True, exist_ok=True)
+# Number of offline training steps (we'll only do offline training for this example.)
+# Adjust as you prefer. 5000 steps are needed to get something worth evaluating.
+training_steps = 5000
+device = torch.device("cuda")
+log_freq = 250

-    # # Select your device
-    device = torch.device("cuda")
+# Set up the dataset.
+delta_timestamps = {
+    # Load the previous image and state at -0.1 seconds before current frame,
+    # then load current image and state corresponding to 0.0 second.
+    "observation.image": [-0.1, 0.0],
+    "observation.state": [-0.1, 0.0],
+    # Load the previous action (-0.1), the next action to be executed (0.0),
+    # and 14 future actions with a 0.1 seconds spacing. All these actions will be
+    # used to supervise the policy.
+    "action": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4],
+}
+dataset = LeRobotDataset("lerobot/pusht", delta_timestamps=delta_timestamps)

-    # Number of offline training steps (we'll only do offline training for this example.)
-    # Adjust as you prefer. 5000 steps are needed to get something worth evaluating.
-    training_steps = 5000
-    log_freq = 1
+# Set up the policy.
+# Policies are initialized with a configuration class, in this case `DiffusionConfig`.
+# For this example, no arguments need to be passed because the defaults are set up for PushT.
+# If you're doing something different, you will likely need to change at least some of the defaults.
+cfg = DiffusionConfig()
+policy = DiffusionPolicy(cfg, dataset_stats=dataset.stats)
+policy.train()
+policy.to(device)

-    # When starting from scratch (i.e. not from a pretrained policy), we need to specify 2 things before
-    # creating the policy:
-    #   - input/output shapes: to properly size the policy
-    #   - dataset stats: for normalization and denormalization of input/outputs
-    dataset_metadata = LeRobotDatasetMetadata("lerobot/pusht")
-    features = dataset_to_policy_features(dataset_metadata.features)
-    output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
-    input_features = {key: ft for key, ft in features.items() if key not in output_features}
+optimizer = torch.optim.Adam(policy.parameters(), lr=1e-4)

-    # Policies are initialized with a configuration class, in this case `DiffusionConfig`. For this example,
-    # we'll just use the defaults and so no arguments other than input/output features need to be passed.
-    cfg = DiffusionConfig(input_features=input_features, output_features=output_features)
+# Create dataloader for offline training.
+dataloader = torch.utils.data.DataLoader(
+    dataset,
+    num_workers=4,
+    batch_size=64,
+    shuffle=True,
+    pin_memory=device != torch.device("cpu"),
+    drop_last=True,
+)

-    # We can now instantiate our policy with this config and the dataset stats.
-    policy = DiffusionPolicy(cfg, dataset_stats=dataset_metadata.stats)
-    policy.train()
-    policy.to(device)
+# Run training loop.
+step = 0
+done = False
+while not done:
+    for batch in dataloader:
+        batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}
+        output_dict = policy.forward(batch)
+        loss = output_dict["loss"]
+        loss.backward()
+        optimizer.step()
+        optimizer.zero_grad()

-    # Another policy-dataset interaction is with the delta_timestamps. Each policy expects a given number frames
-    # which can differ for inputs, outputs and rewards (if there are some).
-    delta_timestamps = {
-        "observation.image": [i / dataset_metadata.fps for i in cfg.observation_delta_indices],
-        "observation.state": [i / dataset_metadata.fps for i in cfg.observation_delta_indices],
-        "action": [i / dataset_metadata.fps for i in cfg.action_delta_indices],
-    }
+        if step % log_freq == 0:
+            print(f"step: {step} loss: {loss.item():.3f}")
+        step += 1
+        if step >= training_steps:
+            done = True
+            break

-    # In this case with the standard configuration for Diffusion Policy, it is equivalent to this:
-    delta_timestamps = {
-        # Load the previous image and state at -0.1 seconds before current frame,
-        # then load current image and state corresponding to 0.0 second.
-        "observation.image": [-0.1, 0.0],
-        "observation.state": [-0.1, 0.0],
-        # Load the previous action (-0.1), the next action to be executed (0.0),
-        # and 14 future actions with a 0.1 seconds spacing. All these actions will be
-        # used to supervise the policy.
-        "action": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4],
-    }
-
-    # We can then instantiate the dataset with these delta_timestamps configuration.
-    dataset = LeRobotDataset("lerobot/pusht", delta_timestamps=delta_timestamps)
-
-    # Then we create our optimizer and dataloader for offline training.
-    optimizer = torch.optim.Adam(policy.parameters(), lr=1e-4)
-    dataloader = torch.utils.data.DataLoader(
-        dataset,
-        num_workers=4,
-        batch_size=64,
-        shuffle=True,
-        pin_memory=device.type != "cpu",
-        drop_last=True,
-    )
-
-    # Run training loop.
-    step = 0
-    done = False
-    while not done:
-        for batch in dataloader:
-            batch = {k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in batch.items()}
-            loss, _ = policy.forward(batch)
-            loss.backward()
-            optimizer.step()
-            optimizer.zero_grad()
-
-            if step % log_freq == 0:
-                print(f"step: {step} loss: {loss.item():.3f}")
-            step += 1
-            if step >= training_steps:
-                done = True
-                break
-
-    # Save a policy checkpoint.
-    policy.save_pretrained(output_directory)
-
-
-if __name__ == "__main__":
-    main()
+# Save a policy checkpoint.
+policy.save_pretrained(output_directory)
--- a/examples/4_train_policy_with_script.md
+++ b/examples/4_train_policy_with_script.md
@@ -1,223 +1,193 @@
-This tutorial will explain the training script, how to use it, and particularly how to configure everything needed for the training run.
-> **Note:** The following assumes you're running these commands on a machine equipped with a cuda GPU. If you don't have one (or if you're using a Mac), you can add `--policy.device=cpu` (`--policy.device=mps` respectively). However, be advised that the code executes much slower on cpu.
-
+This tutorial will explain the training script, how to use it, and particularly the use of Hydra to configure everything needed for the training run.

 ## The training script

-LeRobot offers a training script at [`lerobot/scripts/train.py`](../lerobot/scripts/train.py). At a high level it does the following:
+LeRobot offers a training script at [`lerobot/scripts/train.py`](../lerobot/scripts/train.py). At a high level it does the following:

- Initialize/load a configuration for the following steps using.
- Instantiates a dataset.
- (Optional) Instantiates a simulation environment corresponding to that dataset.
- Instantiates a policy.
+- Loads a Hydra configuration file for the following steps (more on Hydra in a moment).
+- Makes a simulation environment.
+- Makes a dataset corresponding to that simulation environment.
+- Makes a policy.
 - Runs a standard training loop with forward pass, backward pass, optimization step, and occasional logging, evaluation (of the policy on the environment), and checkpointing.

-## Overview of the configuration system
+## Basics of how we use Hydra
+
+Explaining the ins and outs of [Hydra](https://hydra.cc/docs/intro/) is beyond the scope of this document, but here we'll share the main points you need to know.
+
+First, `lerobot/configs` has a directory structure like this:
+
+```
+.
+├── default.yaml
+├── env
+│   ├── aloha.yaml
+│   ├── pusht.yaml
+│   └── xarm.yaml
+└── policy
+    ├── act.yaml
+    ├── diffusion.yaml
+    └── tdmpc.yaml
+```
+
+**_For brevity, in the rest of this document we'll drop the leading `lerobot/configs` path. So `default.yaml` really refers to `lerobot/configs/default.yaml`._**
+
+When you run the training script with

-In the training script, the main function `train` expects a `TrainPipelineConfig` object:
 ```python
-# train.py
-@parser.wrap()
-def train(cfg: TrainPipelineConfig):
+python lerobot/scripts/train.py
 ```

-You can inspect the `TrainPipelineConfig` defined in [`lerobot/configs/train.py`](../lerobot/configs/train.py) (which is heavily commented and meant to be a reference to understand any option)
+Hydra is set up to read `default.yaml` (via the `@hydra.main` decorator). If you take a look at `@hydra.main`'s arguments you will see `config_path="../configs", config_name="default"`. At the top of `default.yaml` is a `defaults` section which looks like this:

-When running the script, inputs for the command line are parsed thanks to the `@parser.wrap()` decorator and an instance of this class is automatically generated. Under the hood, this is done with [Draccus](https://github.com/dlwh/draccus) which is a tool dedicated to this purpose. If you're familiar with Hydra, Draccus can similarly load configurations from config files (.json, .yaml) and also override their values through command line inputs. Unlike Hydra, these configurations are pre-defined in the code through dataclasses rather than being defined entirely in config files. This allows for more rigorous serialization/deserialization, typing, and to manipulate configuration as objects directly in the code and not as dictionaries or namespaces (which enables nice features in an IDE such as autocomplete, jump-to-def, etc.)
-
-Let's have a look at a simplified example. Amongst other attributes, the training config has the following attributes:
-```python
-@dataclass
-class TrainPipelineConfig:
-    dataset: DatasetConfig
-    env: envs.EnvConfig | None = None
-    policy: PreTrainedConfig | None = None
-```
-in which `DatasetConfig` for example is defined as such:
-```python
-@dataclass
-class DatasetConfig:
-    repo_id: str
-    episodes: list[int] | None = None
-    video_backend: str = "pyav"
+```yaml
+defaults:
+  - _self_
+  - env: pusht
+  - policy: diffusion
 ```

-This creates a hierarchical relationship where, for example assuming we have a `cfg` instance of `TrainPipelineConfig`, we can access the `repo_id` value with `cfg.dataset.repo_id`.
-From the command line, we can specify this value by using a very similar syntax `--dataset.repo_id=repo/id`.
+This logic tells Hydra to incorporate configuration parameters from `env/pusht.yaml` and `policy/diffusion.yaml`. _Note: Be aware of the order as any configuration parameters with the same name will be overridden. Thus, `default.yaml` is overridden by `env/pusht.yaml` which is overridden by `policy/diffusion.yaml`_.

-By default, every field takes its default value specified in the dataclass. If a field doesn't have a default value, it needs to be specified either from the command line or from a config file – which path is also given in the command line (more in this below). In the example above, the `dataset` field doesn't have a default value which means it must be specified.
+Then, `default.yaml` also contains common configuration parameters such as `device: cuda` or `use_amp: false` (for enabling fp16 training). Some other parameters are set to `???` which indicates that they are expected to be set in additional yaml files. For instance, `training.offline_steps: ???` in `default.yaml` is set to `200000` in `diffusion.yaml`.

+Thanks to this `defaults` section in `default.yaml`, if you want to train Diffusion Policy with PushT, you really only need to run:

-## Specifying values from the CLI
+```bash
+python lerobot/scripts/train.py
+```
+
+However, you can be more explicit and launch the exact same Diffusion Policy training on PushT with:
+
+```bash
+python lerobot/scripts/train.py policy=diffusion env=pusht
+```
+
+This way of overriding defaults via the CLI is especially useful when you want to change the policy and/or environment. For instance, you can train ACT on the default Aloha environment with:
+
+```bash
+python lerobot/scripts/train.py policy=act env=aloha
+```
+
+There are two things to note here:
+- Config overrides are passed as `param_name=param_value`.
+- Here we have overridden the defaults section. `policy=act` tells Hydra to use `policy/act.yaml`, and `env=aloha` tells Hydra to use `env/aloha.yaml`.
+
+_As an aside: we've set up all of our configurations so that they reproduce state-of-the-art results from papers in the literature._
+
+## Overriding configuration parameters in the CLI
+
+Now let's say that we want to train on a different task in the Aloha environment. If you look in `env/aloha.yaml` you will see something like:
+
+```yaml
+# lerobot/configs/env/aloha.yaml
+env:
+  task: AlohaInsertion-v0
+```
+
+And if you look in `policy/act.yaml` you will see something like:
+
+```yaml
+# lerobot/configs/policy/act.yaml
+dataset_repo_id: lerobot/aloha_sim_insertion_human
+```
+
+But our Aloha environment actually supports a cube transfer task as well. To train for this task, you could manually modify the two yaml configuration files respectively.
+
+First, we'd need to switch to using the cube transfer task for the ALOHA environment.
+
+```diff
+# lerobot/configs/env/aloha.yaml
+env:
+-  task: AlohaInsertion-v0
++  task: AlohaTransferCube-v0
+```
+
+Then, we'd also need to switch to using the cube transfer dataset.
+
+```diff
+# lerobot/configs/policy/act.yaml
+-dataset_repo_id: lerobot/aloha_sim_insertion_human
++dataset_repo_id: lerobot/aloha_sim_transfer_cube_human
+```
+
+Then, you'd be able to run:
+
+```bash
+python lerobot/scripts/train.py policy=act env=aloha
+```
+
+and you'd be training and evaluating on the cube transfer task.
+
+An alternative approach to editing the yaml configuration files would be to override the defaults via the command line:

-Let's say that we want to train [Diffusion Policy](../lerobot/common/policies/diffusion) on the [pusht](https://huggingface.co/datasets/lerobot/pusht) dataset, using the [gym_pusht](https://github.com/huggingface/gym-pusht) environment for evaluation. The command to do so would look like this:
 ```bash
 python lerobot/scripts/train.py \
-    --dataset.repo_id=lerobot/pusht \
-    --policy.type=diffusion \
-    --env.type=pusht
+    policy=act \
+    dataset_repo_id=lerobot/aloha_sim_transfer_cube_human \
+    env=aloha \
+    env.task=AlohaTransferCube-v0
 ```

-Let's break this down:
- To specify the dataset, we just need to specify its `repo_id` on the hub which is the only required argument in the `DatasetConfig`. The rest of the fields have default values and in this case we are fine with those so we can just add the option `--dataset.repo_id=lerobot/pusht`.
- To specify the policy, we can just select diffusion policy using `--policy` appended with `.type`. Here, `.type` is a special argument which allows us to select config classes inheriting from `draccus.ChoiceRegistry` and that have been decorated with the `register_subclass()` method. To have a better explanation of this feature, have a look at this [Draccus demo](https://github.com/dlwh/draccus?tab=readme-ov-file#more-flexible-configuration-with-choice-types). In our code, we use this mechanism mainly to select policies, environments, robots, and some other components like optimizers. The policies available to select are located in [lerobot/common/policies](../lerobot/common/policies)
- Similarly, we select the environment with `--env.type=pusht`. The different environment configs are available in [`lerobot/common/envs/configs.py`](../lerobot/common/envs/configs.py)
+There's something new here. Notice the `.` delimiter used to traverse the configuration hierarchy. _But be aware that the `defaults` section is an exception. As you saw above, we didn't need to write `defaults.policy=act` in the CLI. `policy=act` was enough._
+
+Putting all that knowledge together, here's the command that was used to train https://huggingface.co/lerobot/act_aloha_sim_transfer_cube_human.

-Let's see another example. Let's say you've been training [ACT](../lerobot/common/policies/act) on [lerobot/aloha_sim_insertion_human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human) using the [gym-aloha](https://github.com/huggingface/gym-aloha) environment for evaluation with:
 ```bash
 python lerobot/scripts/train.py \
-    --policy.type=act \
-    --dataset.repo_id=lerobot/aloha_sim_insertion_human \
-    --env.type=aloha \
-    --output_dir=outputs/train/act_aloha_insertion
+    hydra.run.dir=outputs/train/act_aloha_sim_transfer_cube_human \
+    device=cuda \
+    env=aloha \
+    env.task=AlohaTransferCube-v0 \
+    dataset_repo_id=lerobot/aloha_sim_transfer_cube_human \
+    policy=act \
+    training.eval_freq=10000 \
+    training.log_freq=250 \
+    training.offline_steps=100000 \
+    training.save_model=true \
+    training.save_freq=25000 \
+    eval.n_episodes=50 \
+    eval.batch_size=50 \
+    wandb.enable=false
 ```
-> Notice we added `--output_dir` to explicitly tell where to write outputs from this run (checkpoints, training state, configs etc.). This is not mandatory and if you don't specify it, a default directory will be created from the current date and time, env.type and policy.type. This will typically look like `outputs/train/2025-01-24/16-10-05_aloha_act`.

-We now want to train a different policy for aloha on another task. We'll change the dataset and use [lerobot/aloha_sim_transfer_cube_human](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_human) instead. Of course, we also need to change the task of the environment as well to match this other task.
-Looking at the [`AlohaEnv`](../lerobot/common/envs/configs.py) config, the task is `"AlohaInsertion-v0"` by default, which corresponds to the task we trained on in the command above. The [gym-aloha](https://github.com/huggingface/gym-aloha?tab=readme-ov-file#description) environment also has the `AlohaTransferCube-v0` task which corresponds to this other task we want to train on. Putting this together, we can train this new policy on this different task using:
+There's one new thing here: `hydra.run.dir=outputs/train/act_aloha_sim_transfer_cube_human`, which specifies where to save the training output.
+
+## Using a configuration file not in `lerobot/configs`
+
+Above we discussed that our training script is set up such that Hydra looks for `default.yaml` in `lerobot/configs`. But, if you have a configuration file elsewhere in your filesystem you may use:
+
 ```bash
-python lerobot/scripts/train.py \
-    --policy.type=act \
-    --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
-    --env.type=aloha \
-    --env.task=AlohaTransferCube-v0 \
-    --output_dir=outputs/train/act_aloha_transfer
+python lerobot/scripts/train.py --config-dir PARENT/PATH --config-name FILE_NAME_WITHOUT_EXTENSION
 ```

-## Loading from a config file
+Note: here we use regular syntax for providing CLI arguments to a Python script, not Hydra's `param_name=param_value` syntax.

-Now, let's assume that we want to reproduce the run just above. That run has produced a `train_config.json` file in its checkpoints, which serializes the `TrainPipelineConfig` instance it used:
-```json
-{
-    "dataset": {
-        "repo_id": "lerobot/aloha_sim_transfer_cube_human",
-        "episodes": null,
-        ...
-    },
-    "env": {
-        "type": "aloha",
-        "task": "AlohaTransferCube-v0",
-        "fps": 50,
-        ...
-    },
-    "policy": {
-        "type": "act",
-        "n_obs_steps": 1,
-        ...
-    },
-    ...
-}
-```
+As a concrete example, this becomes particularly handy when you have a folder with training outputs, and would like to re-run the training. For example, say you previously ran the training script with one of the earlier commands and have `outputs/train/my_experiment/checkpoints/last/pretrained_model/config.yaml`. This `config.yaml` file will have the full set of configuration parameters within it. To run the training with the same configuration again, do:

-We can then simply load the config values from this file using:
 ```bash
-python lerobot/scripts/train.py \
-    --config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
-    --output_dir=outputs/train/act_aloha_transfer_2
-```
-`--config_path` is also a special argument which allows to initialize the config from a local config file. It can point to a directory that contains `train_config.json` or to the config file itself directly.
-
-Similarly to Hydra, we can still override some parameters in the CLI if we want to, e.g.:
-```bash
-python lerobot/scripts/train.py \
-    --config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
-    --output_dir=outputs/train/act_aloha_transfer_2
-    --policy.n_action_steps=80
-```
-> Note: While `--output_dir` is not required in general, in this case we need to specify it since it will otherwise take the value from the `train_config.json` (which is `outputs/train/act_aloha_transfer`). In order to prevent accidental deletion of previous run checkpoints, we raise an error if you're trying to write in an existing directory. This is not the case when resuming a run, which is what you'll learn next.
-
-`--config_path` can also accept the repo_id of a repo on the hub that contains a `train_config.json` file, e.g. running:
-```bash
-python lerobot/scripts/train.py --config_path=lerobot/diffusion_pusht
-```
-will start a training run with the same configuration used for training [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht)
-
-
-## Resume training
-
-Being able to resume a training run is important in case it crashed or aborted for any reason. We'll demonstrate how to do that here.
-
-Let's reuse the command from the previous run and add a few more options:
-```bash
-python lerobot/scripts/train.py \
-    --policy.type=act \
-    --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
-    --env.type=aloha \
-    --env.task=AlohaTransferCube-v0 \
-    --log_freq=25 \
-    --save_freq=100 \
-    --output_dir=outputs/train/run_resumption
+python lerobot/scripts/train.py --config-dir outputs/train/my_experiment/checkpoints/last/pretrained_model --config-name config
 ```

-Here we've taken care to set up the log frequency and checkpointing frequency to low numbers so we can showcase resumption. You should be able to see some logging and have a first checkpoint within 1 minute (depending on hardware). Wait for the first checkpoint to happen, you should see a line that looks like this in your terminal:
-```
-INFO 2025-01-24 16:10:56 ts/train.py:263 Checkpoint policy after step 100
-```
-Now let's simulate a crash by killing the process (hit `ctrl`+`c`). We can then simply resume this run from the last checkpoint available with:
-```bash
-python lerobot/scripts/train.py \
-    --config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
-    --resume=true
-```
-You should see from the logging that your training picks up from where it left off.
-
-Another reason for which you might want to resume a run is simply to extend training and add more training steps. The number of training steps is set by the option `--steps`, which is 100 000 by default.
-You could double the number of steps of the previous run with:
-```bash
-python lerobot/scripts/train.py \
-    --config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
-    --resume=true \
-    --steps=200000
-```
-
-## Outputs of a run
-In the output directory, there will be a folder called `checkpoints` with the following structure:
-```bash
-outputs/train/run_resumption/checkpoints
-├── 000100  # checkpoint_dir for training step 100
-│   ├── pretrained_model/
-│   │   ├── config.json  # policy config
-│   │   ├── model.safetensors  # policy weights
-│   │   └── train_config.json  # train config
-│   └── training_state/
-│       ├── optimizer_param_groups.json  #  optimizer param groups
-│       ├── optimizer_state.safetensors  # optimizer state
-│       ├── rng_state.safetensors  # rng states
-│       ├── scheduler_state.json  # scheduler state
-│       └── training_step.json  # training step
-├── 000200
-└── last -> 000200  # symlink to the last available checkpoint
-```
-
-## Fine-tuning a pre-trained policy
-
-In addition to the features currently in Draccus, we've added a special `.path` argument for the policy, which allows to load a policy as you would with `PreTrainedPolicy.from_pretrained()`. In that case, `path` can be a local directory that contains a checkpoint or a repo_id pointing to a pretrained policy on the hub.
-
-For example, we could fine-tune a [policy pre-trained on the aloha transfer task](https://huggingface.co/lerobot/act_aloha_sim_transfer_cube_human) on the aloha insertion task. We can achieve this with:
-```bash
-python lerobot/scripts/train.py \
-    --policy.path=lerobot/act_aloha_sim_transfer_cube_human \
-    --dataset.repo_id=lerobot/aloha_sim_insertion_human \
-    --env.type=aloha \
-    --env.task=AlohaInsertion-v0
-```
-
-When doing so, keep in mind that the features of the fine-tuning dataset would have to match the input/output features of the pretrained policy.
+Note that you may still use the regular syntax for config parameter overrides (eg: by adding `training.offline_steps=200000`).

 ## Typical logs and metrics

-When you start the training process, you will first see your full configuration being printed in the terminal. You can check it to make sure that you configured your run correctly. The final configuration will also be saved with the checkpoint.
+When you start the training process, you will first see your full configuration being printed in the terminal. You can check it to make sure that you configured it correctly and that your config is not overridden by other files. The final configuration will also be saved with the checkpoint.

 After that, you will see training log like this one:
+
 ```
 INFO 2024-08-14 13:35:12 ts/train.py:192 step:0 smpl:64 ep:1 epch:0.00 loss:1.112 grdn:15.387 lr:2.0e-07 updt_s:1.738 data_s:4.774
 ```
-or evaluation log:
+
+or evaluation log like:
+
 ```
 INFO 2024-08-14 13:38:45 ts/train.py:226 step:100 smpl:6K ep:52 epch:0.25 ∑rwrd:20.693 success:0.0% eval_s:120.266
 ```

 These logs will also be saved in wandb if `wandb.enable` is set to `true`. Here are the meaning of some abbreviations:
+
 - `smpl`: number of samples seen during training.
 - `ep`: number of episodes seen during training. An episode contains multiple samples in a complete manipulation task.
 - `epch`: number of time all unique samples are seen (epoch).
@@ -230,45 +200,14 @@ These logs will also be saved in wandb if `wandb.enable` is set to `true`. Here

 Some metrics are useful for initial performance profiling. For example, if you find the current GPU utilization is low via the `nvidia-smi` command and `data_s` sometimes is too high, you may need to modify batch size or number of dataloading workers to accelerate dataloading. We also recommend [pytorch profiler](https://github.com/huggingface/lerobot?tab=readme-ov-file#improve-your-code-with-profiling) for detailed performance probing.

-## In short
-
-We'll summarize here the main use cases to remember from this tutorial.
-
-#### Train a policy from scratch – CLI
-```bash
-python lerobot/scripts/train.py \
-    --policy.type=act \  # <- select 'act' policy
-    --env.type=pusht \  # <- select 'pusht' environment
-    --dataset.repo_id=lerobot/pusht  # <- train on this dataset
-```
-
-#### Train a policy from scratch - config file + CLI
-```bash
-python lerobot/scripts/train.py \
-    --config_path=path/to/pretrained_model \  # <- can also be a repo_id
-    --policy.n_action_steps=80  # <- you may still override values
-```
-
-#### Resume/continue a training run
-```bash
-python lerobot/scripts/train.py \
-    --config_path=checkpoint/pretrained_model/ \
-    --resume=true \
-    --steps=200000  # <- you can change some training parameters
-```
-
-#### Fine-tuning
-```bash
-python lerobot/scripts/train.py \
-    --policy.path=lerobot/act_aloha_sim_transfer_cube_human \  # <- can also be a local path to a checkpoint
-    --dataset.repo_id=lerobot/aloha_sim_insertion_human \
-    --env.type=aloha \
-    --env.task=AlohaInsertion-v0
-```
-
 ---

-Now that you know the basics of how to train a policy, you might want to know how to apply this knowledge to actual robots, or how to record your own datasets and train policies on your specific task?
-If that's the case, head over to the next tutorial [`7_get_started_with_real_robot.md`](./7_get_started_with_real_robot.md).
+So far we've seen how to train Diffusion Policy for PushT and ACT for ALOHA. Now, what if we want to train ACT for PushT? Well, there are aspects of the ACT configuration that are specific to the ALOHA environments, and these happen to be incompatible with PushT. Therefore, trying to run the following will almost certainly raise an exception of sorts (eg: feature dimension mismatch):

-Or in the meantime, happy training! 🤗
+```bash
+python lerobot/scripts/train.py policy=act env=pusht dataset_repo_id=lerobot/pusht
+```
+
+Please, head on over to our [advanced tutorial on adapting policy configuration to various environments](./advanced/train_act_pusht/train_act_pusht.md) to learn more.
+
+Or in the meantime, happy coding! 🤗
--- a/examples/5_resume_training.md
+++ b/examples/5_resume_training.md
@@ -0,0 +1,37 @@
+This tutorial explains how to resume a training run that you've started with the training script. If you don't know how our training script and configuration system works, please read [4_train_policy_with_script.md](./4_train_policy_with_script.md) first.
+
+## Basic training resumption
+
+Let's consider the example of training ACT for one of the ALOHA tasks. Here's a command that can achieve that:
+
+```bash
+python lerobot/scripts/train.py \
+    hydra.run.dir=outputs/train/run_resumption \
+    policy=act \
+    dataset_repo_id=lerobot/aloha_sim_transfer_cube_human \
+    env=aloha \
+    env.task=AlohaTransferCube-v0 \
+    training.log_freq=25 \
+    training.save_checkpoint=true \
+    training.save_freq=100
+```
+
+Here we're using the default dataset and environment for ACT, and we've taken care to set up the log frequency and checkpointing frequency to low numbers so we can test resumption. You should be able to see some logging and have a first checkpoint within 1 minute. Please interrupt the training after the first checkpoint.
+
+To resume, all that we have to do is run the training script, providing the run directory, and the resume option:
+
+```bash
+python lerobot/scripts/train.py \
+    hydra.run.dir=outputs/train/run_resumption \
+    resume=true
+```
+
+You should see from the logging that your training picks up from where it left off.
+
+Note that with `resume=true`, the configuration file from the last checkpoint in the training output directory is loaded. So it doesn't matter that we haven't provided all the other configuration parameters from our previous command (although there may be warnings to notify you that your command has a different configuration than the checkpoint).
+
+---
+
+Now you should know how to resume your training run in case it gets interrupted or you want to extend a finished training run.
+
+Happy coding! 🤗
--- a/examples/advanced/1_add_image_transforms.py
+++ b/examples/advanced/1_add_image_transforms.py
@@ -1,21 +1,7 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 """
 This script demonstrates how to use torchvision's image transformation with LeRobotDataset for data
 augmentation purposes. The transformations are passed to the dataset as an argument upon creation, and
-transforms are applied to the observation images before they are returned in the dataset's __getitem__.
+transforms are applied to the observation images before they are returned in the dataset's __getitem__.
 """

 from pathlib import Path
@@ -24,17 +10,17 @@ from torchvision.transforms import ToPILImage, v2

 from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

-dataset_repo_id = "lerobot/aloha_static_screw_driver"
+dataset_repo_id = "lerobot/aloha_static_tape"

 # Create a LeRobotDataset with no transformations
-dataset = LeRobotDataset(dataset_repo_id, episodes=[0])
+dataset = LeRobotDataset(dataset_repo_id)
 # This is equivalent to `dataset = LeRobotDataset(dataset_repo_id, image_transforms=None)`

 # Get the index of the first observation in the first episode
 first_idx = dataset.episode_data_index["from"][0].item()

 # Get the frame corresponding to the first camera
-frame = dataset[first_idx][dataset.meta.camera_keys[0]]
+frame = dataset[first_idx][dataset.camera_keys[0]]


 # Define the transformations
@@ -42,16 +28,15 @@ transforms = v2.Compose(
    [
        v2.ColorJitter(brightness=(0.5, 1.5)),
        v2.ColorJitter(contrast=(0.5, 1.5)),
-        v2.ColorJitter(hue=(-0.1, 0.1)),
        v2.RandomAdjustSharpness(sharpness_factor=2, p=1),
    ]
 )

 # Create another LeRobotDataset with the defined transformations
-transformed_dataset = LeRobotDataset(dataset_repo_id, episodes=[0], image_transforms=transforms)
+transformed_dataset = LeRobotDataset(dataset_repo_id, image_transforms=transforms)

 # Get a frame from the transformed dataset
-transformed_frame = transformed_dataset[first_idx][transformed_dataset.meta.camera_keys[0]]
+transformed_frame = transformed_dataset[first_idx][transformed_dataset.camera_keys[0]]

 # Create a directory to store output images
 output_dir = Path("outputs/image_transforms")
--- a/examples/7_get_started_with_real_robot.md
+++ b/examples/7_get_started_with_real_robot.md
--- a/lerobot/common/robots/stretch3/README.md
+++ b/lerobot/common/robots/stretch3/README.md
@@ -43,19 +43,21 @@ conda create -y -n lerobot python=3.10 && conda activate lerobot
 git clone https://github.com/huggingface/lerobot.git ~/lerobot
 ```

-6. When using `miniconda`, install `ffmpeg` in your environment:
-```bash
-conda install ffmpeg -c conda-forge
-```
-
-7. Install LeRobot with stretch dependencies:
+6. Install LeRobot with stretch dependencies:
 ```bash
 cd ~/lerobot && pip install -e ".[stretch]"
 ```

 > **Note:** If you get this message, you can ignore it: `ERROR: pip's dependency resolver does not currently take into account all the packages that are installed.`

-8. Run a [system check](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#system-check) to make sure your robot is ready:
+For Linux only (not Mac), install extra dependencies for recording datasets:
+```bash
+conda install -y -c conda-forge ffmpeg
+pip uninstall -y opencv-python
+conda install -y -c conda-forge "opencv>=4.10.0"
+```
+
+7. Run a [system check](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#system-check) to make sure your robot is ready:
 ```bash
 stretch_system_check.py
 ```
@@ -90,24 +92,20 @@ Serial Number = stretch-se3-3054
 **Calibrate (Optional)**
 Before operating Stretch, you need to [home](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#homing) it first. Be mindful about giving Stretch some space as this procedure will move the robot's arm and gripper. Now run this command:
 ```bash
-python lerobot/scripts/control_robot.py \
-    --robot.type=stretch \
-    --control.type=calibrate
+python lerobot/scripts/control_robot.py calibrate \
+    --robot-path lerobot/configs/robot/stretch.yaml
 ```
 This is equivalent to running `stretch_robot_home.py`

-> **Note:** If you run any of the LeRobot scripts below and Stretch is not properly homed, it will automatically home/calibrate first.
+> **Note:** If you run any of the LeRobot scripts below and Stretch is not properly homed, it will automatically home/calibrate first.

 **Teleoperate**
-Before trying teleoperation, you need to activate the gamepad controller by pressing the middle button. For more info, see Stretch's [doc](https://docs.hello-robot.com/0.3/getting_started/hello_robot/#gamepad-teleoperation).
+Before trying teleoperation, you need to activate the gamepad controller by pressing the middle button. For more info, see Stretch's [doc](https://docs.hello-robot.com/0.3/getting_started/hello_robot/#gamepad-teleoperation).

 Now try out teleoperation (see above documentation to learn about the gamepad controls):
-
-> **NOTE:** To visualize the data, enable `--control.display_data=true`. This streams the data using `rerun`.
 ```bash
-python lerobot/scripts/control_robot.py \
-    --robot.type=stretch \
-    --control.type=teleoperate
+python lerobot/scripts/control_robot.py teleoperate \
+    --robot-path lerobot/configs/robot/stretch.yaml
 ```
 This is essentially the same as running `stretch_gamepad_teleop.py`

@@ -127,18 +125,17 @@ echo $HF_USER

 Record one episode:
 ```bash
-python lerobot/scripts/control_robot.py \
-  --robot.type=stretch \
-  --control.type=record \
-  --control.fps=30 \
-  --control.single_task="Grasp a lego block and put it in the bin." \
-  --control.repo_id=${HF_USER}/stretch_test \
-  --control.tags='["tutorial"]' \
-  --control.warmup_time_s=5 \
-  --control.episode_time_s=30 \
-  --control.reset_time_s=30 \
-  --control.num_episodes=2 \
-  --control.push_to_hub=true
+python lerobot/scripts/control_robot.py record \
+    --robot-path lerobot/configs/robot/stretch.yaml \
+    --fps 20 \
+    --root data \
+    --repo-id ${HF_USER}/stretch_test \
+    --tags stretch tutorial \
+    --warmup-time-s 3 \
+    --episode-time-s 40 \
+    --reset-time-s 10 \
+    --num-episodes 1 \
+    --push-to-hub 0
 ```

 > **Note:** If you're using ssh to connect to Stretch and run this script, you won't be able to visualize its cameras feed (though they will still be recording). To see the cameras stream, use [tethered](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#tethered-setup) or [untethered setup](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#untethered-setup).
@@ -146,12 +143,12 @@ python lerobot/scripts/control_robot.py \
 **Replay an episode**
 Now try to replay this episode (make sure the robot's initial position is the same):
 ```bash
-python lerobot/scripts/control_robot.py \
-  --robot.type=stretch \
-  --control.type=replay \
-  --control.fps=30 \
-  --control.repo_id=${HF_USER}/stretch_test \
-  --control.episode=0
+python lerobot/scripts/control_robot.py replay \
+    --robot-path lerobot/configs/robot/stretch.yaml \
+    --fps 20 \
+    --root data \
+    --repo-id ${HF_USER}/stretch_test \
+    --episode 0
 ```

 Follow [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) to train a policy on your data and run inference on your robot. You will need to adapt the code for Stretch.
--- a/examples/9_use_aloha.md
+++ b/examples/9_use_aloha.md
@@ -0,0 +1,179 @@
+This tutorial explains how to use [Aloha and Aloha 2 stationary](https://www.trossenrobotics.com/aloha-stationary) with LeRobot.
+
+## Setup
+
+Follow the [documentation from Trossen Robotics](https://docs.trossenrobotics.com/aloha_docs/getting_started/stationary/hardware_setup.html) for setting up the hardware and plugging the 4 arms and 4 cameras to your computer.
+
+
+## Install LeRobot
+
+On your computer:
+
+1. [Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install):
+```bash
+mkdir -p ~/miniconda3
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
+bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
+rm ~/miniconda3/miniconda.sh
+~/miniconda3/bin/conda init bash
+```
+
+2. Restart shell or `source ~/.bashrc`
+
+3. Create and activate a fresh conda environment for lerobot
+```bash
+conda create -y -n lerobot python=3.10 && conda activate lerobot
+```
+
+4. Clone LeRobot:
+```bash
+git clone https://github.com/huggingface/lerobot.git ~/lerobot
+```
+
+5. Install LeRobot with dependencies for the Aloha motors (dynamixel) and cameras (intelrealsense):
+```bash
+cd ~/lerobot && pip install -e ".[dynamixel, intelrealsense]"
+```
+
+For Linux only (not Mac), install extra dependencies for recording datasets:
+```bash
+conda install -y -c conda-forge ffmpeg
+pip uninstall -y opencv-python
+conda install -y -c conda-forge "opencv>=4.10.0"
+```
+
+## Teleoperate
+
+**/!\ FOR SAFETY, READ THIS /!\**
+Teleoperation consists in manually operating the leader arms to move the follower arms. Importantly:
+1. Make sure your leader arms are in the same position as the follower arms, so that the follower arms don't move too fast to match the leader arms,
+2. Our code assumes that your robot has been assembled following Trossen Robotics instructions. This allows us to skip calibration, as we use the pre-defined calibration files in `.cache/calibration/aloha_default`. If you replace a motor, make sure you follow the exact instructions from Trossen Robotics.
+
+By running the following code, you can start your first **SAFE** teleoperation:
+```bash
+python lerobot/scripts/control_robot.py teleoperate \
+    --robot-path lerobot/configs/robot/aloha.yaml \
+    --robot-overrides max_relative_target=5
+```
+
+By adding `--robot-overrides max_relative_target=5`, we override the default value for `max_relative_target` defined in `lerobot/configs/robot/aloha.yaml`. It is expected to be `5` to limit the magnitude of the movement for more safety, but the teleoperation won't be smooth. When you feel confident, you can disable this limit by adding `--robot-overrides max_relative_target=null` to the command line:
+```bash
+python lerobot/scripts/control_robot.py teleoperate \
+    --robot-path lerobot/configs/robot/aloha.yaml \
+    --robot-overrides max_relative_target=null
+```
+
+## Record a dataset
+
+Once you're familiar with teleoperation, you can record your first dataset with Aloha.
+
+If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
+```bash
+huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
+```
+
+Store your Hugging Face repository name in a variable to run these commands:
+```bash
+HF_USER=$(huggingface-cli whoami | head -n 1)
+echo $HF_USER
+```
+
+Record 2 episodes and upload your dataset to the hub:
+```bash
+python lerobot/scripts/control_robot.py record \
+    --robot-path lerobot/configs/robot/aloha.yaml \
+    --robot-overrides max_relative_target=null \
+    --fps 30 \
+    --root data \
+    --repo-id ${HF_USER}/aloha_test \
+    --tags aloha tutorial \
+    --warmup-time-s 5 \
+    --episode-time-s 40 \
+    --reset-time-s 10 \
+    --num-episodes 2 \
+    --push-to-hub 1
+```
+
+## Visualize a dataset
+
+If you uploaded your dataset to the hub with `--push-to-hub 1`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id given by:
+```bash
+echo ${HF_USER}/aloha_test
+```
+
+If you didn't upload your dataset (i.e. you used `--push-to-hub 0`), you can also visualize it locally with:
+```bash
+python lerobot/scripts/visualize_dataset_html.py \
+  --root data \
+  --repo-id ${HF_USER}/aloha_test
+```
+
+## Replay an episode
+
+**/!\ FOR SAFETY, READ THIS /!\**
+Replay consists in automatically replaying the sequence of actions (i.e. goal positions for your motors) recorded in a given dataset episode. Make sure the current initial position of your robot is similar to the one in your episode, so that your follower arms don't move too fast to go to the first goal positions. For safety, you might want to add `--robot-overrides max_relative_target=5` to your command line as explained above.
+
+Now try to replay the first episode on your robot:
+```bash
+python lerobot/scripts/control_robot.py replay \
+    --robot-path lerobot/configs/robot/aloha.yaml \
+    --robot-overrides max_relative_target=null \
+    --fps 30 \
+    --root data \
+    --repo-id ${HF_USER}/aloha_test \
+    --episode 0
+```
+
+## Train a policy
+
+To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
+```bash
+DATA_DIR=data python lerobot/scripts/train.py \
+  dataset_repo_id=${HF_USER}/aloha_test \
+  policy=act_aloha_real \
+  env=aloha_real \
+  hydra.run.dir=outputs/train/act_aloha_test \
+  hydra.job.name=act_aloha_test \
+  device=cuda \
+  wandb.enable=true
+```
+
+Let's explain it:
+1. We provided the dataset as argument with `dataset_repo_id=${HF_USER}/aloha_test`.
+2. We provided the policy with `policy=act_aloha_real`. This loads configurations from [`lerobot/configs/policy/act_aloha_real.yaml`](../lerobot/configs/policy/act_aloha_real.yaml). Importantly, this policy uses 4 cameras as input `cam_right_wrist`, `cam_left_wrist`, `cam_high`, and `cam_low`.
+3. We provided an environment as argument with `env=aloha_real`. This loads configurations from [`lerobot/configs/env/aloha_real.yaml`](../lerobot/configs/env/aloha_real.yaml). Note: this yaml defines 18 dimensions for the `state_dim` and `action_dim`, corresponding to 18 motors, not 14 motors as used in previous Aloha work. This is because we include the `shoulder_shadow` and `elbow_shadow` motors for simplicity.
+4. We provided `device=cuda` since we are training on a Nvidia GPU.
+5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
+6. We added `DATA_DIR=data` to access your dataset stored in your local `data` directory. If you don't provide `DATA_DIR`, your dataset will be downloaded from Hugging Face hub to your cache folder `$HOME/.cache/huggingface`. In future versions of `lerobot`, both directories will be in sync.
+
+Training should take several hours. You will find checkpoints in `outputs/train/act_aloha_test/checkpoints`.
+
+## Evaluate your policy
+
+You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
+```bash
+python lerobot/scripts/control_robot.py record \
+  --robot-path lerobot/configs/robot/aloha.yaml \
+  --robot-overrides max_relative_target=null \
+  --fps 30 \
+  --root data \
+  --repo-id ${HF_USER}/eval_act_aloha_test \
+  --tags aloha tutorial eval \
+  --warmup-time-s 5 \
+  --episode-time-s 40 \
+  --reset-time-s 10 \
+  --num-episodes 10 \
+  --num-image-writer-processes 1 \
+  -p outputs/train/act_aloha_test/checkpoints/last/pretrained_model
+```
+
+As you can see, it's almost the same command as previously used to record your training dataset. Three things changed:
+1. There is an additional `-p` argument which indicates the path to your policy checkpoint (e.g. `-p outputs/train/act_aloha_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `-p ${HF_USER}/act_aloha_test`).
+2. The name of the dataset begins with `eval` to reflect that you are running inference (e.g. `--repo-id ${HF_USER}/eval_act_aloha_test`).
+3. We use `--num-image-writer-processes 1` instead of the default value (`0`). On our computer, using a dedicated process to write images from the 4 cameras to disk allows us to reach a consistent 30 fps during inference. Feel free to explore different values for `--num-image-writer-processes`.
+
+## More
+
+Follow this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth explanation.
+
+If you have any questions or need help, please reach out on Discord in the channel `#aloha-arm`.
--- a/examples/advanced/1_train_act_pusht/act_pusht.yaml
+++ b/examples/advanced/1_train_act_pusht/act_pusht.yaml
@@ -0,0 +1,87 @@
+# @package _global_
+
+# Change the seed to match what PushT eval uses
+# (to avoid evaluating on seeds used for generating the training data).
+seed: 100000
+# Change the dataset repository to the PushT one.
+dataset_repo_id: lerobot/pusht
+
+override_dataset_stats:
+  observation.image:
+    # stats from imagenet, since we use a pretrained vision model
+    mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
+    std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
+
+training:
+  offline_steps: 80000
+  online_steps: 0
+  eval_freq: 10000
+  save_freq: 100000
+  log_freq: 250
+  save_model: true
+
+  batch_size: 8
+  lr: 1e-5
+  lr_backbone: 1e-5
+  weight_decay: 1e-4
+  grad_clip_norm: 10
+  online_steps_between_rollouts: 1
+
+  delta_timestamps:
+    action: "[i / ${fps} for i in range(${policy.chunk_size})]"
+
+eval:
+  n_episodes: 50
+  batch_size: 50
+
+# See `configuration_act.py` for more details.
+policy:
+  name: act
+
+  # Input / output structure.
+  n_obs_steps: 1
+  chunk_size: 100 # chunk_size
+  n_action_steps: 100
+
+  input_shapes:
+    observation.image: [3, 96, 96]
+    observation.state: ["${env.state_dim}"]
+  output_shapes:
+    action: ["${env.action_dim}"]
+
+  # Normalization / Unnormalization
+  input_normalization_modes:
+    observation.image: mean_std
+    # Use min_max normalization just because it's more standard.
+    observation.state: min_max
+  output_normalization_modes:
+    # Use min_max normalization just because it's more standard.
+    action: min_max
+
+  # Architecture.
+  # Vision backbone.
+  vision_backbone: resnet18
+  pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
+  replace_final_stride_with_dilation: false
+  # Transformer layers.
+  pre_norm: false
+  dim_model: 512
+  n_heads: 8
+  dim_feedforward: 3200
+  feedforward_activation: relu
+  n_encoder_layers: 4
+  # Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code
+  # that means only the first layer is used. Here we match the original implementation by setting this to 1.
+  # See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521.
+  n_decoder_layers: 1
+  # VAE.
+  use_vae: true
+  latent_dim: 32
+  n_vae_encoder_layers: 4
+
+  # Inference.
+  temporal_ensemble_coeff: null
+
+  # Training and loss computation.
+  dropout: 0.1
+  kl_weight: 10.0
--- a/examples/advanced/1_train_act_pusht/train_act_pusht.md
+++ b/examples/advanced/1_train_act_pusht/train_act_pusht.md
@@ -0,0 +1,70 @@
+In this tutorial we will learn how to adapt a policy configuration to be compatible with a new environment and dataset. As a concrete example, we will adapt the default configuration for ACT to be compatible with the PushT environment and dataset.
+
+If you haven't already read our tutorial on the [training script and configuration tooling](../4_train_policy_with_script.md) please do so prior to tackling this tutorial.
+
+Let's get started!
+
+Suppose we want to train ACT for PushT. Well, there are aspects of the ACT configuration that are specific to the ALOHA environments, and these happen to be incompatible with PushT. Therefore, trying to run the following will almost certainly raise an exception of sorts (e.g. a feature dimension mismatch):
+
+```bash
+python lerobot/scripts/train.py policy=act env=pusht dataset_repo_id=lerobot/pusht
+```
+
+We need to adapt the parameters of the ACT policy configuration to the PushT environment. The most important ones are the image keys.
+
+ALOHA's datasets and environments typically use a variable number of cameras. In `lerobot/configs/policy/act.yaml` you may notice two relevant sections. Here we show you the minimal diff needed to adjust to PushT:
+
+```diff
+override_dataset_stats:
+-  observation.images.top:
++  observation.image:
+    # stats from imagenet, since we use a pretrained vision model
+    mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
+    std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
+
+policy:
+  input_shapes:
+-    observation.images.top: [3, 480, 640]
++    observation.image: [3, 96, 96]
+    observation.state: ["${env.state_dim}"]
+  output_shapes:
+    action: ["${env.action_dim}"]
+
+  input_normalization_modes:
+-    observation.images.top: mean_std
++    observation.image: mean_std
+     observation.state: min_max
+  output_normalization_modes:
+    action: min_max
+```
+
+Here we've accounted for the following:
+- PushT uses "observation.image" for its image key.
+- PushT provides smaller images.
+
+_Side note: technically we could override these via the CLI, but with many changes it gets a bit messy, and we also have a bit of a challenge in that we're using `.` in our observation keys which is treated by Hydra as a hierarchical separator_.
+
+For your convenience, we provide [`act_pusht.yaml`](./act_pusht.yaml) in this directory. It contains the diff above, plus some other (optional) ones that are explained within. Please copy it into `lerobot/configs/policy` with:
+
+```bash
+cp examples/advanced/1_train_act_pusht/act_pusht.yaml lerobot/configs/policy/act_pusht.yaml
+```
+
+(remember from a [previous tutorial](../4_train_policy_with_script.md) that Hydra will look in the `lerobot/configs` directory). Now try running the following.
+
+<!-- Note to contributor: are you changing this command? Note that it's tested in `Makefile`, so change it there too! -->
+```bash
+python lerobot/scripts/train.py policy=act_pusht env=pusht
+```
+
+Notice that this is much the same as the command that failed at the start of the tutorial, only:
+- Now we are using `policy=act_pusht` to point to our new configuration file.
+- We can drop `dataset_repo_id=lerobot/pusht` as the change is incorporated in our new configuration file.
+
+Hurrah! You're now training ACT for the PushT environment.
+
+---
+
+The bottom line of this tutorial is that when training policies for different environments and datasets you will need to understand what parts of the policy configuration are specific to those and make changes accordingly.
+
+Happy coding! 🤗
--- a/examples/advanced/2_calculate_validation_loss.py
+++ b/examples/advanced/2_calculate_validation_loss.py
@@ -1,17 +1,3 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 """This script demonstrates how to slice a dataset and calculate the loss on a subset of the data.

 This technique can be useful for debugging and testing purposes, as well as identifying whether a policy
@@ -23,82 +9,82 @@ on the target environment, whether that be in simulation or the real world.
 """

 import math
+from pathlib import Path

 import torch
+from huggingface_hub import snapshot_download

-from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy

+device = torch.device("cuda")

-def main():
-    device = torch.device("cuda")
+# Download the diffusion policy for pusht environment
+pretrained_policy_path = Path(snapshot_download("lerobot/diffusion_pusht"))
+# OR uncomment the following to evaluate a policy from the local outputs/train folder.
+# pretrained_policy_path = Path("outputs/train/example_pusht_diffusion")

-    # Download the diffusion policy for pusht environment
-    pretrained_policy_path = "lerobot/diffusion_pusht"
-    # OR uncomment the following to evaluate a policy from the local outputs/train folder.
-    # pretrained_policy_path = Path("outputs/train/example_pusht_diffusion")
+policy = DiffusionPolicy.from_pretrained(pretrained_policy_path)
+policy.eval()
+policy.to(device)

-    policy = DiffusionPolicy.from_pretrained(pretrained_policy_path)
-    policy.eval()
-    policy.to(device)
+# Set up the dataset.
+delta_timestamps = {
+    # Load the previous image and state at -0.1 seconds before current frame,
+    # then load current image and state corresponding to 0.0 second.
+    "observation.image": [-0.1, 0.0],
+    "observation.state": [-0.1, 0.0],
+    # Load the previous action (-0.1), the next action to be executed (0.0),
+    # and 14 future actions with a 0.1 seconds spacing. All these actions will be
+    # used to calculate the loss.
+    "action": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4],
+}

-    # Set up the dataset.
-    delta_timestamps = {
-        # Load the previous image and state at -0.1 seconds before current frame,
-        # then load current image and state corresponding to 0.0 second.
-        "observation.image": [-0.1, 0.0],
-        "observation.state": [-0.1, 0.0],
-        # Load the previous action (-0.1), the next action to be executed (0.0),
-        # and 14 future actions with a 0.1 seconds spacing. All these actions will be
-        # used to calculate the loss.
-        "action": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4],
-    }
+# Load the last 10% of episodes of the dataset as a validation set.
+# - Load full dataset
+full_dataset = LeRobotDataset("lerobot/pusht", split="train")
+# - Calculate train and val subsets
+num_train_episodes = math.floor(full_dataset.num_episodes * 90 / 100)
+num_val_episodes = full_dataset.num_episodes - num_train_episodes
+print(f"Number of episodes in full dataset: {full_dataset.num_episodes}")
+print(f"Number of episodes in training dataset (90% subset): {num_train_episodes}")
+print(f"Number of episodes in validation dataset (10% subset): {num_val_episodes}")
+# - Get first frame index of the validation set
+first_val_frame_index = full_dataset.episode_data_index["from"][num_train_episodes].item()
+# - Load frames subset belonging to validation set using the `split` argument.
+#   It utilizes the `datasets` library's syntax for slicing datasets.
+#   For more information on the Slice API, please see:
+#   https://huggingface.co/docs/datasets/v2.19.0/loading#slice-splits
+train_dataset = LeRobotDataset(
+    "lerobot/pusht", split=f"train[:{first_val_frame_index}]", delta_timestamps=delta_timestamps
+)
+val_dataset = LeRobotDataset(
+    "lerobot/pusht", split=f"train[{first_val_frame_index}:]", delta_timestamps=delta_timestamps
+)
+print(f"Number of frames in training dataset (90% subset): {len(train_dataset)}")
+print(f"Number of frames in validation dataset (10% subset): {len(val_dataset)}")

-    # Load the last 10% of episodes of the dataset as a validation set.
-    # - Load dataset metadata
-    dataset_metadata = LeRobotDatasetMetadata("lerobot/pusht")
-    # - Calculate train and val episodes
-    total_episodes = dataset_metadata.total_episodes
-    episodes = list(range(dataset_metadata.total_episodes))
-    num_train_episodes = math.floor(total_episodes * 90 / 100)
-    train_episodes = episodes[:num_train_episodes]
-    val_episodes = episodes[num_train_episodes:]
-    print(f"Number of episodes in full dataset: {total_episodes}")
-    print(f"Number of episodes in training dataset (90% subset): {len(train_episodes)}")
-    print(f"Number of episodes in validation dataset (10% subset): {len(val_episodes)}")
-    # - Load train and val datasets
-    train_dataset = LeRobotDataset(
-        "lerobot/pusht", episodes=train_episodes, delta_timestamps=delta_timestamps
-    )
-    val_dataset = LeRobotDataset("lerobot/pusht", episodes=val_episodes, delta_timestamps=delta_timestamps)
-    print(f"Number of frames in training dataset (90% subset): {len(train_dataset)}")
-    print(f"Number of frames in validation dataset (10% subset): {len(val_dataset)}")
+# Create dataloader for evaluation.
+val_dataloader = torch.utils.data.DataLoader(
+    val_dataset,
+    num_workers=4,
+    batch_size=64,
+    shuffle=False,
+    pin_memory=device != torch.device("cpu"),
+    drop_last=False,
+)

-    # Create dataloader for evaluation.
-    val_dataloader = torch.utils.data.DataLoader(
-        val_dataset,
-        num_workers=4,
-        batch_size=64,
-        shuffle=False,
-        pin_memory=device != torch.device("cpu"),
-        drop_last=False,
-    )
+# Run validation loop.
+loss_cumsum = 0
+n_examples_evaluated = 0
+for batch in val_dataloader:
+    batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}
+    output_dict = policy.forward(batch)

-    # Run validation loop.
-    loss_cumsum = 0
-    n_examples_evaluated = 0
-    for batch in val_dataloader:
-        batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}
-        loss, _ = policy.forward(batch)
+    loss_cumsum += output_dict["loss"].item()
+    n_examples_evaluated += batch["index"].shape[0]

-        loss_cumsum += loss.item()
-        n_examples_evaluated += batch["index"].shape[0]
+# Calculate the average loss over the validation set.
+average_loss = loss_cumsum / n_examples_evaluated

-    # Calculate the average loss over the validation set.
-    average_loss = loss_cumsum / n_examples_evaluated
-
-    print(f"Average loss on validation set: {average_loss:.4f}")
-
-
-if __name__ == "__main__":
-    main()
+print(f"Average loss on validation set: {average_loss:.4f}")
--- a/examples/lekiwi/evaluate.py
+++ b/examples/lekiwi/evaluate.py
@@ -1,38 +0,0 @@
-import torch
-
-from lerobot.common.policies.act.modeling_act import ACTPolicy
-from lerobot.common.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
-from lerobot.common.robots.lekiwi.lekiwi_client import LeKiwiClient
-from lerobot.common.utils.control_utils import predict_action
-from lerobot.common.utils.utils import get_safe_torch_device
-
-NB_CYCLES_CLIENT_CONNECTION = 1000
-
-robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
-robot = LeKiwiClient(robot_config)
-
-robot.connect()
-
-policy = ACTPolicy.from_pretrained("pepijn223/act_lekiwi_circle")
-policy.reset()
-
-print("Running inference")
-i = 0
-while i < NB_CYCLES_CLIENT_CONNECTION:
-    obs = robot.get_observation()
-
-    for key, value in obs.items():
-        if isinstance(value, torch.Tensor):
-            obs[key] = value.numpy()
-
-    action_values = predict_action(
-        obs, policy, get_safe_torch_device(policy.config.device), policy.config.use_amp
-    )
-    action = {
-        key: action_values[i].item() if isinstance(action_values[i], torch.Tensor) else action_values[i]
-        for i, key in enumerate(robot.action_features)
-    }
-    robot.send_action(action)
-    i += 1
-
-robot.disconnect()
--- a/examples/lekiwi/record.py
+++ b/examples/lekiwi/record.py
@@ -1,67 +0,0 @@
-import time
-
-from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.common.datasets.utils import hw_to_dataset_features
-from lerobot.common.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
-from lerobot.common.robots.lekiwi.lekiwi_client import LeKiwiClient
-from lerobot.common.teleoperators.keyboard import KeyboardTeleop, KeyboardTeleopConfig
-from lerobot.common.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig
-
-NB_CYCLES_CLIENT_CONNECTION = 250
-
-leader_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem58760431551")
-leader_arm = SO100Leader(leader_arm_config)
-
-keyboard_config = KeyboardTeleopConfig()
-keyboard = KeyboardTeleop(keyboard_config)
-
-robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
-robot = LeKiwiClient(robot_config)
-
-action_features = hw_to_dataset_features(robot.action_features, "action")
-obs_features = hw_to_dataset_features(robot.observation_features, "observation")
-dataset_features = {**action_features, **obs_features}
-
-dataset = LeRobotDataset.create(
-    repo_id="user/lekiwi" + str(int(time.time())),
-    fps=10,
-    features=dataset_features,
-    robot_type=robot.name,
-)
-
-leader_arm.connect()
-keyboard.connect()
-robot.connect()
-
-if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
-    exit()
-
-print("Starting LeKiwi teleoperation")
-i = 0
-while i < NB_CYCLES_CLIENT_CONNECTION:
-    arm_action = leader_arm.get_action()
-    arm_action = {f"arm_{k}": v for k, v in arm_action.items()}
-
-    keyboard_keys = keyboard.get_action()
-
-    base_action = robot._from_keyboard_to_base_action(keyboard_keys)
-
-    action = {**arm_action, **base_action} if len(base_action) > 0 else arm_action
-
-    action_sent = robot.send_action(action)
-    observation = robot.get_observation()
-
-    frame = {**action_sent, **observation}
-    task = "Dummy Example Task Dataset"
-
-    dataset.add_frame(frame, task)
-    i += 1
-
-print("Disconnecting Teleop Devices and LeKiwi Client")
-robot.disconnect()
-leader_arm.disconnect()
-keyboard.disconnect()
-
-print("Uploading dataset to the hub")
-dataset.save_episode()
-dataset.push_to_hub()
--- a/examples/lekiwi/replay.py
+++ b/examples/lekiwi/replay.py
@@ -1,25 +0,0 @@
-import time
-
-from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.common.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
-from lerobot.common.robots.lekiwi.lekiwi_client import LeKiwiClient
-from lerobot.common.utils.robot_utils import busy_wait
-
-robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
-robot = LeKiwiClient(robot_config)
-
-dataset = LeRobotDataset("pepijn223/lekiwi1749025613", episodes=[0])
-
-robot.connect()
-
-print("Replaying episode…")
-for _, action_array in enumerate(dataset.hf_dataset["action"]):
-    t0 = time.perf_counter()
-
-    action = {name: float(action_array[i]) for i, name in enumerate(dataset.features["action"]["names"])}
-    robot.send_action(action)
-
-    busy_wait(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
-
-print("Disconnecting LeKiwi Client")
-robot.disconnect()
--- a/examples/lekiwi/teleoperate.py
+++ b/examples/lekiwi/teleoperate.py
@@ -1,32 +0,0 @@
-from lerobot.common.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
-from lerobot.common.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop, KeyboardTeleopConfig
-from lerobot.common.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig
-
-robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="my_lekiwi")
-
-teleop__arm_config = SO100LeaderConfig(
-    port="/dev/tty.usbmodem58760431551",
-    id="my_awesome_leader_arm",
-)
-
-teleop_keyboard_config = KeyboardTeleopConfig(
-    id="my_laptop_keyboard",
-)
-
-robot = LeKiwiClient(robot_config)
-teleop_arm = SO100Leader(teleop__arm_config)
-telep_keyboard = KeyboardTeleop(teleop_keyboard_config)
-robot.connect()
-teleop_arm.connect()
-telep_keyboard.connect()
-
-while True:
-    observation = robot.get_observation()
-
-    arm_action = teleop_arm.get_action()
-    arm_action = {f"arm_{k}": v for k, v in arm_action.items()}
-
-    keyboard_keys = telep_keyboard.get_action()
-    base_action = robot._from_keyboard_to_base_action(keyboard_keys)
-
-    robot.send_action(arm_action | base_action)
--- a/lerobot/init.py
+++ b/lerobot/init.py
@@ -58,6 +58,7 @@ available_tasks_per_env = {
    ],
    "pusht": ["PushT-v0"],
    "xarm": ["XarmLift-v0"],
+    "dora_aloha_real": ["DoraAloha-v0", "DoraKoch-v0", "DoraReachy2-v0"],
 }
 available_envs = list(available_tasks_per_env.keys())

@@ -85,6 +86,23 @@ available_datasets_per_env = {
        "lerobot/xarm_push_medium_image",
        "lerobot/xarm_push_medium_replay_image",
    ],
+    "dora_aloha_real": [
+        "lerobot/aloha_static_battery",
+        "lerobot/aloha_static_candy",
+        "lerobot/aloha_static_coffee",
+        "lerobot/aloha_static_coffee_new",
+        "lerobot/aloha_static_cups_open",
+        "lerobot/aloha_static_fork_pick_up",
+        "lerobot/aloha_static_pingpong_test",
+        "lerobot/aloha_static_pro_pencil",
+        "lerobot/aloha_static_screw_driver",
+        "lerobot/aloha_static_tape",
+        "lerobot/aloha_static_thread_velcro",
+        "lerobot/aloha_static_towel",
+        "lerobot/aloha_static_vinh_cup",
+        "lerobot/aloha_static_vinh_cup_left",
+        "lerobot/aloha_static_ziploc_slide",
+    ],
 }

 available_real_world_datasets = [
@@ -163,12 +181,17 @@ available_real_world_datasets = [
    "lerobot/usc_cloth_sim",
 ]

-available_datasets = sorted(
-    set(itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets))
+available_datasets = list(
+    itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets)
 )

 # lists all available policies from `lerobot/common/policies`
-available_policies = ["act", "diffusion", "tdmpc", "vqbet"]
+available_policies = [
+    "act",
+    "diffusion",
+    "tdmpc",
+    "vqbet",
+]

 # lists all available robots from `lerobot/common/robot_devices/robots`
 available_robots = [
@@ -176,7 +199,7 @@ available_robots = [
    "koch_bimanual",
    "aloha",
    "so100",
-    "so101",
+    "moss",
 ]

 # lists all available cameras from `lerobot/common/robot_devices/cameras`
@@ -198,6 +221,7 @@ available_policies_per_env = {
    "xarm": ["tdmpc"],
    "koch_real": ["act_koch_real"],
    "aloha_real": ["act_aloha_real"],
+    "dora_aloha_real": ["act_aloha_real"],
 }

 env_task_pairs = [(env, task) for env, tasks in available_tasks_per_env.items() for task in tasks]
--- a/lerobot/calibrate.py
+++ b/lerobot/calibrate.py
@@ -1,84 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Helper to recalibrate your device (robot or teleoperator).
-
-Example:
-
-```shell
-python -m lerobot.calibrate \
-    --teleop.type=so100_leader \
-    --teleop.port=/dev/tty.usbmodem58760431551 \
-    --teleop.id=blue
-```
-"""
-
-import logging
-from dataclasses import asdict, dataclass
-from pprint import pformat
-
-import draccus
-
-from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig  # noqa: F401
-from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig  # noqa: F401
-from lerobot.common.robots import (  # noqa: F401
-    Robot,
-    RobotConfig,
-    koch_follower,
-    lekiwi,
-    make_robot_from_config,
-    so100_follower,
-    so101_follower,
-)
-from lerobot.common.teleoperators import (  # noqa: F401
-    Teleoperator,
-    TeleoperatorConfig,
-    koch_leader,
-    make_teleoperator_from_config,
-    so100_leader,
-    so101_leader,
-)
-from lerobot.common.utils.utils import init_logging
-
-
-@dataclass
-class CalibrateConfig:
-    teleop: TeleoperatorConfig | None = None
-    robot: RobotConfig | None = None
-
-    def __post_init__(self):
-        if bool(self.teleop) == bool(self.robot):
-            raise ValueError("Choose either a teleop or a robot.")
-
-        self.device = self.robot if self.robot else self.teleop
-
-
-@draccus.wrap()
-def calibrate(cfg: CalibrateConfig):
-    init_logging()
-    logging.info(pformat(asdict(cfg)))
-
-    if isinstance(cfg.device, RobotConfig):
-        device = make_robot_from_config(cfg.device)
-    elif isinstance(cfg.device, TeleoperatorConfig):
-        device = make_teleoperator_from_config(cfg.device)
-
-    device.connect(calibrate=False)
-    device.calibrate()
-    device.disconnect()
-
-
-if __name__ == "__main__":
-    calibrate()
--- a/lerobot/common/cameras/init.py
+++ b/lerobot/common/cameras/init.py
@@ -1,17 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .camera import Camera
-from .configs import CameraConfig, ColorMode, Cv2Rotation
-from .utils import make_cameras_from_configs
--- a/lerobot/common/cameras/camera.py
+++ b/lerobot/common/cameras/camera.py
@@ -1,120 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import abc
-from typing import Any, Dict, List
-
-import numpy as np
-
-from .configs import CameraConfig, ColorMode
-
-
-class Camera(abc.ABC):
-    """Base class for camera implementations.
-
-    Defines a standard interface for camera operations across different backends.
-    Subclasses must implement all abstract methods.
-
-    Manages basic camera properties (FPS, resolution) and core operations:
-    - Connection/disconnection
-    - Frame capture (sync/async)
-
-    Attributes:
-        fps (int | None): Configured frames per second
-        width (int | None): Frame width in pixels
-        height (int | None): Frame height in pixels
-
-    Example:
-        class MyCamera(Camera):
-            def __init__(self, config): ...
-            @property
-            def is_connected(self) -> bool: ...
-            def connect(self, warmup=True): ...
-            # Plus other required methods
-    """
-
-    def __init__(self, config: CameraConfig):
-        """Initialize the camera with the given configuration.
-
-        Args:
-            config: Camera configuration containing FPS and resolution.
-        """
-        self.fps: int | None = config.fps
-        self.width: int | None = config.width
-        self.height: int | None = config.height
-
-    @property
-    @abc.abstractmethod
-    def is_connected(self) -> bool:
-        """Check if the camera is currently connected.
-
-        Returns:
-            bool: True if the camera is connected and ready to capture frames,
-                  False otherwise.
-        """
-        pass
-
-    @staticmethod
-    @abc.abstractmethod
-    def find_cameras() -> List[Dict[str, Any]]:
-        """Detects available cameras connected to the system.
-        Returns:
-            List[Dict[str, Any]]: A list of dictionaries,
-            where each dictionary contains information about a detected camera.
-        """
-        pass
-
-    @abc.abstractmethod
-    def connect(self, warmup: bool = True) -> None:
-        """Establish connection to the camera.
-
-        Args:
-            warmup: If True (default), captures a warmup frame before returning. Useful
-                   for cameras that require time to adjust capture settings.
-                   If False, skips the warmup frame.
-        """
-        pass
-
-    @abc.abstractmethod
-    def read(self, color_mode: ColorMode | None = None) -> np.ndarray:
-        """Capture and return a single frame from the camera.
-
-        Args:
-            color_mode: Desired color mode for the output frame. If None,
-                        uses the camera's default color mode.
-
-        Returns:
-            np.ndarray: Captured frame as a numpy array.
-        """
-        pass
-
-    @abc.abstractmethod
-    def async_read(self, timeout_ms: float = ...) -> np.ndarray:
-        """Asynchronously capture and return a single frame from the camera.
-
-        Args:
-            timeout_ms: Maximum time to wait for a frame in milliseconds.
-                        Defaults to implementation-specific timeout.
-
-        Returns:
-            np.ndarray: Captured frame as a numpy array.
-        """
-        pass
-
-    @abc.abstractmethod
-    def disconnect(self) -> None:
-        """Disconnect from the camera and release resources."""
-        pass
--- a/lerobot/common/cameras/configs.py
+++ b/lerobot/common/cameras/configs.py
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import abc
-from dataclasses import dataclass
-from enum import Enum
-
-import draccus
-
-
-class ColorMode(str, Enum):
-    RGB = "rgb"
-    BGR = "bgr"
-
-
-class Cv2Rotation(int, Enum):
-    NO_ROTATION = 0
-    ROTATE_90 = 90
-    ROTATE_180 = 180
-    ROTATE_270 = -90
-
-
-@dataclass(kw_only=True)
-class CameraConfig(draccus.ChoiceRegistry, abc.ABC):
-    fps: int | None = None
-    width: int | None = None
-    height: int | None = None
-
-    @property
-    def type(self) -> str:
-        return self.get_choice_name(self.__class__)
--- a/lerobot/common/cameras/opencv/init.py
+++ b/lerobot/common/cameras/opencv/init.py
@@ -1,16 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .camera_opencv import OpenCVCamera
-from .configuration_opencv import OpenCVCameraConfig
--- a/lerobot/common/cameras/opencv/camera_opencv.py
+++ b/lerobot/common/cameras/opencv/camera_opencv.py
@@ -1,479 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Provides the OpenCVCamera class for capturing frames from cameras using OpenCV.
-"""
-
-import logging
-import math
-import platform
-import time
-from pathlib import Path
-from threading import Event, Lock, Thread
-from typing import Any, Dict, List
-
-import cv2
-import numpy as np
-
-from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
-
-from ..camera import Camera
-from ..utils import get_cv2_backend, get_cv2_rotation
-from .configuration_opencv import ColorMode, OpenCVCameraConfig
-
-# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance,
-# if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case
-# on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23.
-# When you change the USB port or reboot the computer, the operating system might
-# treat the same cameras as new devices. Thus we select a higher bound to search indices.
-MAX_OPENCV_INDEX = 60
-
-logger = logging.getLogger(__name__)
-
-
-class OpenCVCamera(Camera):
-    """
-    Manages camera interactions using OpenCV for efficient frame recording.
-
-    This class provides a high-level interface to connect to, configure, and read
-    frames from cameras compatible with OpenCV's VideoCapture. It supports both
-    synchronous and asynchronous frame reading.
-
-    An OpenCVCamera instance requires a camera index (e.g., 0) or a device path
-    (e.g., '/dev/video0' on Linux). Camera indices can be unstable across reboots
-    or port changes, especially on Linux. Use the provided utility script to find
-    available camera indices or paths:
-    ```bash
-    python -m lerobot.find_cameras opencv
-    ```
-
-    The camera's default settings (FPS, resolution, color mode) are used unless
-    overridden in the configuration.
-
-    Example:
-        ```python
-        from lerobot.common.cameras.opencv import OpenCVCamera
-        from lerobot.common.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode, Cv2Rotation
-
-        # Basic usage with camera index 0
-        config = OpenCVCameraConfig(index_or_path=0)
-        camera = OpenCVCamera(config)
-        camera.connect()
-
-        # Read 1 frame synchronously
-        color_image = camera.read()
-        print(color_image.shape)
-
-        # Read 1 frame asynchronously
-        async_image = camera.async_read()
-
-        # When done, properly disconnect the camera using
-        camera.disconnect()
-
-        # Example with custom settings
-        custom_config = OpenCVCameraConfig(
-            index_or_path='/dev/video0', # Or use an index
-            fps=30,
-            width=1280,
-            height=720,
-            color_mode=ColorMode.RGB,
-            rotation=Cv2Rotation.ROTATE_90
-        )
-        custom_camera = OpenCVCamera(custom_config)
-        # ... connect, read, disconnect ...
-        ```
-    """
-
-    def __init__(self, config: OpenCVCameraConfig):
-        """
-        Initializes the OpenCVCamera instance.
-
-        Args:
-            config: The configuration settings for the camera.
-        """
-        super().__init__(config)
-
-        self.config = config
-        self.index_or_path = config.index_or_path
-
-        self.fps = config.fps
-        self.color_mode = config.color_mode
-        self.warmup_s = config.warmup_s
-
-        self.videocapture: cv2.VideoCapture | None = None
-
-        self.thread: Thread | None = None
-        self.stop_event: Event | None = None
-        self.frame_lock: Lock = Lock()
-        self.latest_frame: np.ndarray | None = None
-        self.new_frame_event: Event = Event()
-
-        self.rotation: int | None = get_cv2_rotation(config.rotation)
-        self.backend: int = get_cv2_backend()
-
-        if self.height and self.width:
-            self.capture_width, self.capture_height = self.width, self.height
-            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
-                self.capture_width, self.capture_height = self.height, self.width
-
-    def __str__(self) -> str:
-        return f"{self.__class__.__name__}({self.index_or_path})"
-
-    @property
-    def is_connected(self) -> bool:
-        """Checks if the camera is currently connected and opened."""
-        return isinstance(self.videocapture, cv2.VideoCapture) and self.videocapture.isOpened()
-
-    def connect(self, warmup: bool = True):
-        """
-        Connects to the OpenCV camera specified in the configuration.
-
-        Initializes the OpenCV VideoCapture object, sets desired camera properties
-        (FPS, width, height), and performs initial checks.
-
-        Raises:
-            DeviceAlreadyConnectedError: If the camera is already connected.
-            ConnectionError: If the specified camera index/path is not found or the camera is found but fails to open.
-            RuntimeError: If the camera opens but fails to apply requested FPS/resolution settings.
-        """
-        if self.is_connected:
-            raise DeviceAlreadyConnectedError(f"{self} is already connected.")
-
-        # Use 1 thread for OpenCV operations to avoid potential conflicts or
-        # blocking in multi-threaded applications, especially during data collection.
-        cv2.setNumThreads(1)
-
-        self.videocapture = cv2.VideoCapture(self.index_or_path, self.backend)
-
-        if not self.videocapture.isOpened():
-            self.videocapture.release()
-            self.videocapture = None
-            raise ConnectionError(
-                f"Failed to open {self}."
-                f"Run `python -m lerobot.find_cameras opencv` to find available cameras."
-            )
-
-        self._configure_capture_settings()
-
-        if warmup:
-            start_time = time.time()
-            while time.time() - start_time < self.warmup_s:
-                self.read()
-                time.sleep(0.1)
-
-        logger.info(f"{self} connected.")
-
-    def _configure_capture_settings(self) -> None:
-        """
-        Applies the specified FPS, width, and height settings to the connected camera.
-
-        This method attempts to set the camera properties via OpenCV. It checks if
-        the camera successfully applied the settings and raises an error if not.
-
-        Args:
-            fps: The desired frames per second. If None, the setting is skipped.
-            width: The desired capture width. If None, the setting is skipped.
-            height: The desired capture height. If None, the setting is skipped.
-
-        Raises:
-            RuntimeError: If the camera fails to set any of the specified properties
-                          to the requested value.
-            DeviceNotConnectedError: If the camera is not connected when attempting
-                                     to configure settings.
-        """
-        if not self.is_connected:
-            raise DeviceNotConnectedError(f"Cannot configure settings for {self} as it is not connected.")
-
-        if self.fps is None:
-            self.fps = self.videocapture.get(cv2.CAP_PROP_FPS)
-        else:
-            self._validate_fps()
-
-        default_width = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_WIDTH)))
-        default_height = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
-
-        if self.width is None or self.height is None:
-            self.width, self.height = default_width, default_height
-            self.capture_width, self.capture_height = default_width, default_height
-            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
-                self.width, self.height = default_height, default_width
-                self.capture_width, self.capture_height = default_width, default_height
-        else:
-            self._validate_width_and_height()
-
-    def _validate_fps(self) -> None:
-        """Validates and sets the camera's frames per second (FPS)."""
-
-        success = self.videocapture.set(cv2.CAP_PROP_FPS, float(self.fps))
-        actual_fps = self.videocapture.get(cv2.CAP_PROP_FPS)
-        # Use math.isclose for robust float comparison
-        if not success or not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
-            raise RuntimeError(f"{self} failed to set fps={self.fps} ({actual_fps=}).")
-
-    def _validate_width_and_height(self) -> None:
-        """Validates and sets the camera's frame capture width and height."""
-
-        success = self.videocapture.set(cv2.CAP_PROP_FRAME_WIDTH, float(self.capture_width))
-        actual_width = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_WIDTH)))
-        if not success or self.capture_width != actual_width:
-            raise RuntimeError(f"{self} failed to set capture_width={self.capture_width} ({actual_width=}).")
-
-        success = self.videocapture.set(cv2.CAP_PROP_FRAME_HEIGHT, float(self.capture_height))
-        actual_height = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
-        if not success or self.capture_height != actual_height:
-            raise RuntimeError(
-                f"{self} failed to set capture_height={self.capture_height} ({actual_height})."
-            )
-
-    @staticmethod
-    def find_cameras() -> List[Dict[str, Any]]:
-        """
-        Detects available OpenCV cameras connected to the system.
-
-        On Linux, it scans '/dev/video*' paths. On other systems (like macOS, Windows),
-        it checks indices from 0 up to `MAX_OPENCV_INDEX`.
-
-        Returns:
-            List[Dict[str, Any]]: A list of dictionaries,
-            where each dictionary contains 'type', 'id' (port index or path),
-            and the default profile properties (width, height, fps, format).
-        """
-        found_cameras_info = []
-
-        if platform.system() == "Linux":
-            possible_paths = sorted(Path("/dev").glob("video*"), key=lambda p: p.name)
-            targets_to_scan = [str(p) for p in possible_paths]
-        else:
-            targets_to_scan = list(range(MAX_OPENCV_INDEX))
-
-        for target in targets_to_scan:
-            camera = cv2.VideoCapture(target)
-            if camera.isOpened():
-                default_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
-                default_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
-                default_fps = camera.get(cv2.CAP_PROP_FPS)
-                default_format = camera.get(cv2.CAP_PROP_FORMAT)
-                camera_info = {
-                    "name": f"OpenCV Camera @ {target}",
-                    "type": "OpenCV",
-                    "id": target,
-                    "backend_api": camera.getBackendName(),
-                    "default_stream_profile": {
-                        "format": default_format,
-                        "width": default_width,
-                        "height": default_height,
-                        "fps": default_fps,
-                    },
-                }
-
-                found_cameras_info.append(camera_info)
-                camera.release()
-
-        return found_cameras_info
-
-    def read(self, color_mode: ColorMode | None = None) -> np.ndarray:
-        """
-        Reads a single frame synchronously from the camera.
-
-        This is a blocking call. It waits for the next available frame from the
-        camera hardware via OpenCV.
-
-        Args:
-            color_mode (Optional[ColorMode]): If specified, overrides the default
-                color mode (`self.color_mode`) for this read operation (e.g.,
-                request RGB even if default is BGR).
-
-        Returns:
-            np.ndarray: The captured frame as a NumPy array in the format
-                       (height, width, channels), using the specified or default
-                       color mode and applying any configured rotation.
-
-        Raises:
-            DeviceNotConnectedError: If the camera is not connected.
-            RuntimeError: If reading the frame from the camera fails or if the
-                          received frame dimensions don't match expectations before rotation.
-            ValueError: If an invalid `color_mode` is requested.
-        """
-        if not self.is_connected:
-            raise DeviceNotConnectedError(f"{self} is not connected.")
-
-        start_time = time.perf_counter()
-
-        ret, frame = self.videocapture.read()
-
-        if not ret or frame is None:
-            raise RuntimeError(f"{self} read failed (status={ret}).")
-
-        processed_frame = self._postprocess_image(frame, color_mode)
-
-        read_duration_ms = (time.perf_counter() - start_time) * 1e3
-        logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
-
-        return processed_frame
-
-    def _postprocess_image(self, image: np.ndarray, color_mode: ColorMode | None = None) -> np.ndarray:
-        """
-        Applies color conversion, dimension validation, and rotation to a raw frame.
-
-        Args:
-            image (np.ndarray): The raw image frame (expected BGR format from OpenCV).
-            color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None,
-                                             uses the instance's default `self.color_mode`.
-
-        Returns:
-            np.ndarray: The processed image frame.
-
-        Raises:
-            ValueError: If the requested `color_mode` is invalid.
-            RuntimeError: If the raw frame dimensions do not match the configured
-                          `width` and `height`.
-        """
-        requested_color_mode = self.color_mode if color_mode is None else color_mode
-
-        if requested_color_mode not in (ColorMode.RGB, ColorMode.BGR):
-            raise ValueError(
-                f"Invalid color mode '{requested_color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
-            )
-
-        h, w, c = image.shape
-
-        if h != self.capture_height or w != self.capture_width:
-            raise RuntimeError(
-                f"{self} frame width={w} or height={h} do not match configured width={self.capture_width} or height={self.capture_height}."
-            )
-
-        if c != 3:
-            raise RuntimeError(f"{self} frame channels={c} do not match expected 3 channels (RGB/BGR).")
-
-        processed_image = image
-        if requested_color_mode == ColorMode.RGB:
-            processed_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-
-        if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
-            processed_image = cv2.rotate(processed_image, self.rotation)
-
-        return processed_image
-
-    def _read_loop(self):
-        """
-        Internal loop run by the background thread for asynchronous reading.
-
-        On each iteration:
-        1. Reads a color frame
-        2. Stores result in latest_frame (thread-safe)
-        3. Sets new_frame_event to notify listeners
-
-        Stops on DeviceNotConnectedError, logs other errors and continues.
-        """
-        while not self.stop_event.is_set():
-            try:
-                color_image = self.read()
-
-                with self.frame_lock:
-                    self.latest_frame = color_image
-                self.new_frame_event.set()
-
-            except DeviceNotConnectedError:
-                break
-            except Exception as e:
-                logger.warning(f"Error reading frame in background thread for {self}: {e}")
-
-    def _start_read_thread(self) -> None:
-        """Starts or restarts the background read thread if it's not running."""
-        if self.thread is not None and self.thread.is_alive():
-            self.thread.join(timeout=0.1)
-        if self.stop_event is not None:
-            self.stop_event.set()
-
-        self.stop_event = Event()
-        self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop")
-        self.thread.daemon = True
-        self.thread.start()
-
-    def _stop_read_thread(self) -> None:
-        """Signals the background read thread to stop and waits for it to join."""
-        if self.stop_event is not None:
-            self.stop_event.set()
-
-        if self.thread is not None and self.thread.is_alive():
-            self.thread.join(timeout=2.0)
-
-        self.thread = None
-        self.stop_event = None
-
-    def async_read(self, timeout_ms: float = 200) -> np.ndarray:
-        """
-        Reads the latest available frame asynchronously.
-
-        This method retrieves the most recent frame captured by the background
-        read thread. It does not block waiting for the camera hardware directly,
-        but may wait up to timeout_ms for the background thread to provide a frame.
-
-        Args:
-            timeout_ms (float): Maximum time in milliseconds to wait for a frame
-                to become available. Defaults to 200ms (0.2 seconds).
-
-        Returns:
-            np.ndarray: The latest captured frame as a NumPy array in the format
-                       (height, width, channels), processed according to configuration.
-
-        Raises:
-            DeviceNotConnectedError: If the camera is not connected.
-            TimeoutError: If no frame becomes available within the specified timeout.
-            RuntimeError: If an unexpected error occurs.
-        """
-        if not self.is_connected:
-            raise DeviceNotConnectedError(f"{self} is not connected.")
-
-        if self.thread is None or not self.thread.is_alive():
-            self._start_read_thread()
-
-        if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0):
-            thread_alive = self.thread is not None and self.thread.is_alive()
-            raise TimeoutError(
-                f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. "
-                f"Read thread alive: {thread_alive}."
-            )
-
-        with self.frame_lock:
-            frame = self.latest_frame
-            self.new_frame_event.clear()
-
-        if frame is None:
-            raise RuntimeError(f"Internal error: Event set but no frame available for {self}.")
-
-        return frame
-
-    def disconnect(self):
-        """
-        Disconnects from the camera and cleans up resources.
-
-        Stops the background read thread (if running) and releases the OpenCV
-        VideoCapture object.
-
-        Raises:
-            DeviceNotConnectedError: If the camera is already disconnected.
-        """
-        if not self.is_connected and self.thread is None:
-            raise DeviceNotConnectedError(f"{self} not connected.")
-
-        if self.thread is not None:
-            self._stop_read_thread()
-
-        if self.videocapture is not None:
-            self.videocapture.release()
-            self.videocapture = None
-
-        logger.info(f"{self} disconnected.")
--- a/lerobot/common/cameras/opencv/configuration_opencv.py
+++ b/lerobot/common/cameras/opencv/configuration_opencv.py
@@ -1,73 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from dataclasses import dataclass
-from pathlib import Path
-
-from ..configs import CameraConfig, ColorMode, Cv2Rotation
-
-
-@CameraConfig.register_subclass("opencv")
-@dataclass
-class OpenCVCameraConfig(CameraConfig):
-    """Configuration class for OpenCV-based camera devices or video files.
-
-    This class provides configuration options for cameras accessed through OpenCV,
-    supporting both physical camera devices and video files. It includes settings
-    for resolution, frame rate, color mode, and image rotation.
-
-    Example configurations:
-    ```python
-    # Basic configurations
-    OpenCVCameraConfig(0, 30, 1280, 720)   # 1280x720 @ 30FPS
-    OpenCVCameraConfig(/dev/video4, 60, 640, 480)   # 640x480 @ 60FPS
-
-    # Advanced configurations
-    OpenCVCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90)     # With 90° rotation
-    ```
-
-    Attributes:
-        index_or_path: Either an integer representing the camera device index,
-                      or a Path object pointing to a video file.
-        fps: Requested frames per second for the color stream.
-        width: Requested frame width in pixels for the color stream.
-        height: Requested frame height in pixels for the color stream.
-        color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
-        rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
-        warmup_s: Time reading frames before returning from connect (in seconds)
-
-    Note:
-        - Only 3-channel color output (RGB/BGR) is currently supported.
-    """
-
-    index_or_path: int | Path
-    color_mode: ColorMode = ColorMode.RGB
-    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
-    warmup_s: int = 1
-
-    def __post_init__(self):
-        if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
-            raise ValueError(
-                f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
-            )
-
-        if self.rotation not in (
-            Cv2Rotation.NO_ROTATION,
-            Cv2Rotation.ROTATE_90,
-            Cv2Rotation.ROTATE_180,
-            Cv2Rotation.ROTATE_270,
-        ):
-            raise ValueError(
-                f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
-            )
--- a/lerobot/common/cameras/realsense/init.py
+++ b/lerobot/common/cameras/realsense/init.py
@@ -1,16 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .camera_realsense import RealSenseCamera
-from .configuration_realsense import RealSenseCameraConfig
--- a/lerobot/common/cameras/realsense/camera_realsense.py
+++ b/lerobot/common/cameras/realsense/camera_realsense.py
@@ -1,556 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Provides the RealSenseCamera class for capturing frames from Intel RealSense cameras.
-"""
-
-import logging
-import time
-from threading import Event, Lock, Thread
-from typing import Any, Dict, List
-
-import cv2
-import numpy as np
-
-try:
-    import pyrealsense2 as rs
-except Exception as e:
-    logging.info(f"Could not import realsense: {e}")
-
-from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
-
-from ..camera import Camera
-from ..configs import ColorMode
-from ..utils import get_cv2_rotation
-from .configuration_realsense import RealSenseCameraConfig
-
-logger = logging.getLogger(__name__)
-
-
-class RealSenseCamera(Camera):
-    """
-    Manages interactions with Intel RealSense cameras for frame and depth recording.
-
-    This class provides an interface similar to `OpenCVCamera` but tailored for
-    RealSense devices, leveraging the `pyrealsense2` library. It uses the camera's
-    unique serial number for identification, offering more stability than device
-    indices, especially on Linux. It also supports capturing depth maps alongside
-    color frames.
-
-    Use the provided utility script to find available camera indices and default profiles:
-    ```bash
-    python -m lerobot.find_cameras realsense
-    ```
-
-    A `RealSenseCamera` instance requires a configuration object specifying the
-    camera's serial number or a unique device name. If using the name, ensure only
-    one camera with that name is connected.
-
-    The camera's default settings (FPS, resolution, color mode) from the stream
-    profile are used unless overridden in the configuration.
-
-    Example:
-        ```python
-        from lerobot.common.cameras.realsense import RealSenseCamera, RealSenseCameraConfig
-        from lerobot.common.cameras import ColorMode, Cv2Rotation
-
-        # Basic usage with serial number
-        config = RealSenseCameraConfig(serial_number_or_name="0123456789") # Replace with actual SN
-        camera = RealSenseCamera(config)
-        camera.connect()
-
-        # Read 1 frame synchronously
-        color_image = camera.read()
-        print(color_image.shape)
-
-        # Read 1 frame asynchronously
-        async_image = camera.async_read()
-
-        # When done, properly disconnect the camera using
-        camera.disconnect()
-
-        # Example with depth capture and custom settings
-        custom_config = RealSenseCameraConfig(
-            serial_number_or_name="0123456789", # Replace with actual SN
-            fps=30,
-            width=1280,
-            height=720,
-            color_mode=ColorMode.BGR, # Request BGR output
-            rotation=Cv2Rotation.NO_ROTATION,
-            use_depth=True
-        )
-        depth_camera = RealSenseCamera(custom_config)
-        depth_camera.connect()
-
-        # Read 1 depth frame
-        depth_map = depth_camera.read_depth()
-
-        # Example using a unique camera name
-        name_config = RealSenseCameraConfig(serial_number_or_name="Intel RealSense D435") # If unique
-        name_camera = RealSenseCamera(name_config)
-        # ... connect, read, disconnect ...
-        ```
-    """
-
-    def __init__(self, config: RealSenseCameraConfig):
-        """
-        Initializes the RealSenseCamera instance.
-
-        Args:
-            config: The configuration settings for the camera.
-        """
-
-        super().__init__(config)
-
-        self.config = config
-
-        if config.serial_number_or_name.isdigit():
-            self.serial_number = config.serial_number_or_name
-        else:
-            self.serial_number = self._find_serial_number_from_name(config.serial_number_or_name)
-
-        self.fps = config.fps
-        self.color_mode = config.color_mode
-        self.use_depth = config.use_depth
-        self.warmup_s = config.warmup_s
-
-        self.rs_pipeline: rs.pipeline | None = None
-        self.rs_profile: rs.pipeline_profile | None = None
-
-        self.thread: Thread | None = None
-        self.stop_event: Event | None = None
-        self.frame_lock: Lock = Lock()
-        self.latest_frame: np.ndarray | None = None
-        self.new_frame_event: Event = Event()
-
-        self.rotation: int | None = get_cv2_rotation(config.rotation)
-
-        if self.height and self.width:
-            self.capture_width, self.capture_height = self.width, self.height
-            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
-                self.capture_width, self.capture_height = self.height, self.width
-
-    def __str__(self) -> str:
-        return f"{self.__class__.__name__}({self.serial_number})"
-
-    @property
-    def is_connected(self) -> bool:
-        """Checks if the camera pipeline is started and streams are active."""
-        return self.rs_pipeline is not None and self.rs_profile is not None
-
-    def connect(self, warmup: bool = True):
-        """
-        Connects to the RealSense camera specified in the configuration.
-
-        Initializes the RealSense pipeline, configures the required streams (color
-        and optionally depth), starts the pipeline, and validates the actual stream settings.
-
-        Raises:
-            DeviceAlreadyConnectedError: If the camera is already connected.
-            ValueError: If the configuration is invalid (e.g., missing serial/name, name not unique).
-            ConnectionError: If the camera is found but fails to start the pipeline or no RealSense devices are detected at all.
-            RuntimeError: If the pipeline starts but fails to apply requested settings.
-        """
-        if self.is_connected:
-            raise DeviceAlreadyConnectedError(f"{self} is already connected.")
-
-        self.rs_pipeline = rs.pipeline()
-        rs_config = rs.config()
-        self._configure_rs_pipeline_config(rs_config)
-
-        try:
-            self.rs_profile = self.rs_pipeline.start(rs_config)
-        except RuntimeError as e:
-            self.rs_profile = None
-            self.rs_pipeline = None
-            raise ConnectionError(
-                f"Failed to open {self}."
-                "Run `python -m lerobot.find_cameras realsense` to find available cameras."
-            ) from e
-
-        self._configure_capture_settings()
-
-        if warmup:
-            time.sleep(
-                1
-            )  # NOTE(Steven): RS cameras need a bit of time to warm up before the first read. If we don't wait, the first read from the warmup will raise.
-            start_time = time.time()
-            while time.time() - start_time < self.warmup_s:
-                self.read()
-                time.sleep(0.1)
-
-        logger.info(f"{self} connected.")
-
-    @staticmethod
-    def find_cameras() -> List[Dict[str, Any]]:
-        """
-        Detects available Intel RealSense cameras connected to the system.
-
-        Returns:
-            List[Dict[str, Any]]: A list of dictionaries,
-            where each dictionary contains 'type', 'id' (serial number), 'name',
-            firmware version, USB type, and other available specs, and the default profile properties (width, height, fps, format).
-
-        Raises:
-            OSError: If pyrealsense2 is not installed.
-            ImportError: If pyrealsense2 is not installed.
-        """
-        found_cameras_info = []
-        context = rs.context()
-        devices = context.query_devices()
-
-        for device in devices:
-            camera_info = {
-                "name": device.get_info(rs.camera_info.name),
-                "type": "RealSense",
-                "id": device.get_info(rs.camera_info.serial_number),
-                "firmware_version": device.get_info(rs.camera_info.firmware_version),
-                "usb_type_descriptor": device.get_info(rs.camera_info.usb_type_descriptor),
-                "physical_port": device.get_info(rs.camera_info.physical_port),
-                "product_id": device.get_info(rs.camera_info.product_id),
-                "product_line": device.get_info(rs.camera_info.product_line),
-            }
-
-            # Get stream profiles for each sensor
-            sensors = device.query_sensors()
-            for sensor in sensors:
-                profiles = sensor.get_stream_profiles()
-
-                for profile in profiles:
-                    if profile.is_video_stream_profile() and profile.is_default():
-                        vprofile = profile.as_video_stream_profile()
-                        stream_info = {
-                            "stream_type": vprofile.stream_name(),
-                            "format": vprofile.format().name,
-                            "width": vprofile.width(),
-                            "height": vprofile.height(),
-                            "fps": vprofile.fps(),
-                        }
-                        camera_info["default_stream_profile"] = stream_info
-
-            found_cameras_info.append(camera_info)
-
-        return found_cameras_info
-
-    def _find_serial_number_from_name(self, name: str) -> str:
-        """Finds the serial number for a given unique camera name."""
-        camera_infos = self.find_cameras()
-        found_devices = [cam for cam in camera_infos if str(cam["name"]) == name]
-
-        if not found_devices:
-            available_names = [cam["name"] for cam in camera_infos]
-            raise ValueError(
-                f"No RealSense camera found with name '{name}'. Available camera names: {available_names}"
-            )
-
-        if len(found_devices) > 1:
-            serial_numbers = [dev["serial_number"] for dev in found_devices]
-            raise ValueError(
-                f"Multiple RealSense cameras found with name '{name}'. "
-                f"Please use a unique serial number instead. Found SNs: {serial_numbers}"
-            )
-
-        serial_number = str(found_devices[0]["serial_number"])
-        return serial_number
-
-    def _configure_rs_pipeline_config(self, rs_config):
-        """Creates and configures the RealSense pipeline configuration object."""
-        rs.config.enable_device(rs_config, self.serial_number)
-
-        if self.width and self.height and self.fps:
-            rs_config.enable_stream(
-                rs.stream.color, self.capture_width, self.capture_height, rs.format.rgb8, self.fps
-            )
-            if self.use_depth:
-                rs_config.enable_stream(
-                    rs.stream.depth, self.capture_width, self.capture_height, rs.format.z16, self.fps
-                )
-        else:
-            rs_config.enable_stream(rs.stream.color)
-            if self.use_depth:
-                rs_config.enable_stream(rs.stream.depth)
-
-    def _configure_capture_settings(self) -> None:
-        """Sets fps, width, and height from device stream if not already configured.
-
-        Uses the color stream profile to update unset attributes. Handles rotation by
-        swapping width/height when needed. Original capture dimensions are always stored.
-
-        Raises:
-            DeviceNotConnectedError: If device is not connected.
-        """
-        if not self.is_connected:
-            raise DeviceNotConnectedError(f"Cannot validate settings for {self} as it is not connected.")
-
-        stream = self.rs_profile.get_stream(rs.stream.color).as_video_stream_profile()
-
-        if self.fps is None:
-            self.fps = stream.fps()
-
-        if self.width is None or self.height is None:
-            actual_width = int(round(stream.width()))
-            actual_height = int(round(stream.height()))
-            if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
-                self.width, self.height = actual_height, actual_width
-                self.capture_width, self.capture_height = actual_width, actual_height
-            else:
-                self.width, self.height = actual_width, actual_height
-                self.capture_width, self.capture_height = actual_width, actual_height
-
-    def read_depth(self, timeout_ms: int = 200) -> np.ndarray:
-        """
-        Reads a single frame (depth) synchronously from the camera.
-
-        This is a blocking call. It waits for a coherent set of frames (depth)
-        from the camera hardware via the RealSense pipeline.
-
-        Args:
-            timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.
-
-        Returns:
-            np.ndarray: The depth map as a NumPy array (height, width)
-                  of type `np.uint16` (raw depth values in millimeters) and rotation.
-
-        Raises:
-            DeviceNotConnectedError: If the camera is not connected.
-            RuntimeError: If reading frames from the pipeline fails or frames are invalid.
-        """
-
-        if not self.is_connected:
-            raise DeviceNotConnectedError(f"{self} is not connected.")
-        if not self.use_depth:
-            raise RuntimeError(
-                f"Failed to capture depth frame '.read_depth()'. Depth stream is not enabled for {self}."
-            )
-
-        start_time = time.perf_counter()
-
-        ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms)
-
-        if not ret or frame is None:
-            raise RuntimeError(f"{self} read_depth failed (status={ret}).")
-
-        depth_frame = frame.get_depth_frame()
-        depth_map = np.asanyarray(depth_frame.get_data())
-
-        depth_map_processed = self._postprocess_image(depth_map, depth_frame=True)
-
-        read_duration_ms = (time.perf_counter() - start_time) * 1e3
-        logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
-
-        return depth_map_processed
-
-    def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 200) -> np.ndarray:
-        """
-        Reads a single frame (color) synchronously from the camera.
-
-        This is a blocking call. It waits for a coherent set of frames (color)
-        from the camera hardware via the RealSense pipeline.
-
-        Args:
-            timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.
-
-        Returns:
-            np.ndarray: The captured color frame as a NumPy array
-              (height, width, channels), processed according to `color_mode` and rotation.
-
-        Raises:
-            DeviceNotConnectedError: If the camera is not connected.
-            RuntimeError: If reading frames from the pipeline fails or frames are invalid.
-            ValueError: If an invalid `color_mode` is requested.
-        """
-
-        if not self.is_connected:
-            raise DeviceNotConnectedError(f"{self} is not connected.")
-
-        start_time = time.perf_counter()
-
-        ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms)
-
-        if not ret or frame is None:
-            raise RuntimeError(f"{self} read failed (status={ret}).")
-
-        color_frame = frame.get_color_frame()
-        color_image_raw = np.asanyarray(color_frame.get_data())
-
-        color_image_processed = self._postprocess_image(color_image_raw, color_mode)
-
-        read_duration_ms = (time.perf_counter() - start_time) * 1e3
-        logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
-
-        return color_image_processed
-
-    def _postprocess_image(
-        self, image: np.ndarray, color_mode: ColorMode | None = None, depth_frame: bool = False
-    ) -> np.ndarray:
-        """
-        Applies color conversion, dimension validation, and rotation to a raw color frame.
-
-        Args:
-            image (np.ndarray): The raw image frame (expected RGB format from RealSense).
-            color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None,
-                                             uses the instance's default `self.color_mode`.
-
-        Returns:
-            np.ndarray: The processed image frame according to `self.color_mode` and `self.rotation`.
-
-        Raises:
-            ValueError: If the requested `color_mode` is invalid.
-            RuntimeError: If the raw frame dimensions do not match the configured
-                          `width` and `height`.
-        """
-
-        if color_mode and color_mode not in (ColorMode.RGB, ColorMode.BGR):
-            raise ValueError(
-                f"Invalid requested color mode '{color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
-            )
-
-        if depth_frame:
-            h, w = image.shape
-        else:
-            h, w, c = image.shape
-
-            if c != 3:
-                raise RuntimeError(f"{self} frame channels={c} do not match expected 3 channels (RGB/BGR).")
-
-        if h != self.capture_height or w != self.capture_width:
-            raise RuntimeError(
-                f"{self} frame width={w} or height={h} do not match configured width={self.capture_width} or height={self.capture_height}."
-            )
-
-        processed_image = image
-        if self.color_mode == ColorMode.BGR:
-            processed_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-
-        if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]:
-            processed_image = cv2.rotate(processed_image, self.rotation)
-
-        return processed_image
-
-    def _read_loop(self):
-        """
-        Internal loop run by the background thread for asynchronous reading.
-
-        On each iteration:
-        1. Reads a color frame with 500ms timeout
-        2. Stores result in latest_frame (thread-safe)
-        3. Sets new_frame_event to notify listeners
-
-        Stops on DeviceNotConnectedError, logs other errors and continues.
-        """
-        while not self.stop_event.is_set():
-            try:
-                color_image = self.read(timeout_ms=500)
-
-                with self.frame_lock:
-                    self.latest_frame = color_image
-                self.new_frame_event.set()
-
-            except DeviceNotConnectedError:
-                break
-            except Exception as e:
-                logger.warning(f"Error reading frame in background thread for {self}: {e}")
-
-    def _start_read_thread(self) -> None:
-        """Starts or restarts the background read thread if it's not running."""
-        if self.thread is not None and self.thread.is_alive():
-            self.thread.join(timeout=0.1)
-        if self.stop_event is not None:
-            self.stop_event.set()
-
-        self.stop_event = Event()
-        self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop")
-        self.thread.daemon = True
-        self.thread.start()
-
-    def _stop_read_thread(self):
-        """Signals the background read thread to stop and waits for it to join."""
-        if self.stop_event is not None:
-            self.stop_event.set()
-
-        if self.thread is not None and self.thread.is_alive():
-            self.thread.join(timeout=2.0)
-
-        self.thread = None
-        self.stop_event = None
-
-    # NOTE(Steven): Missing implementation for depth for now
-    def async_read(self, timeout_ms: float = 200) -> np.ndarray:
-        """
-        Reads the latest available frame data (color) asynchronously.
-
-        This method retrieves the most recent color frame captured by the background
-        read thread. It does not block waiting for the camera hardware directly,
-        but may wait up to timeout_ms for the background thread to provide a frame.
-
-        Args:
-            timeout_ms (float): Maximum time in milliseconds to wait for a frame
-                to become available. Defaults to 200ms (0.2 seconds).
-
-        Returns:
-            np.ndarray:
-            The latest captured frame data (color image), processed according to configuration.
-
-        Raises:
-            DeviceNotConnectedError: If the camera is not connected.
-            TimeoutError: If no frame data becomes available within the specified timeout.
-            RuntimeError: If the background thread died unexpectedly or another error occurs.
-        """
-        if not self.is_connected:
-            raise DeviceNotConnectedError(f"{self} is not connected.")
-
-        if self.thread is None or not self.thread.is_alive():
-            self._start_read_thread()
-
-        if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0):
-            thread_alive = self.thread is not None and self.thread.is_alive()
-            raise TimeoutError(
-                f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. "
-                f"Read thread alive: {thread_alive}."
-            )
-
-        with self.frame_lock:
-            frame = self.latest_frame
-            self.new_frame_event.clear()
-
-        if frame is None:
-            raise RuntimeError(f"Internal error: Event set but no frame available for {self}.")
-
-        return frame
-
-    def disconnect(self):
-        """
-        Disconnects from the camera, stops the pipeline, and cleans up resources.
-
-        Stops the background read thread (if running) and stops the RealSense pipeline.
-
-        Raises:
-            DeviceNotConnectedError: If the camera is already disconnected (pipeline not running).
-        """
-
-        if not self.is_connected and self.thread is None:
-            raise DeviceNotConnectedError(
-                f"Attempted to disconnect {self}, but it appears already disconnected."
-            )
-
-        if self.thread is not None:
-            self._stop_read_thread()
-
-        if self.rs_pipeline is not None:
-            self.rs_pipeline.stop()
-            self.rs_pipeline = None
-            self.rs_profile = None
-
-        logger.info(f"{self} disconnected.")
--- a/lerobot/common/cameras/realsense/configuration_realsense.py
+++ b/lerobot/common/cameras/realsense/configuration_realsense.py
@@ -1,82 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from dataclasses import dataclass
-
-from ..configs import CameraConfig, ColorMode, Cv2Rotation
-
-
-@CameraConfig.register_subclass("intelrealsense")
-@dataclass
-class RealSenseCameraConfig(CameraConfig):
-    """Configuration class for Intel RealSense cameras.
-
-    This class provides specialized configuration options for Intel RealSense cameras,
-    including support for depth sensing and device identification via serial number or name.
-
-    Example configurations for Intel RealSense D405:
-    ```python
-    # Basic configurations
-    RealSenseCameraConfig("0123456789", 30, 1280, 720)   # 1280x720 @ 30FPS
-    RealSenseCameraConfig("0123456789", 60, 640, 480)   # 640x480 @ 60FPS
-
-    # Advanced configurations
-    RealSenseCameraConfig("0123456789", 30, 640, 480, use_depth=True)  # With depth sensing
-    RealSenseCameraConfig("0123456789", 30, 640, 480, rotation=Cv2Rotation.ROTATE_90)     # With 90° rotation
-    ```
-
-    Attributes:
-        fps: Requested frames per second for the color stream.
-        width: Requested frame width in pixels for the color stream.
-        height: Requested frame height in pixels for the color stream.
-        serial_number_or_name: Unique serial number or human-readable name to identify the camera.
-        color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
-        use_depth: Whether to enable depth stream. Defaults to False.
-        rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
-        warmup_s: Time reading frames before returning from connect (in seconds)
-
-    Note:
-        - Either name or serial_number must be specified.
-        - Depth stream configuration (if enabled) will use the same FPS as the color stream.
-        - The actual resolution and FPS may be adjusted by the camera to the nearest supported mode.
-        - For `fps`, `width` and `height`, either all of them need to be set, or none of them.
-    """
-
-    serial_number_or_name: str
-    color_mode: ColorMode = ColorMode.RGB
-    use_depth: bool = False
-    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
-    warmup_s: int = 1
-
-    def __post_init__(self):
-        if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
-            raise ValueError(
-                f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
-            )
-
-        if self.rotation not in (
-            Cv2Rotation.NO_ROTATION,
-            Cv2Rotation.ROTATE_90,
-            Cv2Rotation.ROTATE_180,
-            Cv2Rotation.ROTATE_270,
-        ):
-            raise ValueError(
-                f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
-            )
-
-        values = (self.fps, self.width, self.height)
-        if any(v is not None for v in values) and any(v is None for v in values):
-            raise ValueError(
-                "For `fps`, `width` and `height`, either all of them need to be set, or none of them."
-            )
--- a/lerobot/common/cameras/utils.py
+++ b/lerobot/common/cameras/utils.py
@@ -1,65 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import platform
-from pathlib import Path
-from typing import TypeAlias
-
-from .camera import Camera
-from .configs import CameraConfig, Cv2Rotation
-
-IndexOrPath: TypeAlias = int | Path
-
-
-def make_cameras_from_configs(camera_configs: dict[str, CameraConfig]) -> dict[str, Camera]:
-    cameras = {}
-
-    for key, cfg in camera_configs.items():
-        if cfg.type == "opencv":
-            from .opencv import OpenCVCamera
-
-            cameras[key] = OpenCVCamera(cfg)
-
-        elif cfg.type == "intelrealsense":
-            from .realsense.camera_realsense import RealSenseCamera
-
-            cameras[key] = RealSenseCamera(cfg)
-        else:
-            raise ValueError(f"The motor type '{cfg.type}' is not valid.")
-
-    return cameras
-
-
-def get_cv2_rotation(rotation: Cv2Rotation) -> int | None:
-    import cv2
-
-    if rotation == Cv2Rotation.ROTATE_90:
-        return cv2.ROTATE_90_CLOCKWISE
-    elif rotation == Cv2Rotation.ROTATE_180:
-        return cv2.ROTATE_180
-    elif rotation == Cv2Rotation.ROTATE_270:
-        return cv2.ROTATE_90_COUNTERCLOCKWISE
-    else:
-        return None
-
-
-def get_cv2_backend() -> int:
-    import cv2
-
-    if platform.system() == "Windows":
-        return cv2.CAP_AVFOUNDATION
-    else:
-        return cv2.CAP_ANY
--- a/lerobot/common/constants.py
+++ b/lerobot/common/constants.py
@@ -1,52 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# keys
-import os
-from pathlib import Path
-
-from huggingface_hub.constants import HF_HOME
-
-OBS_ENV_STATE = "observation.environment_state"
-OBS_STATE = "observation.state"
-OBS_IMAGE = "observation.image"
-OBS_IMAGES = "observation.images"
-ACTION = "action"
-
-ROBOTS = "robots"
-TELEOPERATORS = "teleoperators"
-
-# files & directories
-CHECKPOINTS_DIR = "checkpoints"
-LAST_CHECKPOINT_LINK = "last"
-PRETRAINED_MODEL_DIR = "pretrained_model"
-TRAINING_STATE_DIR = "training_state"
-RNG_STATE = "rng_state.safetensors"
-TRAINING_STEP = "training_step.json"
-OPTIMIZER_STATE = "optimizer_state.safetensors"
-OPTIMIZER_PARAM_GROUPS = "optimizer_param_groups.json"
-SCHEDULER_STATE = "scheduler_state.json"
-
-if "LEROBOT_HOME" in os.environ:
-    raise ValueError(
-        f"You have a 'LEROBOT_HOME' environment variable set to '{os.getenv('LEROBOT_HOME')}'.\n"
-        "'LEROBOT_HOME' is deprecated, please use 'HF_LEROBOT_HOME' instead."
-    )
-
-# cache dir
-default_cache_path = Path(HF_HOME) / "lerobot"
-HF_LEROBOT_HOME = Path(os.getenv("HF_LEROBOT_HOME", default_cache_path)).expanduser()
-
-# calibration dir
-default_calibration_path = HF_LEROBOT_HOME / "calibration"
-HF_LEROBOT_CALIBRATION = Path(os.getenv("HF_LEROBOT_CALIBRATION", default_calibration_path)).expanduser()
--- a/lerobot/common/datasets/backward_compatibility.py
+++ b/lerobot/common/datasets/backward_compatibility.py
@@ -1,68 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import packaging.version
-
-V2_MESSAGE = """
-The dataset you requested ({repo_id}) is in {version} format.
-
-We introduced a new format since v2.0 which is not backward compatible with v1.x.
-Please, use our conversion script. Modify the following command with your own task description:
-```
-python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \\
-    --repo-id {repo_id} \\
-    --single-task "TASK DESCRIPTION."  # <---- /!\\ Replace TASK DESCRIPTION /!\\
-```
-
-A few examples to replace TASK DESCRIPTION: "Pick up the blue cube and place it into the bin.", "Insert the
-peg into the socket.", "Slide open the ziploc bag.", "Take the elevator to the 1st floor.", "Open the top
-cabinet, store the pot inside it then close the cabinet.", "Push the T-shaped block onto the T-shaped
-target.", "Grab the spray paint on the shelf and place it in the bin on top of the robot dog.", "Fold the
-sweatshirt.", ...
-
-If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
-or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose).
-"""
-
-V21_MESSAGE = """
-The dataset you requested ({repo_id}) is in {version} format.
-While current version of LeRobot is backward-compatible with it, the version of your dataset still uses global
-stats instead of per-episode stats. Update your dataset stats to the new format using this command:
-```
-python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id={repo_id}
-```
-
-If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
-or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose).
-"""
-
-FUTURE_MESSAGE = """
-The dataset you requested ({repo_id}) is only available in {version} format.
-As we cannot ensure forward compatibility with it, please update your current version of lerobot.
-"""
-
-
-class CompatibilityError(Exception): ...
-
-
-class BackwardCompatibilityError(CompatibilityError):
-    def __init__(self, repo_id: str, version: packaging.version.Version):
-        message = V2_MESSAGE.format(repo_id=repo_id, version=version)
-        super().__init__(message)
-
-
-class ForwardCompatibilityError(CompatibilityError):
-    def __init__(self, repo_id: str, version: packaging.version.Version):
-        message = FUTURE_MESSAGE.format(repo_id=repo_id, version=version)
-        super().__init__(message)
--- a/lerobot/common/datasets/card_template.md
+++ b/lerobot/common/datasets/card_template.md
@@ -1,27 +0,0 @@
---
-# For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1
-# Doc / guide: https://huggingface.co/docs/hub/datasets-cards
-{{ card_data }}
---
-
-This dataset was created using [LeRobot](https://github.com/huggingface/lerobot).
-
-## Dataset Description
-
-{{ dataset_description | default("", true) }}
-
- **Homepage:** {{ url | default("[More Information Needed]", true)}}
- **Paper:** {{ paper | default("[More Information Needed]", true)}}
- **License:** {{ license | default("[More Information Needed]", true)}}
-
-## Dataset Structure
-
-{{ dataset_structure | default("[More Information Needed]", true)}}
-
-## Citation
-
-**BibTeX:**
-
-```bibtex
-{{ citation_bibtex | default("[More Information Needed]", true)}}
-```
--- a/lerobot/common/datasets/compute_stats.py
+++ b/lerobot/common/datasets/compute_stats.py
@@ -13,164 +13,201 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
+from copy import deepcopy
+from math import ceil

-from lerobot.common.datasets.utils import load_image_as_numpy
+import einops
+import torch
+import tqdm
+from datasets import Image
+
+from lerobot.common.datasets.video_utils import VideoFrame


-def estimate_num_samples(
-    dataset_len: int, min_num_samples: int = 100, max_num_samples: int = 10_000, power: float = 0.75
-) -> int:
-    """Heuristic to estimate the number of samples based on dataset size.
-    The power controls the sample growth relative to dataset size.
-    Lower the power for less number of samples.
+def get_stats_einops_patterns(dataset, num_workers=0):
+    """Build, for each dataset feature key, the einops pattern used to reduce its batches into statistics.

-    For default arguments, we have:
-    - from 1 to ~500, num_samples=100
-    - at 1000, num_samples=177
-    - at 2000, num_samples=299
-    - at 5000, num_samples=594
-    - at 10000, num_samples=1000
-    - at 20000, num_samples=1681
+    Note: images are assumed channel-first float32 in [0, 1]; this is asserted on one batch of 2 items.
    """
-    if dataset_len < min_num_samples:
-        min_num_samples = dataset_len
-    return max(min_num_samples, min(int(dataset_len**power), max_num_samples))

+    dataloader = torch.utils.data.DataLoader(
+        dataset,
+        num_workers=num_workers,
+        batch_size=2,
+        shuffle=False,
+    )
+    batch = next(iter(dataloader))  # only the first batch is inspected to derive the patterns

-def sample_indices(data_len: int) -> list[int]:
-    num_samples = estimate_num_samples(data_len)
-    return np.round(np.linspace(0, data_len - 1, num_samples)).astype(int).tolist()
+    stats_patterns = {}
+    for key, feats_type in dataset.features.items():
+        # NOTE: skip language_instruction embedding in stats computation
+        if key == "language_instruction":
+            continue

+        # sanity check that tensors are not float64
+        assert batch[key].dtype != torch.float64

-def auto_downsample_height_width(img: np.ndarray, target_size: int = 150, max_size_threshold: int = 300):
-    _, height, width = img.shape
+        if isinstance(feats_type, (VideoFrame, Image)):
+            # sanity check that images are channel first
+            _, c, h, w = batch[key].shape
+            assert c < h and c < w, f"expect channel first images, but instead {batch[key].shape}"

-    if max(width, height) < max_size_threshold:
-        # no downsampling needed
-        return img
+            # sanity check that images are float32 in range [0,1]
+            assert batch[key].dtype == torch.float32, f"expect torch.float32, but instead {batch[key].dtype=}"
+            assert batch[key].max() <= 1, f"expect pixels lower than 1, but instead {batch[key].max()=}"
+            assert batch[key].min() >= 0, f"expect pixels greater than 0, but instead {batch[key].min()=}"

-    downsample_factor = int(width / target_size) if width > height else int(height / target_size)
-    return img[:, ::downsample_factor, ::downsample_factor]
-
-
-def sample_images(image_paths: list[str]) -> np.ndarray:
-    sampled_indices = sample_indices(len(image_paths))
-
-    images = None
-    for i, idx in enumerate(sampled_indices):
-        path = image_paths[idx]
-        # we load as uint8 to reduce memory usage
-        img = load_image_as_numpy(path, dtype=np.uint8, channel_first=True)
-        img = auto_downsample_height_width(img)
-
-        if images is None:
-            images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
-
-        images[i] = img
-
-    return images
-
-
-def get_feature_stats(array: np.ndarray, axis: tuple, keepdims: bool) -> dict[str, np.ndarray]:
-    return {
-        "min": np.min(array, axis=axis, keepdims=keepdims),
-        "max": np.max(array, axis=axis, keepdims=keepdims),
-        "mean": np.mean(array, axis=axis, keepdims=keepdims),
-        "std": np.std(array, axis=axis, keepdims=keepdims),
-        "count": np.array([len(array)]),
-    }
-
-
-def compute_episode_stats(episode_data: dict[str, list[str] | np.ndarray], features: dict) -> dict:
-    ep_stats = {}
-    for key, data in episode_data.items():
-        if features[key]["dtype"] == "string":
-            continue  # HACK: we should receive np.arrays of strings
-        elif features[key]["dtype"] in ["image", "video"]:
-            ep_ft_array = sample_images(data)  # data is a list of image paths
-            axes_to_reduce = (0, 2, 3)  # keep channel dim
-            keepdims = True
+            stats_patterns[key] = "b c h w -> c 1 1"  # one value per channel, kept as (c, 1, 1)
+        elif batch[key].ndim == 2:
+            stats_patterns[key] = "b c -> c "
+        elif batch[key].ndim == 1:
+            stats_patterns[key] = "b -> 1"
        else:
-            ep_ft_array = data  # data is already a np.ndarray
-            axes_to_reduce = 0  # compute stats over the first axis
-            keepdims = data.ndim == 1  # keep as np.array
+            raise ValueError(f"{key}, {feats_type}, {batch[key].shape}")

-        ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
-
-        # finally, we normalize and remove batch dim for images
-        if features[key]["dtype"] in ["image", "video"]:
-            ep_stats[key] = {
-                k: v if k == "count" else np.squeeze(v / 255.0, axis=0) for k, v in ep_stats[key].items()
-            }
-
-    return ep_stats
+    return stats_patterns


-def _assert_type_and_shape(stats_list: list[dict[str, dict]]):
-    for i in range(len(stats_list)):
-        for fkey in stats_list[i]:
-            for k, v in stats_list[i][fkey].items():
-                if not isinstance(v, np.ndarray):
-                    raise ValueError(
-                        f"Stats must be composed of numpy array, but key '{k}' of feature '{fkey}' is of type '{type(v)}' instead."
-                    )
-                if v.ndim == 0:
-                    raise ValueError("Number of dimensions must be at least 1, and is 0 instead.")
-                if k == "count" and v.shape != (1,):
-                    raise ValueError(f"Shape of 'count' must be (1), but is {v.shape} instead.")
-                if "image" in fkey and k != "count" and v.shape != (3, 1, 1):
-                    raise ValueError(f"Shape of '{k}' must be (3,1,1), but is {v.shape} instead.")
+def compute_stats(dataset, batch_size=8, num_workers=8, max_num_samples=None):
+    """Compute per-key mean, std, max and min over a LeRobotDataset, in two passes over shuffled batches."""
+    if max_num_samples is None:
+        max_num_samples = len(dataset)  # default: use the whole dataset
+
+    # for more info on why we need to set the same number of workers, see `load_from_videos`
+    stats_patterns = get_stats_einops_patterns(dataset, num_workers)
+
+    # mean and std will be computed incrementally while max and min will track the running value.
+    mean, std, max, min = {}, {}, {}, {}  # NOTE(review): `max`/`min` shadow the builtins in this function
+    for key in stats_patterns:
+        mean[key] = torch.tensor(0.0).float()
+        std[key] = torch.tensor(0.0).float()
+        max[key] = torch.tensor(-float("inf")).float()
+        min[key] = torch.tensor(float("inf")).float()
+
+    def create_seeded_dataloader(dataset, batch_size, seed):
+        generator = torch.Generator()
+        generator.manual_seed(seed)  # seeded so that the shuffled order can be replayed in the second pass
+        dataloader = torch.utils.data.DataLoader(
+            dataset,
+            num_workers=num_workers,
+            batch_size=batch_size,
+            shuffle=True,
+            drop_last=False,
+            generator=generator,
+        )
+        return dataloader
+
+    # Note: Due to be refactored soon. The point of storing `first_batch` is to make sure we don't get
+    # surprises when rerunning the sampler.
+    first_batch = None
+    running_item_count = 0  # for online mean computation
+    dataloader = create_seeded_dataloader(dataset, batch_size, seed=1337)
+    for i, batch in enumerate(
+        tqdm.tqdm(dataloader, total=ceil(max_num_samples / batch_size), desc="Compute mean, min, max")
+    ):
+        this_batch_size = len(batch["index"])  # the last batch may be smaller (drop_last=False)
+        running_item_count += this_batch_size
+        if first_batch is None:
+            first_batch = deepcopy(batch)
+        for key, pattern in stats_patterns.items():
+            batch[key] = batch[key].float()
+            # Numerically stable update step for mean computation.
+            batch_mean = einops.reduce(batch[key], pattern, "mean")
+            # Hint: to update the mean we need x̄ₙ = (Nₙ₋₁x̄ₙ₋₁ + Bₙxₙ) / Nₙ, where the subscript represents
+            # the update step, N is the running item count, B is this batch size, x̄ is the running mean,
+            # and x is the current batch mean. Some rearrangement is then required to avoid risking
+            # numerical overflow. Another hint: Nₙ₋₁ = Nₙ - Bₙ. Rearrangement yields
+            # x̄ₙ = x̄ₙ₋₁ + Bₙ * (xₙ - x̄ₙ₋₁) / Nₙ
+            mean[key] = mean[key] + this_batch_size * (batch_mean - mean[key]) / running_item_count
+            max[key] = torch.maximum(max[key], einops.reduce(batch[key], pattern, "max"))
+            min[key] = torch.minimum(min[key], einops.reduce(batch[key], pattern, "min"))
+
+        if i == ceil(max_num_samples / batch_size) - 1:  # stop after ceil(max_num_samples / batch_size) batches
+            break
+
+    first_batch_ = None
+    running_item_count = 0  # for online std computation
+    dataloader = create_seeded_dataloader(dataset, batch_size, seed=1337)  # same seed as the first pass
+    for i, batch in enumerate(
+        tqdm.tqdm(dataloader, total=ceil(max_num_samples / batch_size), desc="Compute std")
+    ):
+        this_batch_size = len(batch["index"])
+        running_item_count += this_batch_size
+        # Sanity check to make sure the batches are still in the same order as before.
+        if first_batch_ is None:
+            first_batch_ = deepcopy(batch)
+            for key in stats_patterns:
+                assert torch.equal(first_batch_[key], first_batch[key])
+        for key, pattern in stats_patterns.items():
+            batch[key] = batch[key].float()
+            # Numerically stable update step for mean computation (where the mean is over squared
+            # residuals). See notes in the mean computation loop above.
+            batch_std = einops.reduce((batch[key] - mean[key]) ** 2, pattern, "mean")
+            std[key] = std[key] + this_batch_size * (batch_std - std[key]) / running_item_count
+
+        if i == ceil(max_num_samples / batch_size) - 1:  # same stopping point as the first pass
+            break
+
+    for key in stats_patterns:
+        std[key] = torch.sqrt(std[key])  # until here `std` holds the mean squared residual
+
+    stats = {}
+    for key in stats_patterns:
+        stats[key] = {
+            "mean": mean[key],
+            "std": std[key],
+            "max": max[key],
+            "min": min[key],
+        }
+    return stats


-def aggregate_feature_stats(stats_ft_list: list[dict[str, dict]]) -> dict[str, dict[str, np.ndarray]]:
-    """Aggregates stats for a single feature."""
-    means = np.stack([s["mean"] for s in stats_ft_list])
-    variances = np.stack([s["std"] ** 2 for s in stats_ft_list])
-    counts = np.stack([s["count"] for s in stats_ft_list])
-    total_count = counts.sum(axis=0)
+def aggregate_stats(ls_datasets) -> dict[str, dict[str, torch.Tensor]]:
+    """Aggregate stats of multiple LeRobot datasets into one set of stats without recomputing from scratch.

-    # Prepare weighted mean by matching number of dimensions
-    while counts.ndim < means.ndim:
-        counts = np.expand_dims(counts, axis=-1)
+    The final stats will have the union of all data keys from each of the datasets.

-    # Compute the weighted mean
-    weighted_means = means * counts
-    total_mean = weighted_means.sum(axis=0) / total_count
-
-    # Compute the variance using the parallel algorithm
-    delta_means = means - total_mean
-    weighted_variances = (variances + delta_means**2) * counts
-    total_variance = weighted_variances.sum(axis=0) / total_count
-
-    return {
-        "min": np.min(np.stack([s["min"] for s in stats_ft_list]), axis=0),
-        "max": np.max(np.stack([s["max"] for s in stats_ft_list]), axis=0),
-        "mean": total_mean,
-        "std": np.sqrt(total_variance),
-        "count": total_count,
-    }
-
-
-def aggregate_stats(stats_list: list[dict[str, dict]]) -> dict[str, dict[str, np.ndarray]]:
-    """Aggregate stats from multiple compute_stats outputs into a single set of stats.
-
-    The final stats will have the union of all data keys from each of the stats dicts.
-
-    For instance:
-    - new_min = min(min_dataset_0, min_dataset_1, ...)
+    Each statistic is combined across the datasets that contain the key. For instance:
    - new_max = max(max_dataset_0, max_dataset_1, ...)
-    - new_mean = (mean of all data, weighted by counts)
+    - new_min = min(min_dataset_0, min_dataset_1, ...)
+    - new_mean = (mean of all data, weighted by each dataset's num_samples)
    - new_std = (std of all data)
    """
-
-    _assert_type_and_shape(stats_list)
-
-    data_keys = {key for stats in stats_list for key in stats}
-    aggregated_stats = {key: {} for key in data_keys}
-
-    for key in data_keys:
-        stats_with_key = [stats[key] for stats in stats_list if key in stats]
-        aggregated_stats[key] = aggregate_feature_stats(stats_with_key)
-
-    return aggregated_stats
+    data_keys = set()
+    for dataset in ls_datasets:
+        data_keys.update(dataset.stats.keys())
+    stats = {k: {} for k in data_keys}
+    for data_key in data_keys:
+        for stat_key in ["min", "max"]:
+            # compute `max(dataset_0["max"], dataset_1["max"], ...)`
+            stats[data_key][stat_key] = einops.reduce(
+                torch.stack([d.stats[data_key][stat_key] for d in ls_datasets if data_key in d.stats], dim=0),
+                "n ... -> ...",
+                stat_key,
+            )
+        total_samples = sum(d.num_samples for d in ls_datasets if data_key in d.stats)
+        # Compute the "sum" statistic by multiplying each mean by the number of samples in the respective
+        # dataset, then divide by total_samples to get the overall "mean".
+        # NOTE: the brackets around (d.num_samples / total_samples) are needed to minimize the risk of
+        # numerical overflow!
+        stats[data_key]["mean"] = sum(
+            d.stats[data_key]["mean"] * (d.num_samples / total_samples)
+            for d in ls_datasets
+            if data_key in d.stats
+        )
+        # The derivation for standard deviation is a little more involved but is much in the same spirit as
+        # the computation of the mean.
+        # Given two sets of data where the statistics are known:
+        # σ_combined = sqrt[ (n1 * (σ1^2 + d1^2) + n2 * (σ2^2 + d2^2)) / (n1 + n2) ]
+        # where d1 = μ1 - μ_combined, d2 = μ2 - μ_combined
+        # NOTE: the brackets around (d.num_samples / total_samples) are needed to minimize the risk of
+        # numerical overflow!
+        stats[data_key]["std"] = torch.sqrt(
+            sum(
+                (d.stats[data_key]["std"] ** 2 + (d.stats[data_key]["mean"] - stats[data_key]["mean"]) ** 2)
+                * (d.num_samples / total_samples)
+                for d in ls_datasets
+                if data_key in d.stats
+            )
+        )
+    return stats
--- a/lerobot/common/datasets/factory.py
+++ b/lerobot/common/datasets/factory.py
@@ -14,105 +14,104 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from pprint import pformat

 import torch
+from omegaconf import ListConfig, OmegaConf

-from lerobot.common.datasets.lerobot_dataset import (
-    LeRobotDataset,
-    LeRobotDatasetMetadata,
-    MultiLeRobotDataset,
-)
-from lerobot.common.datasets.transforms import ImageTransforms
-from lerobot.configs.policies import PreTrainedConfig
-from lerobot.configs.train import TrainPipelineConfig
-
-IMAGENET_STATS = {
-    "mean": [[[0.485]], [[0.456]], [[0.406]]],  # (c,1,1)
-    "std": [[[0.229]], [[0.224]], [[0.225]]],  # (c,1,1)
-}
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, MultiLeRobotDataset
+from lerobot.common.datasets.transforms import get_image_transforms


-def resolve_delta_timestamps(
-    cfg: PreTrainedConfig, ds_meta: LeRobotDatasetMetadata
-) -> dict[str, list] | None:
-    """Resolves delta_timestamps by reading from the 'delta_indices' properties of the PreTrainedConfig.
+def resolve_delta_timestamps(cfg):
+    """Resolves delta_timestamps config key (in-place) by using `eval`.

-    Args:
-        cfg (PreTrainedConfig): The PreTrainedConfig to read delta_indices from.
-        ds_meta (LeRobotDatasetMetadata): The dataset from which features and fps are used to build
-            delta_timestamps against.
-
-    Returns:
-        dict[str, list] | None: A dictionary of delta_timestamps, e.g.:
-            {
-                "observation.state": [-0.04, -0.02, 0]
-                "observation.action": [-0.02, 0, 0.02]
-            }
-            returns `None` if the resulting dict is empty.
+    Doesn't do anything if delta_timestamps is not specified or has already been resolved (as evidenced by
+    the data type of its values).
    """
-    delta_timestamps = {}
-    for key in ds_meta.features:
-        if key == "next.reward" and cfg.reward_delta_indices is not None:
-            delta_timestamps[key] = [i / ds_meta.fps for i in cfg.reward_delta_indices]
-        if key == "action" and cfg.action_delta_indices is not None:
-            delta_timestamps[key] = [i / ds_meta.fps for i in cfg.action_delta_indices]
-        if key.startswith("observation.") and cfg.observation_delta_indices is not None:
-            delta_timestamps[key] = [i / ds_meta.fps for i in cfg.observation_delta_indices]
-
-    if len(delta_timestamps) == 0:
-        delta_timestamps = None
-
-    return delta_timestamps
+    delta_timestamps = cfg.training.get("delta_timestamps")
+    if delta_timestamps is not None:
+        for key in delta_timestamps:
+            if isinstance(delta_timestamps[key], str):  # only string-valued (unresolved) entries are evaluated
+                # TODO(rcadene, alexander-soare): remove `eval` to avoid exploit
+                cfg.training.delta_timestamps[key] = eval(delta_timestamps[key])


-def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDataset:
-    """Handles the logic of setting up delta timestamps and image transforms before creating a dataset.
-
-    Args:
-        cfg (TrainPipelineConfig): A TrainPipelineConfig config which contains a DatasetConfig and a PreTrainedConfig.
-
-    Raises:
-        NotImplementedError: The MultiLeRobotDataset is currently deactivated.
-
-    Returns:
-        LeRobotDataset | MultiLeRobotDataset
+def make_dataset(cfg, split: str = "train") -> LeRobotDataset | MultiLeRobotDataset:
    """
-    image_transforms = (
-        ImageTransforms(cfg.dataset.image_transforms) if cfg.dataset.image_transforms.enable else None
-    )
-
-    if isinstance(cfg.dataset.repo_id, str):
-        ds_meta = LeRobotDatasetMetadata(
-            cfg.dataset.repo_id, root=cfg.dataset.root, revision=cfg.dataset.revision
+    Args:
+        cfg: A Hydra config as per the LeRobot config scheme.
+        split: Select the data subset used to create an instance of LeRobotDataset.
+            All datasets hosted on [lerobot](https://huggingface.co/lerobot) contain only one subset: "train".
+            Thus, by default, `split="train"` selects all the available data. `split` aims to work like the
+            slicer in the hugging face datasets:
+            https://huggingface.co/docs/datasets/v2.19.0/loading#slice-splits
+            As of now, it only supports `split="train[:n]"` to load the first n frames of the dataset or
+            `split="train[n:]"` to load the last n frames. For instance `split="train[:1000]"`.
+    Returns:
+        The LeRobotDataset.
+    """
+    if not isinstance(cfg.dataset_repo_id, (str, ListConfig)):
+        raise ValueError(
+            "Expected cfg.dataset_repo_id to be either a single string to load one dataset or a list of "
+            "strings to load multiple datasets."
        )
-        delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta)
+
+    # Soft check: warn if the env name is absent from a dataset repo id. Skipped for a real world env (dora).
+    if cfg.env.name != "dora":
+        if isinstance(cfg.dataset_repo_id, str):
+            dataset_repo_ids = [cfg.dataset_repo_id]  # single dataset
+        else:
+            dataset_repo_ids = cfg.dataset_repo_id  # multiple datasets
+
+        for dataset_repo_id in dataset_repo_ids:
+            if cfg.env.name not in dataset_repo_id:
+                logging.warning(
+                    f"There might be a mismatch between your training dataset ({dataset_repo_id=}) and your "
+                    f"environment ({cfg.env.name=})."
+                )
+
+    resolve_delta_timestamps(cfg)  # evaluates string-valued cfg.training.delta_timestamps entries in place
+
+    image_transforms = None  # stays None unless image transforms are enabled in the config
+    if cfg.training.image_transforms.enable:
+        cfg_tf = cfg.training.image_transforms
+        image_transforms = get_image_transforms(
+            brightness_weight=cfg_tf.brightness.weight,
+            brightness_min_max=cfg_tf.brightness.min_max,
+            contrast_weight=cfg_tf.contrast.weight,
+            contrast_min_max=cfg_tf.contrast.min_max,
+            saturation_weight=cfg_tf.saturation.weight,
+            saturation_min_max=cfg_tf.saturation.min_max,
+            hue_weight=cfg_tf.hue.weight,
+            hue_min_max=cfg_tf.hue.min_max,
+            sharpness_weight=cfg_tf.sharpness.weight,
+            sharpness_min_max=cfg_tf.sharpness.min_max,
+            max_num_transforms=cfg_tf.max_num_transforms,
+            random_order=cfg_tf.random_order,
+        )
+
+    if isinstance(cfg.dataset_repo_id, str):
        dataset = LeRobotDataset(
-            cfg.dataset.repo_id,
-            root=cfg.dataset.root,
-            episodes=cfg.dataset.episodes,
-            delta_timestamps=delta_timestamps,
+            cfg.dataset_repo_id,
+            split=split,
+            delta_timestamps=cfg.training.get("delta_timestamps"),
            image_transforms=image_transforms,
-            revision=cfg.dataset.revision,
-            video_backend=cfg.dataset.video_backend,
+            video_backend=cfg.video_backend,
        )
    else:
-        raise NotImplementedError("The MultiLeRobotDataset isn't supported for now.")
        dataset = MultiLeRobotDataset(
-            cfg.dataset.repo_id,
-            # TODO(aliberts): add proper support for multi dataset
-            # delta_timestamps=delta_timestamps,
+            cfg.dataset_repo_id,
+            split=split,
+            delta_timestamps=cfg.training.get("delta_timestamps"),
            image_transforms=image_transforms,
-            video_backend=cfg.dataset.video_backend,
-        )
-        logging.info(
-            "Multiple datasets were provided. Applied the following index mapping to the provided datasets: "
-            f"{pformat(dataset.repo_id_to_index, indent=2)}"
+            video_backend=cfg.video_backend,
        )

-    if cfg.dataset.use_imagenet_stats:
-        for key in dataset.meta.camera_keys:
-            for stats_type, stats in IMAGENET_STATS.items():
-                dataset.meta.stats[key][stats_type] = torch.tensor(stats, dtype=torch.float32)
+    if cfg.get("override_dataset_stats"):
+        for key, stats_dict in cfg.override_dataset_stats.items():
+            for stats_type, listconfig in stats_dict.items():
+                # example of stats_type: min, max, mean, std
+                stats = OmegaConf.to_container(listconfig, resolve=True)
+                dataset.stats[key][stats_type] = torch.tensor(stats, dtype=torch.float32)

    return dataset
--- a/lerobot/common/datasets/image_writer.py
+++ b/lerobot/common/datasets/image_writer.py
@@ -1,178 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import multiprocessing
-import queue
-import threading
-from pathlib import Path
-
-import numpy as np
-import PIL.Image
-import torch
-
-
-def safe_stop_image_writer(func):
-    def wrapper(*args, **kwargs):
-        try:
-            return func(*args, **kwargs)
-        except Exception as e:
-            dataset = kwargs.get("dataset")
-            image_writer = getattr(dataset, "image_writer", None) if dataset else None
-            if image_writer is not None:
-                print("Waiting for image writer to terminate...")
-                image_writer.stop()
-            raise e
-
-    return wrapper
-
-
-def image_array_to_pil_image(image_array: np.ndarray, range_check: bool = True) -> PIL.Image.Image:
-    # TODO(aliberts): handle 1 channel and 4 for depth images
-    if image_array.ndim != 3:
-        raise ValueError(f"The array has {image_array.ndim} dimensions, but 3 is expected for an image.")
-
-    if image_array.shape[0] == 3:
-        # Transpose from pytorch convention (C, H, W) to (H, W, C)
-        image_array = image_array.transpose(1, 2, 0)
-
-    elif image_array.shape[-1] != 3:
-        raise NotImplementedError(
-            f"The image has {image_array.shape[-1]} channels, but 3 is required for now."
-        )
-
-    if image_array.dtype != np.uint8:
-        if range_check:
-            max_ = image_array.max().item()
-            min_ = image_array.min().item()
-            if max_ > 1.0 or min_ < 0.0:
-                raise ValueError(
-                    "The image data type is float, which requires values in the range [0.0, 1.0]. "
-                    f"However, the provided range is [{min_}, {max_}]. Please adjust the range or "
-                    "provide a uint8 image with values in the range [0, 255]."
-                )
-
-        image_array = (image_array * 255).astype(np.uint8)
-
-    return PIL.Image.fromarray(image_array)
-
-
-def write_image(image: np.ndarray | PIL.Image.Image, fpath: Path):
-    try:
-        if isinstance(image, np.ndarray):
-            img = image_array_to_pil_image(image)
-        elif isinstance(image, PIL.Image.Image):
-            img = image
-        else:
-            raise TypeError(f"Unsupported image type: {type(image)}")
-        img.save(fpath)
-    except Exception as e:
-        print(f"Error writing image {fpath}: {e}")
-
-
-def worker_thread_loop(queue: queue.Queue):
-    while True:
-        item = queue.get()
-        if item is None:
-            queue.task_done()
-            break
-        image_array, fpath = item
-        write_image(image_array, fpath)
-        queue.task_done()
-
-
-def worker_process(queue: queue.Queue, num_threads: int):
-    threads = []
-    for _ in range(num_threads):
-        t = threading.Thread(target=worker_thread_loop, args=(queue,))
-        t.daemon = True
-        t.start()
-        threads.append(t)
-    for t in threads:
-        t.join()
-
-
-class AsyncImageWriter:
-    """
-    This class abstract away the initialisation of processes or/and threads to
-    save images on disk asynchronously, which is critical to control a robot and record data
-    at a high frame rate.
-
-    When `num_processes=0`, it creates a threads pool of size `num_threads`.
-    When `num_processes>0`, it creates processes pool of size `num_processes`, where each subprocess starts
-    their own threads pool of size `num_threads`.
-
-    The optimal number of processes and threads depends on your computer capabilities.
-    We advise to use 4 threads per camera with 0 processes. If the fps is not stable, try to increase or lower
-    the number of threads. If it is still not stable, try to use 1 subprocess, or more.
-    """
-
-    def __init__(self, num_processes: int = 0, num_threads: int = 1):
-        self.num_processes = num_processes
-        self.num_threads = num_threads
-        self.queue = None
-        self.threads = []
-        self.processes = []
-        self._stopped = False
-
-        if num_threads <= 0 and num_processes <= 0:
-            raise ValueError("Number of threads and processes must be greater than zero.")
-
-        if self.num_processes == 0:
-            # Use threading
-            self.queue = queue.Queue()
-            for _ in range(self.num_threads):
-                t = threading.Thread(target=worker_thread_loop, args=(self.queue,))
-                t.daemon = True
-                t.start()
-                self.threads.append(t)
-        else:
-            # Use multiprocessing
-            self.queue = multiprocessing.JoinableQueue()
-            for _ in range(self.num_processes):
-                p = multiprocessing.Process(target=worker_process, args=(self.queue, self.num_threads))
-                p.daemon = True
-                p.start()
-                self.processes.append(p)
-
-    def save_image(self, image: torch.Tensor | np.ndarray | PIL.Image.Image, fpath: Path):
-        if isinstance(image, torch.Tensor):
-            # Convert tensor to numpy array to minimize main process time
-            image = image.cpu().numpy()
-        self.queue.put((image, fpath))
-
-    def wait_until_done(self):
-        self.queue.join()
-
-    def stop(self):
-        if self._stopped:
-            return
-
-        if self.num_processes == 0:
-            for _ in self.threads:
-                self.queue.put(None)
-            for t in self.threads:
-                t.join()
-        else:
-            num_nones = self.num_processes * self.num_threads
-            for _ in range(num_nones):
-                self.queue.put(None)
-            for p in self.processes:
-                p.join()
-                if p.is_alive():
-                    p.terminate()
-            self.queue.close()
-            self.queue.join_thread()
-
-        self._stopped = True
--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
--- a/lerobot/common/datasets/online_buffer.py
+++ b/lerobot/common/datasets/online_buffer.py
@@ -187,7 +187,7 @@ class OnlineBuffer(torch.utils.data.Dataset):
        assert data[OnlineBuffer.INDEX_KEY][0].item() == 0

        # Shift the incoming indices if necessary.
-        if self.num_frames > 0:
+        if self.num_samples > 0:
            last_episode_index = self._data[OnlineBuffer.EPISODE_INDEX_KEY][next_index - 1]
            last_data_index = self._data[OnlineBuffer.INDEX_KEY][next_index - 1]
            data[OnlineBuffer.EPISODE_INDEX_KEY] += last_episode_index + 1
@@ -227,11 +227,11 @@ class OnlineBuffer(torch.utils.data.Dataset):
        )

    @property
-    def num_frames(self) -> int:
+    def num_samples(self) -> int:
        return np.count_nonzero(self._data[OnlineBuffer.OCCUPANCY_MASK_KEY])

    def __len__(self):
-        return self.num_frames
+        return self.num_samples

    def _item_to_tensors(self, item: dict) -> dict:
        item_ = {}
--- a/lerobot/common/datasets/populate_dataset.py
+++ b/lerobot/common/datasets/populate_dataset.py
@@ -0,0 +1,468 @@
+"""Functions to create an empty dataset, and populate it with frames."""
+# TODO(rcadene, aliberts): to adapt as class methods of next version of LeRobotDataset
+
+import concurrent
+import concurrent.futures
+import json
+import logging
+import multiprocessing
+import shutil
+from pathlib import Path
+
+import torch
+import tqdm
+from PIL import Image
+
+from lerobot.common.datasets.compute_stats import compute_stats
+from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
+from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import to_hf_dataset
+from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, get_default_encoding
+from lerobot.common.datasets.utils import calculate_episode_data_index, create_branch
+from lerobot.common.datasets.video_utils import encode_video_frames
+from lerobot.common.utils.utils import log_say
+from lerobot.scripts.push_dataset_to_hub import (
+    push_dataset_card_to_hub,
+    push_meta_data_to_hub,
+    push_videos_to_hub,
+    save_meta_data,
+)
+
+########################################################################################
+# Asynchronous saving of images on disk
+########################################################################################
+
+
+def safe_stop_image_writer(func):
+    """Decorator that stops the dataset's image writer when `func` raises.
+
+    When the wrapped call raises an `Exception`, the image writer stored under
+    `dataset["image_writer"]` is stopped (20 second timeout) before the original exception
+    is re-raised. The dataset is only looked up in the keyword arguments, so `dataset` must
+    be passed by keyword for the clean-up to happen.
+
+    Args:
+        func: the callable to wrap.
+
+    Returns:
+        The wrapping function, carrying the name and docstring of `func`.
+    """
+    # Function-scope import keeps this block self-contained.
+    import functools
+
+    # TODO(aliberts): Allow to pass custom exceptions
+    # (e.g. ThreadServiceExit, KeyboardInterrupt, SystemExit, UnpluggedError, DynamixelCommError)
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except Exception:
+            # `dataset` may be absent or explicitly None: fall back to an empty dict so the
+            # lookup below cannot raise and mask the exception being handled.
+            dataset = kwargs.get("dataset") or {}
+            image_writer = dataset.get("image_writer")
+            if image_writer is not None:
+                print("Waiting for image writer to terminate...")
+                stop_image_writer(image_writer, timeout=20)
+            # Bare `raise` re-raises the active exception with its original traceback.
+            raise
+
+    return wrapper
+
+
+def save_image(img_tensor, key, frame_index, episode_index, videos_dir: str):
+    """Save one frame as `<videos_dir>/<key>_episode_<episode_index>/frame_<frame_index>.png`.
+
+    `img_tensor` is converted with `.numpy()` and handed to `PIL.Image.fromarray`; the
+    episode directory is created on demand.
+    """
+    pil_image = Image.fromarray(img_tensor.numpy())
+    frames_dir = Path(videos_dir) / f"{key}_episode_{episode_index:06d}"
+    frames_dir.mkdir(parents=True, exist_ok=True)
+    # NOTE(review): `quality` is presumably ignored by the PNG encoder -- confirm.
+    pil_image.save(str(frames_dir / f"frame_{frame_index:06d}.png"), quality=100)
+
+
+def loop_to_save_images_in_threads(image_queue, num_threads):
+    """Worker loop: pull frames from `image_queue` and save them with a pool of threads.
+
+    Each queue item is a tuple `(image, key, frame_index, episode_index, videos_dir)` that is
+    forwarded to `save_image`. Receiving `None` ends the loop; the function then waits for
+    every submitted save to finish before returning.
+
+    Raises:
+        NotImplementedError: if `num_threads` is lower than 1.
+    """
+    if num_threads < 1:
+        raise NotImplementedError(f"Only `num_threads>=1` is supported for now, but {num_threads=} given.")
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
+        futures = []
+        while True:
+            # Blocks until a frame is available
+            frame_data = image_queue.get()
+
+            # As usually done, exit loop when receiving None to stop the worker
+            if frame_data is None:
+                break
+
+            image, key, frame_index, episode_index, videos_dir = frame_data
+            futures.append(executor.submit(save_image, image, key, frame_index, episode_index, videos_dir))
+
+        # Before exiting function, wait for all threads to complete.
+        # The bar is only updated once, after every future is done, so it jumps from 0 to
+        # its total in a single step. Exceptions raised by `save_image` stay stored in the
+        # futures: `.result()` is never called here.
+        with tqdm.tqdm(total=len(futures), desc="Writing images") as progress_bar:
+            concurrent.futures.wait(futures)
+            progress_bar.update(len(futures))
+
+
+def start_image_writer_processes(image_queue, num_processes, num_threads_per_process):
+    """Start `num_processes` subprocesses that each run `loop_to_save_images_in_threads`.
+
+    Args:
+        image_queue: queue handed to every subprocess as the source of frames to save.
+        num_processes: number of subprocesses to start (must be >= 1).
+        num_threads_per_process: size of the thread pool of each subprocess (must be >= 1).
+
+    Returns:
+        The list of started `multiprocessing.Process` objects.
+
+    Raises:
+        ValueError: if `num_processes` is lower than 1.
+        NotImplementedError: if `num_threads_per_process` is lower than 1.
+    """
+    if num_processes < 1:
+        raise ValueError(f"Only `num_processes>=1` is supported, but {num_processes=} given.")
+
+    if num_threads_per_process < 1:
+        # The `f` prefix is required so that the offending value is interpolated.
+        raise NotImplementedError(
+            f"Only `num_threads_per_process>=1` is supported for now, but {num_threads_per_process=} given."
+        )
+
+    processes = []
+    for _ in range(num_processes):
+        process = multiprocessing.Process(
+            target=loop_to_save_images_in_threads,
+            args=(image_queue, num_threads_per_process),
+        )
+        process.start()
+        processes.append(process)
+    return processes
+
+
+def stop_processes(processes, queue, timeout):
+    """Ask every worker process to stop, wait for each of them, then close `queue`.
+
+    Args:
+        processes: the worker processes reading from `queue`.
+        queue: the queue the workers read from; one `None` sentinel is sent per process.
+        timeout: maximum number of seconds to wait for each process to terminate.
+    """
+    # Send None to each process to signal them to stop
+    for _ in processes:
+        queue.put(None)
+
+    for process in processes:
+        # Wait at most `timeout` seconds for this process to terminate
+        process.join(timeout=timeout)
+
+        # If it is still running after the wait, force termination. The check is made for
+        # every process (not only the last one) and is skipped when `processes` is empty.
+        if process.is_alive():
+            process.terminate()
+
+    # Close the queue, no more items can be put in the queue
+    queue.close()
+
+    # Ensure all background queue threads have finished
+    queue.join_thread()
+
+
+def start_image_writer(num_processes, num_threads):
+    """Abstract away the initialization of the processes and/or threads used to save images
+    on disk asynchronously, which is critical to control a robot and record data at a high
+    frame rate.
+
+    When `num_processes=0`, it returns a dictionary containing a thread pool of size `num_threads`
+    (keys `"threads_pool"` and `"futures"`).
+    When `num_processes>0`, it returns a dictionary containing a pool of `num_processes` processes
+    (keys `"processes_pool"` and `"image_queue"`), where each subprocess starts its own thread
+    pool of size `num_threads`.
+
+    The optimal number of processes and threads depends on your computer capabilities.
+    We advise to use 4 threads per camera with 0 processes. If the fps is not stable, try to increase or lower
+    the number of threads. If it is still not stable, try to use 1 subprocess, or more.
+    """
+    image_writer = {}
+
+    if num_processes == 0:
+        # Threads only: saves are submitted to the pool and their futures are collected
+        futures = []
+        threads_pool = concurrent.futures.ThreadPoolExecutor(max_workers=num_threads)
+        image_writer["threads_pool"], image_writer["futures"] = threads_pool, futures
+    else:
+        # TODO(rcadene): When using num_processes>1, `multiprocessing.Manager().Queue()`
+        # might be better than `multiprocessing.Queue()`. Source: https://www.geeksforgeeks.org/python-multiprocessing-queue-vs-multiprocessing-manager-queue
+        image_queue = multiprocessing.Queue()
+        processes_pool = start_image_writer_processes(
+            image_queue, num_processes=num_processes, num_threads_per_process=num_threads
+        )
+        image_writer["processes_pool"], image_writer["image_queue"] = processes_pool, image_queue
+
+    return image_writer
+
+
+def async_save_image(image_writer, image, key, frame_index, episode_index, videos_dir):
+    """Schedule the saving of one image on disk without blocking the caller.
+
+    `image_writer` is the dictionary holding either a pool of threads (the save is submitted
+    to the pool and its future is recorded) or a queue read by a pool of processes (the
+    frame description is put on the queue).
+    """
+    frame_args = (image, key, frame_index, episode_index, videos_dir)
+
+    if "threads_pool" not in image_writer:
+        # Process-based writer: the workers pick the frame up from the queue
+        image_writer["image_queue"].put(frame_args)
+        return
+
+    pool = image_writer["threads_pool"]
+    pending = image_writer["futures"]
+    pending.append(pool.submit(save_image, *frame_args))
+
+
+def stop_image_writer(image_writer, timeout):
+    """Wait for the pending image saves of `image_writer`, for at most `timeout` seconds.
+
+    With a thread-based writer, it waits on the recorded futures. With a process-based
+    writer, it delegates to `stop_processes`, which also closes the queue.
+    """
+    if "threads_pool" in image_writer:
+        futures = image_writer["futures"]
+        # Before exiting function, wait for all threads to complete (or for the timeout).
+        # The bar is set to its total right after the wait, whether or not every future
+        # finished in time.
+        # NOTE(review): the thread pool itself is not shut down here -- confirm this is intended.
+        with tqdm.tqdm(total=len(futures), desc="Writing images") as progress_bar:
+            concurrent.futures.wait(futures, timeout=timeout)
+            progress_bar.update(len(futures))
+    else:
+        processes_pool, image_queue = image_writer["processes_pool"], image_writer["image_queue"]
+        stop_processes(processes_pool, image_queue, timeout=timeout)
+
+
+########################################################################################
+# Functions to initialize, resume and populate a dataset
+########################################################################################
+
+
+def init_dataset(
+    repo_id,
+    root,
+    force_override,
+    fps,
+    video,
+    write_images,
+    num_image_writer_processes,
+    num_image_writer_threads,
+):
+    """Create (or resume) the on-disk layout of a dataset and return its state dictionary.
+
+    The dataset lives in `<root>/<repo_id>`, with an `episodes` and a `videos` sub-directory.
+    When that directory already exists and `force_override` is true, it is deleted first.
+    Otherwise, if `episodes/data_recording_info.json` exists, recording resumes after the
+    last episode index it contains.
+
+    Returns:
+        A dictionary with the keys `repo_id`, `local_dir`, `videos_dir`, `episodes_dir`, `fps`,
+        `video`, `rec_info_path` and `num_episodes`, plus `image_writer` when `write_images`
+        is true.
+    """
+    local_dir = Path(root) / repo_id
+    if local_dir.exists() and force_override:
+        shutil.rmtree(local_dir)
+
+    episodes_dir = local_dir / "episodes"
+    episodes_dir.mkdir(parents=True, exist_ok=True)
+
+    videos_dir = local_dir / "videos"
+    videos_dir.mkdir(parents=True, exist_ok=True)
+
+    # Logic to resume data recording
+    rec_info_path = episodes_dir / "data_recording_info.json"
+    if rec_info_path.exists():
+        with open(rec_info_path) as f:
+            rec_info = json.load(f)
+        num_episodes = rec_info["last_episode_index"] + 1
+    else:
+        num_episodes = 0
+
+    dataset = {
+        "repo_id": repo_id,
+        "local_dir": local_dir,
+        "videos_dir": videos_dir,
+        "episodes_dir": episodes_dir,
+        "fps": fps,
+        "video": video,
+        "rec_info_path": rec_info_path,
+        "num_episodes": num_episodes,
+    }
+
+    if write_images:
+        # Initialize processes or/and threads dedicated to save images on disk asynchronously,
+        # which is critical to control a robot and record data at a high frame rate.
+        image_writer = start_image_writer(
+            num_processes=num_image_writer_processes,
+            num_threads=num_image_writer_threads,
+        )
+        dataset["image_writer"] = image_writer
+
+    return dataset
+
+
+def add_frame(dataset, observation, action):
+    """Append one frame (observation and action) to the episode currently being recorded.
+
+    The episode dictionary is created on the first call after an episode was saved or
+    deleted. Observation keys containing "image" are treated as images: they are written
+    to disk asynchronously through `dataset["image_writer"]` and only a reference (video
+    path and timestamp, or png path) is kept in the episode. When the dataset has no
+    image writer, image keys are not recorded at all.
+    """
+    if "current_episode" not in dataset:
+        # initialize episode dictionary
+        ep_dict = {}
+        for key in observation:
+            if key not in ep_dict:
+                ep_dict[key] = []
+        for key in action:
+            if key not in ep_dict:
+                ep_dict[key] = []
+
+        ep_dict["episode_index"] = []
+        ep_dict["frame_index"] = []
+        ep_dict["timestamp"] = []
+        ep_dict["next.done"] = []
+
+        dataset["current_episode"] = ep_dict
+        dataset["current_frame_index"] = 0
+
+    ep_dict = dataset["current_episode"]
+    # The episode being recorded takes the next free index
+    episode_index = dataset["num_episodes"]
+    frame_index = dataset["current_frame_index"]
+    videos_dir = dataset["videos_dir"]
+    video = dataset["video"]
+    fps = dataset["fps"]
+
+    ep_dict["episode_index"].append(episode_index)
+    ep_dict["frame_index"].append(frame_index)
+    # Timestamps are derived from the frame index and the nominal fps
+    ep_dict["timestamp"].append(frame_index / fps)
+    # The last frame is flagged as done when the episode is saved
+    ep_dict["next.done"].append(False)
+
+    img_keys = [key for key in observation if "image" in key]
+    non_img_keys = [key for key in observation if "image" not in key]
+
+    # Save all observed modalities except images
+    for key in non_img_keys:
+        ep_dict[key].append(observation[key])
+
+    # Save actions
+    for key in action:
+        ep_dict[key].append(action[key])
+
+    # Without an image writer, images are skipped entirely
+    if "image_writer" not in dataset:
+        dataset["current_frame_index"] += 1
+        return
+
+    # Save images
+    image_writer = dataset["image_writer"]
+    for key in img_keys:
+        imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
+        async_save_image(
+            image_writer,
+            image=observation[key],
+            key=key,
+            frame_index=frame_index,
+            episode_index=episode_index,
+            videos_dir=str(videos_dir),
+        )
+
+        if video:
+            # Reference the video the frames will be encoded into, with the frame timestamp
+            fname = f"{key}_episode_{episode_index:06d}.mp4"
+            frame_info = {"path": f"videos/{fname}", "timestamp": frame_index / fps}
+        else:
+            # Reference the png file itself
+            frame_info = str(imgs_dir / f"frame_{frame_index:06d}.png")
+
+        ep_dict[key].append(frame_info)
+
+    dataset["current_frame_index"] += 1
+
+
+def delete_current_episode(dataset):
+    """Discard the episode being recorded, including its temporary image directories."""
+    for stale_key in ("current_episode", "current_frame_index"):
+        del dataset[stale_key]
+
+    # The discarded episode was using the next free index: remove every
+    # `<key>_episode_<index>` directory of frames written for it.
+    pattern = f"*_episode_{dataset['num_episodes']:06d}"
+    for frames_dir in dataset["videos_dir"].glob(pattern):
+        shutil.rmtree(frames_dir)
+
+
+def save_current_episode(dataset):
+    """Write the episode being recorded to `episode_<index>.pth` and start a new one.
+
+    The last frame is flagged as done, the per-frame lists are converted to tensors, the
+    episode is saved with `torch.save`, and the recording info file is updated so that
+    recording can be resumed later.
+    """
+    episode_index = dataset["num_episodes"]
+    ep_dict = dataset["current_episode"]
+    episodes_dir = dataset["episodes_dir"]
+    rec_info_path = dataset["rec_info_path"]
+
+    ep_dict["next.done"][-1] = True
+
+    # Stack every non-image observation; image entries stay as lists of references
+    for key in ep_dict:
+        if "observation" in key and "image" not in key:
+            ep_dict[key] = torch.stack(ep_dict[key])
+
+    ep_dict["action"] = torch.stack(ep_dict["action"])
+    ep_dict["episode_index"] = torch.tensor(ep_dict["episode_index"])
+    ep_dict["frame_index"] = torch.tensor(ep_dict["frame_index"])
+    ep_dict["timestamp"] = torch.tensor(ep_dict["timestamp"])
+    ep_dict["next.done"] = torch.tensor(ep_dict["next.done"])
+
+    ep_path = episodes_dir / f"episode_{episode_index}.pth"
+    torch.save(ep_dict, ep_path)
+
+    # Remember the last saved episode so that a later run can resume after it
+    rec_info = {
+        "last_episode_index": episode_index,
+    }
+    with open(rec_info_path, "w") as f:
+        json.dump(rec_info, f)
+
+    # force re-initialization of episode dictionary during add_frame
+    # (`current_frame_index` is left in place here; `add_frame` resets it to 0 when it
+    # re-creates the episode dictionary)
+    del dataset["current_episode"]
+
+    dataset["num_episodes"] += 1
+
+
+def encode_videos(dataset, image_keys, play_sounds):
+    """Encode the png frames of every episode and image key into one mp4 video each.
+
+    Videos that already exist are skipped; after a successful encoding, the directory of
+    png frames is deleted.
+    """
+    log_say("Encoding videos", play_sounds)
+
+    num_episodes = dataset["num_episodes"]
+    videos_dir = dataset["videos_dir"]
+    local_dir = dataset["local_dir"]
+    fps = dataset["fps"]
+
+    # Use ffmpeg to convert frames stored as png into mp4 videos
+    for episode_index in tqdm.tqdm(range(num_episodes)):
+        for key in image_keys:
+            # key = f"observation.images.{name}"
+            tmp_imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
+            fname = f"{key}_episode_{episode_index:06d}.mp4"
+            video_path = local_dir / "videos" / fname
+            if video_path.exists():
+                # Skip if video is already encoded. Could be the case when resuming data recording.
+                continue
+            # note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding,
+            # since video encoding with ffmpeg is already using multithreading.
+            encode_video_frames(tmp_imgs_dir, video_path, fps, overwrite=True)
+            # The png frames are no longer needed once the video exists
+            shutil.rmtree(tmp_imgs_dir)
+
+
+def from_dataset_to_lerobot_dataset(dataset, play_sounds):
+    """Consolidate every saved episode into a `LeRobotDataset`.
+
+    The `episode_<index>.pth` files are loaded and concatenated, videos are encoded when
+    `dataset["video"]` is true, and the result is wrapped with
+    `LeRobotDataset.from_preloaded`.
+    """
+    log_say("Consolidate episodes", play_sounds)
+
+    num_episodes = dataset["num_episodes"]
+    episodes_dir = dataset["episodes_dir"]
+    videos_dir = dataset["videos_dir"]
+    video = dataset["video"]
+    fps = dataset["fps"]
+    repo_id = dataset["repo_id"]
+
+    # Load back every episode saved by `save_current_episode`
+    ep_dicts = []
+    for episode_index in tqdm.tqdm(range(num_episodes)):
+        ep_path = episodes_dir / f"episode_{episode_index}.pth"
+        ep_dict = torch.load(ep_path)
+        ep_dicts.append(ep_dict)
+    data_dict = concatenate_episodes(ep_dicts)
+
+    if video:
+        # Image keys are recognized by the "image" substring, as in `add_frame`
+        image_keys = [key for key in data_dict if "image" in key]
+        encode_videos(dataset, image_keys, play_sounds)
+
+    hf_dataset = to_hf_dataset(data_dict, video)
+    episode_data_index = calculate_episode_data_index(hf_dataset)
+
+    info = {
+        "codebase_version": CODEBASE_VERSION,
+        "fps": fps,
+        "video": video,
+    }
+    if video:
+        info["encoding"] = get_default_encoding()
+
+    lerobot_dataset = LeRobotDataset.from_preloaded(
+        repo_id=repo_id,
+        hf_dataset=hf_dataset,
+        episode_data_index=episode_data_index,
+        info=info,
+        videos_dir=videos_dir,
+    )
+
+    return lerobot_dataset
+
+
+def save_lerobot_dataset_on_disk(lerobot_dataset):
+    """Save the hf dataset under `<local_dir>/train` and the meta data under `<local_dir>/meta_data`.
+
+    `local_dir` is the parent directory of `lerobot_dataset.videos_dir`.
+    """
+    hf_dataset = lerobot_dataset.hf_dataset
+    info = lerobot_dataset.info
+    stats = lerobot_dataset.stats
+    episode_data_index = lerobot_dataset.episode_data_index
+    local_dir = lerobot_dataset.videos_dir.parent
+    meta_data_dir = local_dir / "meta_data"
+
+    hf_dataset = hf_dataset.with_format(None)  # to remove transforms that can't be saved
+    hf_dataset.save_to_disk(str(local_dir / "train"))
+
+    save_meta_data(info, stats, episode_data_index, meta_data_dir)
+
+
+def push_lerobot_dataset_to_hub(lerobot_dataset, tags):
+    """Push the dataset, its meta data, its dataset card and (if any) its videos to the hub.
+
+    Everything is pushed to the "main" revision of `lerobot_dataset.repo_id`, then a branch
+    named after `CODEBASE_VERSION` is created.
+
+    Raises:
+        ValueError: if `<local_dir>/train` does not exist, i.e. the dataset was not saved on
+            disk with `save_lerobot_dataset_on_disk` beforehand.
+    """
+    hf_dataset = lerobot_dataset.hf_dataset
+    local_dir = lerobot_dataset.videos_dir.parent
+    videos_dir = lerobot_dataset.videos_dir
+    repo_id = lerobot_dataset.repo_id
+    video = lerobot_dataset.video
+    meta_data_dir = local_dir / "meta_data"
+
+    if not (local_dir / "train").exists():
+        raise ValueError(
+            "You need to run `save_lerobot_dataset_on_disk(lerobot_dataset)` before pushing to the hub."
+        )
+
+    hf_dataset.push_to_hub(repo_id, revision="main")
+    push_meta_data_to_hub(repo_id, meta_data_dir, revision="main")
+    push_dataset_card_to_hub(repo_id, revision="main", tags=tags)
+    if video:
+        push_videos_to_hub(repo_id, videos_dir, revision="main")
+    create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)
+
+
+def create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_sounds):
+    """Finalize a recording: build the `LeRobotDataset`, save it on disk and optionally push it.
+
+    The image writer (if any) is stopped first, with a 20 second timeout. Statistics are
+    computed when `run_compute_stats` is true, otherwise `stats` is set to an empty dict.
+
+    Returns:
+        The consolidated `LeRobotDataset`.
+    """
+    if "image_writer" in dataset:
+        logging.info("Waiting for image writer to terminate...")
+        image_writer = dataset["image_writer"]
+        stop_image_writer(image_writer, timeout=20)
+
+    lerobot_dataset = from_dataset_to_lerobot_dataset(dataset, play_sounds)
+
+    if run_compute_stats:
+        log_say("Computing dataset statistics", play_sounds)
+        lerobot_dataset.stats = compute_stats(lerobot_dataset)
+    else:
+        logging.info("Skipping computation of the dataset statistics")
+        lerobot_dataset.stats = {}
+
+    # Saving on disk is required before pushing to the hub
+    save_lerobot_dataset_on_disk(lerobot_dataset)
+
+    if push_to_hub:
+        push_lerobot_dataset_to_hub(lerobot_dataset, tags)
+
+    return lerobot_dataset
--- a/lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md
+++ b/lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md
@@ -0,0 +1,56 @@
+## Using / Updating `CODEBASE_VERSION` (for maintainers)
+
+Since our datasets pushed to the hub are decoupled from the evolution of this repo, we use a
+`CODEBASE_VERSION` variable (defined in lerobot/common/datasets/lerobot_dataset.py) to ensure
+compatibility of the datasets with our code.
+
+For instance, [`lerobot/pusht`](https://huggingface.co/datasets/lerobot/pusht) has many versions to maintain backward compatibility between LeRobot codebase versions:
+- [v1.0](https://huggingface.co/datasets/lerobot/pusht/tree/v1.0)
+- [v1.1](https://huggingface.co/datasets/lerobot/pusht/tree/v1.1)
+- [v1.2](https://huggingface.co/datasets/lerobot/pusht/tree/v1.2)
+- [v1.3](https://huggingface.co/datasets/lerobot/pusht/tree/v1.3)
+- [v1.4](https://huggingface.co/datasets/lerobot/pusht/tree/v1.4)
+- [v1.5](https://huggingface.co/datasets/lerobot/pusht/tree/v1.5)
+- [v1.6](https://huggingface.co/datasets/lerobot/pusht/tree/v1.6) <-- last version
+- [main](https://huggingface.co/datasets/lerobot/pusht/tree/main) <-- points to the last version
+
+Starting with v1.6, every dataset pushed to the hub or saved locally also has this version number in its
+`info.json` metadata.
+
+### Uploading a new dataset
+If you are pushing a new dataset, you don't need to worry about any of the instructions below, nor to be
+compatible with previous codebase versions. The `push_dataset_to_hub.py` script will automatically tag your
+dataset with the current `CODEBASE_VERSION`.
+
+### Updating an existing dataset
+If you want to update an existing dataset, you need to change the `CODEBASE_VERSION` from `lerobot_dataset.py`
+before running `push_dataset_to_hub.py`. This is especially useful if you introduce a breaking change
+intentionally or not (i.e. something not backward compatible such as modifying the reward functions used,
+deleting some frames at the end of an episode, etc.). That way, people running a previous version of the
+codebase won't be affected by your change and backward compatibility is maintained.
+
+However, you will need to update the version of ALL the other datasets so that they have the new
+`CODEBASE_VERSION` as a branch in their hugging face dataset repository. Don't worry, there is an easy way
+that doesn't require to run `push_dataset_to_hub.py`. You can just "branch-out" from the `main` branch on HF
+dataset repo by running this script which corresponds to a `git checkout -b` (so no copy or upload needed):
+
+```python
+from huggingface_hub import HfApi
+
+from lerobot import available_datasets
+from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION
+
+api = HfApi()
+
+for repo_id in available_datasets:
+    dataset_info = api.list_repo_refs(repo_id, repo_type="dataset")
+    branches = [b.name for b in dataset_info.branches]
+    if CODEBASE_VERSION in branches:
+        print(f"{repo_id} already @{CODEBASE_VERSION}, skipping.")
+        continue
+    else:
+        # Now create a branch named after the new version by branching out from "main"
+        # which is expected to be the preceding version
+        api.create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION, revision="main")
+        print(f"{repo_id} successfully updated @{CODEBASE_VERSION}")
+```
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_cabinet.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_cabinet.txt
@@ -0,0 +1,85 @@
+https://drive.google.com/file/d/1_SOJkgfP5yZyVjMhTt3nwhvyUjcnlI51/view?usp=drive_link
+https://drive.google.com/file/d/1rmgN8UUzph1qwJnzG1d-uOafodn-gLvb/view?usp=drive_link
+https://drive.google.com/file/d/1NYQ-XxsBVinB6dUoZmVWweT83367P3i2/view?usp=drive_link
+https://drive.google.com/file/d/1oAv_j74zxxCJieMG7r5Vl2BeHK1__3s3/view?usp=drive_link
+https://drive.google.com/file/d/1wFUJQROsrTJt64YRuIeExhFjr2wnK5uu/view?usp=drive_link
+https://drive.google.com/file/d/1KzL3Tt0Le7jVl58XVRUcmigmXjyiuhbK/view?usp=drive_link
+https://drive.google.com/file/d/1qy_YBladeHtianSSGtgAPSHtMin7msvf/view?usp=drive_link
+https://drive.google.com/file/d/1rA_F0V_qL_nyuC_0aBKCisF4-0TIkF2Y/view?usp=drive_link
+https://drive.google.com/file/d/1hw-8qMpz9VgSt62XoASqNRuPECpCwJQP/view?usp=drive_link
+https://drive.google.com/file/d/1BpHOl9rKMzdvNGka6js7C0s40hH6vnDA/view?usp=drive_link
+https://drive.google.com/file/d/1PazhkhiDnJ-OUMyDVDFxEZNKQQqHiNWS/view?usp=drive_link
+https://drive.google.com/file/d/1lZ665R6ATl57dypxH4dGJ2NSt6XYnbuz/view?usp=drive_link
+https://drive.google.com/file/d/1V9HzLaf-tlG15wUzT7KrTDCS_z1vi5NV/view?usp=drive_link
+https://drive.google.com/file/d/1aKauWiXoKqbNwn_2xs4MrmLlaNYlVNmO/view?usp=drive_link
+https://drive.google.com/file/d/1WVD5DFhriO1YmmOgiVHhacR6HWoTPxav/view?usp=drive_link
+https://drive.google.com/file/d/1_X43WgeBAsfkhH9EmpyPki8U9joMeAGC/view?usp=drive_link
+https://drive.google.com/file/d/1t8x0GqWoNKWtnBsB7_D40Z34nL9ak4kf/view?usp=drive_link
+https://drive.google.com/file/d/15V_f26WaKOXjKnq2T3HRWAmtQUi4lbu2/view?usp=drive_link
+https://drive.google.com/file/d/11VFIAsiSDsMOBANgrOcZBpKB9AFWnLy7/view?usp=drive_link
+https://drive.google.com/file/d/1M0NS7vVaxJv3FHnuRYtdwTFYF7We4LxP/view?usp=drive_link
+https://drive.google.com/file/d/1mR0OItTNqFnVLoczcyKYlm6drAy778lO/view?usp=drive_link
+https://drive.google.com/file/d/1NbVFWDQAh-z4JJ4D-Zw6Lps9kdvpqh2j/view?usp=drive_link
+https://drive.google.com/file/d/1JQoZGBzl4W3QG26-n39tefcGN0fDRMbB/view?usp=drive_link
+https://drive.google.com/file/d/1VBjHl-TvZpncopvasIP5G9gecbB2a5f6/view?usp=drive_link
+https://drive.google.com/file/d/1VzSf6zaB21nahm7MsPwroXbJ84NIwq0b/view?usp=drive_link
+https://drive.google.com/file/d/1OtNnfMEydNtZOcivs4k6E_uJSpf8PkGy/view?usp=drive_link
+https://drive.google.com/file/d/14nVvpvsrFr_03Pa_N7MKzwnRwibOUYM6/view?usp=drive_link
+https://drive.google.com/file/d/1M8li6duiO2r3lv_9HhF_XJn0oZUIEK5F/view?usp=drive_link
+https://drive.google.com/file/d/1Cpzea6fO14lxAaNfSBifqoa4ekhCiLD1/view?usp=drive_link
+https://drive.google.com/file/d/1mbxRTm5vlbsY9UJ0jfjM6j9D7kPJjBpG/view?usp=drive_link
+https://drive.google.com/file/d/1RXD1i6IfWsHRlCxVmG04h2h5Ycm_WwZN/view?usp=drive_link
+https://drive.google.com/file/d/1QFqFSwDGOk1BkgGmqgCcc2BRWnJ6R3MA/view?usp=drive_link
+https://drive.google.com/file/d/1bFqWR8DQM0ZUxxtS2bl-RANQvukeFLzp/view?usp=drive_link
+https://drive.google.com/file/d/1pR-rH3yNGoyPdD4hJ6-3lXQ-PstBx9du/view?usp=drive_link
+https://drive.google.com/file/d/107OAwLY-hva9HeQLIK7VCh-ytdDabVjr/view?usp=drive_link
+https://drive.google.com/file/d/1Tpl08QOaSZ37GTO4awFWSdD8wBR9xdlT/view?usp=drive_link
+https://drive.google.com/file/d/1MR164AOM-0S1T6RX8xKTV2IHyaCvpqAW/view?usp=drive_link
+https://drive.google.com/file/d/1_wknJfVnStIhJ82lU_QtcrwahsqYIsr8/view?usp=drive_link
+https://drive.google.com/file/d/1ZuEktWrbYkTx0l5pj3WiZ2CJrfbDOHNo/view?usp=drive_link
+https://drive.google.com/file/d/15G_10hkkkq6yxvyI5NGZirlF-RzduR2F/view?usp=drive_link
+https://drive.google.com/file/d/1DBKxg3ONqh7dhLuX6oh1Yyo2x383V1Hp/view?usp=drive_link
+https://drive.google.com/file/d/1B5iDBkTUr5vopDddV_fHud18SqAHhauS/view?usp=drive_link
+https://drive.google.com/file/d/1acwFV0eenRkki1QcjSKH5xqOtys-P3Pr/view?usp=drive_link
+https://drive.google.com/file/d/1S47BI83xyrh-FKXsvAQqer98Biu_p8XK/view?usp=drive_link
+https://drive.google.com/file/d/1JL6DmBZl3uyq9dyLfgSqtGF06e7E9JwM/view?usp=drive_link
+https://drive.google.com/file/d/16WvRS4Kjog8Pxgr0E3sGGnI01YwL9Uql/view?usp=drive_link
+https://drive.google.com/file/d/12ttGqL33IPWg0-s1SD44rr22M6LiSQBr/view?usp=drive_link
+https://drive.google.com/file/d/1OyZqqnldTU_DliRbr6x0C4a_iWPwIN7j/view?usp=drive_link
+https://drive.google.com/file/d/1oYk00IpLnR9fesLfD15Ebe7nVBffEbcS/view?usp=drive_link
+https://drive.google.com/file/d/1eyE2-MQduCEqCd-5_kl5zsoOEERAzpZD/view?usp=drive_link
+https://drive.google.com/file/d/1ir1Ya-vO0d97pfvbePlUeuKTTRc0qIMU/view?usp=drive_link
+https://drive.google.com/file/d/1hOi-JnqlMt47gVnLZHMTqeojyYVErohl/view?usp=drive_link
+https://drive.google.com/file/d/1NFFw5_PqigQ7xGqsL-MNq2B1r5yAscCf/view?usp=drive_link
+https://drive.google.com/file/d/1uftq1-Zlh8d2sNLWrlVcKYQUwZTD7o24/view?usp=drive_link
+https://drive.google.com/file/d/1-ax19dSLPacVgk000T-m3l4flPcg07pM/view?usp=drive_link
+https://drive.google.com/file/d/126y-lgn86-ZmCz8hooF1THKJGGObw3OB/view?usp=drive_link
+https://drive.google.com/file/d/1JiDniK0VmDIkk92AbBILb8J2Ba59PWML/view?usp=drive_link
+https://drive.google.com/file/d/1kr8nPIRljiU0R4J9SMgj80o1FPQxzu9z/view?usp=drive_link
+https://drive.google.com/file/d/1bbThWRij1pKBh_kFgV8FwK0sXtTHBoLX/view?usp=drive_link
+https://drive.google.com/file/d/1WenzDW6lxk1xkOFm-OiGFfc0ROskAuKU/view?usp=drive_link
+https://drive.google.com/file/d/1MiKRzuzUn1yN-k_6kPJJzIGy7dT-nnsD/view?usp=drive_link
+https://drive.google.com/file/d/17rRg2tcmB-gNhQ0KoZJQmNfyFeoij1jH/view?usp=drive_link
+https://drive.google.com/file/d/11mokBpvrY3ld6sY5WztREtJ1jgqfQV70/view?usp=drive_link
+https://drive.google.com/file/d/1Il_6IOx9NDp1bX_KHizJfBwzTufTmn86/view?usp=drive_link
+https://drive.google.com/file/d/1KswtJGsxJ7eeBDAmNA_aeLjOxcH6MIxa/view?usp=drive_link
+https://drive.google.com/file/d/1gzMhi5uWu4C3Y6WbQ3L-08V96GxTZrRR/view?usp=drive_link
+https://drive.google.com/file/d/1nRQFtaBxfUCYc2W90Qibh0kHCt6YQCfc/view?usp=drive_link
+https://drive.google.com/file/d/1vs-gyW-KheqHbUATwAhA2mmR9GOGw7f_/view?usp=drive_link
+https://drive.google.com/file/d/1MuxzGOA2fgLaHryq82KkQumtuRJGcUOC/view?usp=drive_link
+https://drive.google.com/file/d/1IIwxZnGlqrXLUXqG6yMO0r7uhCvhpk9e/view?usp=drive_link
+https://drive.google.com/file/d/1vE7XPyaFcXP4DtTY5Y9WKIt7zWgmX-Cr/view?usp=drive_link
+https://drive.google.com/file/d/1j-bIV09gr21RC3-x1N_pK4RPLV3fmWKz/view?usp=drive_link
+https://drive.google.com/file/d/1t3nW1rD3S-EL0Oymb5U7ZAj5UMkydkln/view?usp=drive_link
+https://drive.google.com/file/d/14hbfHCdMKtJZ41F9CQReMec2jeRFTOqR/view?usp=drive_link
+https://drive.google.com/file/d/1x-hUyOSne5BW0AzQ3W6_Pf4g5yXQWi9M/view?usp=drive_link
+https://drive.google.com/file/d/1sw9JqRg6E-3P84I3ZhzTrJMu0vuiaMmP/view?usp=drive_link
+https://drive.google.com/file/d/1LuqhQlL4MGZhB_6THmkovRxrlP26BbdC/view?usp=drive_link
+https://drive.google.com/file/d/15C5K6v_lkjnMSmUvVyqHQKwh2N166e7K/view?usp=drive_link
+https://drive.google.com/file/d/1ns_9eSsQeeoZ10nlbkLy8tu0GmJFSnkt/view?usp=drive_link
+https://drive.google.com/file/d/1NpzWJeK6CqjxzjIMYe6aYdX8xGsQwD4o/view?usp=drive_link
+https://drive.google.com/file/d/1NMLezwufKJ9_8xTc9KQThSzVVD71B9Ui/view?usp=drive_link
+https://drive.google.com/file/d/1aa71DCUqs6oXlIxX35jgsmsgm-NlDxPV/view?usp=drive_link
+https://drive.google.com/file/d/1UJzkIZzAL0j-D5YQBnoq7mHvttASy12O/view?usp=drive_link
+https://drive.google.com/file/d/1nPgx36HIJFb7oI94VbRzWjpPP2GANxzG/view?usp=drive_link
+https://drive.google.com/file/d/1NovAP-KVJjqcuvWy3d6G4ptGGAIDqcCx/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_chair.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_chair.txt
@@ -0,0 +1,55 @@
+https://drive.google.com/file/d/11M3Ye0r5agMaaicPbVGD0q2Hb3rGklbb/view?usp=drive_link
+https://drive.google.com/file/d/1-tx7SvYYgSvXCvnf_EI2OVdwK-CkFY6S/view?usp=drive_link
+https://drive.google.com/file/d/1EWJunmOpMHaU1hE106wwpbkGYcjQXYAF/view?usp=drive_link
+https://drive.google.com/file/d/1IDn95Z7FSiCckrSENtGV4u3RyFHNQSDY/view?usp=drive_link
+https://drive.google.com/file/d/1CwzvWj1i7QOtqrZvsCZ6BdZaKNDfpN32/view?usp=drive_link
+https://drive.google.com/file/d/1HvAvlhm77nAD3Td24QPSeq8lw-Rl_aOh/view?usp=drive_link
+https://drive.google.com/file/d/1t-suKYOPhXH666RpAYNRp2QU_DOy3AeM/view?usp=drive_link
+https://drive.google.com/file/d/18xpKgWh7RWyjMN5PkLTOo-AxsAadAuRw/view?usp=drive_link
+https://drive.google.com/file/d/1oci5Eto-ztv-AQNz8EnwZveBIhxvk-xJ/view?usp=drive_link
+https://drive.google.com/file/d/1Y-t_4vxdE6NpHO0DLJR8f3mD0Q-Wj5-c/view?usp=drive_link
+https://drive.google.com/file/d/1lylRqbbbB8bgtpsBWMPACmHJreuKmllv/view?usp=drive_link
+https://drive.google.com/file/d/1yliSyMig_NXShWfQx6qyW7Ijf2Y5lFK6/view?usp=drive_link
+https://drive.google.com/file/d/1XXhwJsJbeb7KXAooGvJapnm9bjnGUmxS/view?usp=drive_link
+https://drive.google.com/file/d/1_xs1f3hW2JArKyvfF7UWubWjyROGTLs6/view?usp=drive_link
+https://drive.google.com/file/d/1WVEHpr6EqKCZbkHapQSTXJq4xE4SWFT-/view?usp=drive_link
+https://drive.google.com/file/d/1RqOHv9pEQGvW8NUA7ynffFmG999TL_Az/view?usp=drive_link
+https://drive.google.com/file/d/1cu5AgD2gh-uA3PFJmzxxzNaF3qOSlYY1/view?usp=drive_link
+https://drive.google.com/file/d/1SsrXqiPclNrnYToPZ9Uq-k3y0C4qdHT1/view?usp=drive_link
+https://drive.google.com/file/d/1-J7EXf0vjkLIfSqT8ICEsP6CTjzSLBop/view?usp=drive_link
+https://drive.google.com/file/d/11O7ewUmoZXfyyKjy_6B5RW4DpjICxqBT/view?usp=drive_link
+https://drive.google.com/file/d/1iic44kZoCsjNsfAz2cMstZ9-WQvAhblF/view?usp=drive_link
+https://drive.google.com/file/d/1yLV1lVX-2WnWQldGlnQZ0x7QBuDiVkL3/view?usp=drive_link
+https://drive.google.com/file/d/1Tybp9ru98TTbGn4eyROpUQwDFuALWXmk/view?usp=drive_link
+https://drive.google.com/file/d/13E9OTMiipVJByDs5-J19oWwAz7l94LTN/view?usp=drive_link
+https://drive.google.com/file/d/1EeTpJQdMSliw4JzSMtJ6CyTvVdexjM4M/view?usp=drive_link
+https://drive.google.com/file/d/1NHyNwoFqzeAu-1_PSpq5JfxaiD_xbpn9/view?usp=drive_link
+https://drive.google.com/file/d/1fJcS0phDp4xm_FyGaJ5wr9Pe4KqtHaxD/view?usp=drive_link
+https://drive.google.com/file/d/12AqrLUaewDPEcFRqPZeZFb_TQ0Lfi3At/view?usp=drive_link
+https://drive.google.com/file/d/1x_hd4Qsq1oJS-aj2t3qM7WbbV7KZj05b/view?usp=drive_link
+https://drive.google.com/file/d/14OUSUArmsB068hs6BuEIXQhI1Cyz8Sf0/view?usp=drive_link
+https://drive.google.com/file/d/16zlzh1T5zeUJQnFf382NXkFEKEnDub4O/view?usp=drive_link
+https://drive.google.com/file/d/1IbDltmN-NEFCNtr1TO4ILxEgQ94rtjWv/view?usp=drive_link
+https://drive.google.com/file/d/15gmlf8Gx9455pZ1AlqcCSwh3nDPxMzSr/view?usp=drive_link
+https://drive.google.com/file/d/1qHpRL1oZfIMo_vxnm8qfwQ-7l0BZIVva/view?usp=drive_link
+https://drive.google.com/file/d/1H1xskIgiFZivkYn23rMzH3xePGOh3VTC/view?usp=drive_link
+https://drive.google.com/file/d/1avls6Pv0kYiCMNVknbc1zQsgy64MUDMM/view?usp=drive_link
+https://drive.google.com/file/d/1MmWVgCj5khc8KMIifmt3EzF1o-CtPyyn/view?usp=drive_link
+https://drive.google.com/file/d/1U0kCc_xqW0WNppf4sbnK14euWKdPZtzB/view?usp=drive_link
+https://drive.google.com/file/d/16CaEyQscOuhLj23PEGDTL9DeyNkohkMn/view?usp=drive_link
+https://drive.google.com/file/d/1Iu8uM6UUJ0zW8tvN-9UiOe_4oSNzEutg/view?usp=drive_link
+https://drive.google.com/file/d/1UImqiBaIxCR-1DNJaZhHqeHhaySOtVIr/view?usp=drive_link
+https://drive.google.com/file/d/1VpU2V_leIoRIyv_lAvE7eLHBG8DxCTnp/view?usp=drive_link
+https://drive.google.com/file/d/1_Q8J27OT3Xby7QY6yHvIJauFRWEMxkRm/view?usp=drive_link
+https://drive.google.com/file/d/1bantmVo1L9Xz4tbiNw_a1UC2Z_HPO1wT/view?usp=drive_link
+https://drive.google.com/file/d/1IRIXMJMCBDkBjbaHvAlEiBogSvZ1jK_3/view?usp=drive_link
+https://drive.google.com/file/d/1mAHXKjiFbjwydypW2t5Lv8_H5x6nHegl/view?usp=drive_link
+https://drive.google.com/file/d/1SfyY796fLrBCMY39OcyuxZafqSCRZPZk/view?usp=drive_link
+https://drive.google.com/file/d/1X-44sZ8CcfzIskc0dvSx882o1yFhHaZB/view?usp=drive_link
+https://drive.google.com/file/d/1BOIWCCCk6DLD4Bmvc75ZbbLi9AQm-1ao/view?usp=drive_link
+https://drive.google.com/file/d/1RuyDtRE1kk76sw-wP8vx5SgLoPF3PA_H/view?usp=drive_link
+https://drive.google.com/file/d/1c4eoQiBbGuy3CTAQDUSkd84Ponh1roAQ/view?usp=drive_link
+https://drive.google.com/file/d/19PXB9z4Ljq6dsbf9TqcOrrP5SRbw2Tc_/view?usp=drive_link
+https://drive.google.com/file/d/1nn1VVZVoIXWdYDozR7XHXE4mPLQG80PQ/view?usp=drive_link
+https://drive.google.com/file/d/1MBdFGOKPV8GUhwoSsJ_Ky3qAMLM2Bv3K/view?usp=drive_link
+https://drive.google.com/file/d/1of3k_M-7Nh3I1TndcWedxK4ca9dn8Sc5/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_elevator.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_elevator.txt
@@ -0,0 +1,20 @@
+https://drive.google.com/file/d/12ctkOAdkCNGN1JLbZb5ww3XTBn2LFpGI/view?usp=drive_link
+https://drive.google.com/file/d/1G_Vd46_4fq6O64gHHjUbJX5Ld44ZZx0y/view?usp=drive_link
+https://drive.google.com/file/d/1uKgUy73B3xBogQAOUhfZjO0X5qZGsi2c/view?usp=drive_link
+https://drive.google.com/file/d/1fu9cIrfI-fE2LhdGUxbx7-8Ci_PF8Ypm/view?usp=drive_link
+https://drive.google.com/file/d/1Ygk9ZPJzx8xw2A9JF3NHbJ44TqnvSTQR/view?usp=drive_link
+https://drive.google.com/file/d/18m5xPuccNsEB20WPshm3zhxmXc6k63ED/view?usp=drive_link
+https://drive.google.com/file/d/1DiqqxC44rriviRQpqogcv0-EB-Y6nr9g/view?usp=drive_link
+https://drive.google.com/file/d/1qPdaoTVDizJXkfXLioWU7iJ8hqCXSyOQ/view?usp=drive_link
+https://drive.google.com/file/d/1Fj9kIA_mG7f67WFfACJEaZ7izcHG7vUm/view?usp=drive_link
+https://drive.google.com/file/d/1WpYehZnI2P7dUdJPfkE-ij1rqCnjZEbB/view?usp=drive_link
+https://drive.google.com/file/d/1_zwWkT4jPyzB38STWb6whlzsPzXmfA9r/view?usp=drive_link
+https://drive.google.com/file/d/1U6-J4I_fPlSFFGfhZPxS5_YzKXwXIZYp/view?usp=drive_link
+https://drive.google.com/file/d/1pRhxxcTfZp5tQo_EScvJUwfc3amiS6Vk/view?usp=drive_link
+https://drive.google.com/file/d/1lWLntqra83RlYU_gN7Vostnfydf6gutd/view?usp=drive_link
+https://drive.google.com/file/d/1vIBKo0x-NYEHV1FvRpco1lQMpRdAWAIL/view?usp=drive_link
+https://drive.google.com/file/d/1pdrLV3JTQou_XH0Aap61Ssf60iVKm1jJ/view?usp=drive_link
+https://drive.google.com/file/d/1QTsLoQ7SwmKdQHjBGVDaR2uTwfFwtrOf/view?usp=drive_link
+https://drive.google.com/file/d/1Gytai8M_12J36GY6L_TulEcOC-035jwS/view?usp=drive_link
+https://drive.google.com/file/d/14LJudNc629NT-i8xreXtzl27ce_DxOFJ/view?usp=drive_link
+https://drive.google.com/file/d/1sBvPCODbzxGAI0S3lgN5cSG9Go3lRi00/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_shrimp.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_shrimp.txt
@@ -0,0 +1,18 @@
+https://drive.google.com/file/d/1MJn9GbC8p9lN4gC9KDMLEkTkP_gGpXj0/view?usp=drive_link
+https://drive.google.com/file/d/1-4LXgjl7ZCOgp-8GCJmFRD8OeqN5Jf7-/view?usp=drive_link
+https://drive.google.com/file/d/1Ho06Ce0SPbqU3juaMxNUwAt3zCRLGC8W/view?usp=drive_link
+https://drive.google.com/file/d/1ivHoj7_7olBSxH-Y8kqXEW7ttITK-45j/view?usp=drive_link
+https://drive.google.com/file/d/1qjY4hM_IvZ8cq2II_n9MeJbvyeuN4oBP/view?usp=drive_link
+https://drive.google.com/file/d/1rKVhO_f92-7sw13T8hTVrza3B9oAVgoy/view?usp=drive_link
+https://drive.google.com/file/d/1pcLPHO8fBkc1-CRa88tyQtEueE4xiXNi/view?usp=drive_link
+https://drive.google.com/file/d/1Vev_chCsIeEdvQ8poEYNsOJFGy_QU8kZ/view?usp=drive_link
+https://drive.google.com/file/d/1l5G4zpRkxSLCQjvGPYSN4zfCvVRQuzMz/view?usp=drive_link
+https://drive.google.com/file/d/14vgthE1eoakXkr2-DRw50E6lAqYOiUuE/view?usp=drive_link
+https://drive.google.com/file/d/17nPSmKKmgQ2B7zkzWrZYiLM3RBuFod82/view?usp=drive_link
+https://drive.google.com/file/d/1QcDsxplVvb_ID9BVrihl5FvlC-j7waXi/view?usp=drive_link
+https://drive.google.com/file/d/18pEejBpI-eEVaWAAjBCyC0vgbX3T1Esj/view?usp=drive_link
+https://drive.google.com/file/d/1H8eH6_IRODtEFT6WoM77ltR5OoOrqXmI/view?usp=drive_link
+https://drive.google.com/file/d/1IWlpFRZhoxyG4nS13CWK4leZVk5wbNx4/view?usp=drive_link
+https://drive.google.com/file/d/1PbZA8_OCGmMLxNP9xbkLRSChniL4uGxl/view?usp=drive_link
+https://drive.google.com/file/d/1p9XAdmG2f_WeflNO4DIJ_tr1rK6M9B4B/view?usp=drive_link
+https://drive.google.com/file/d/1nS59Et1cNAvKo3Y4SeSGRuZD5TvBbCF3/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wash_pan.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wash_pan.txt
@@ -0,0 +1 @@
+https://drive.google.com/drive/folders/1S8eFg98IaGAIKVZ8QFWG1bx4mHa-O204
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wipe_wine.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wipe_wine.txt
@@ -0,0 +1,4 @@
+https://drive.google.com/drive/folders/1tC_g1AJ8lglBLY-fjsQrG6DMBa3Ucp-0
+https://drive.google.com/file/d/1fG_Yi2MJrFjiUVN3XoiWXLtTxHlwwaDv/view?usp=drive_link
+https://drive.google.com/file/d/1WX32VWfzzX3Blmd06DRxLwFbMJfVe7P4/view?usp=drive_link
+https://drive.google.com/file/d/18onsX3vXg3xkFwP5bVUCjdV4n9TRn0C9/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_human.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_human.txt
@@ -0,0 +1,3 @@
+https://drive.google.com/drive/folders/1RgyD0JgTX30H4IM5XZn8I3zSV_mr8pyF
+https://drive.google.com/file/d/18Cudl6nikDtgRolea7je8iF_gGKzynOP/view?usp=drive_link
+https://drive.google.com/file/d/1C1kZYyROzs-PrLc0SkDgUgMi4-L3lauE/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_scripted.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_scripted.txt
@@ -0,0 +1,3 @@
+https://drive.google.com/drive/folders/1TsojQQSXtHEoGnqgJ3gmpPQR2DPLtS2N
+https://drive.google.com/file/d/1wfMSZ24oOh5KR_0aaP3Cnu_c4ZCveduB/view?usp=drive_link
+https://drive.google.com/file/d/17EuCUWS6uCCr6yyNzpXdcdE-_TTNCKtf/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_human.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_human.txt
@@ -0,0 +1,3 @@
+https://drive.google.com/drive/folders/1sc-E4QYW7A0o23m1u2VWNGVq5smAsfCo
+https://drive.google.com/file/d/18smMymtr8tIxaNUQ61gW6dG50pt3MvGq/view?usp=drive_link
+https://drive.google.com/file/d/1Nk7l53d9sJoGDBKAOnNrExX5nLacATc6/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_scripted.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_scripted.txt
@@ -0,0 +1,3 @@
+https://drive.google.com/drive/folders/1aRyoOhQwxhyt1J8XgEig4s6kzaw__LXj
+https://drive.google.com/file/d/1pnGIOd-E4-rhz2P3VxpknMKRZCoKt6eI/view?usp=drive_link
+https://drive.google.com/file/d/1GKReZHrXU73NMiC5zKCq_UtqPVtYq8eo/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_battery.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_battery.txt
@@ -0,0 +1,2 @@
+https://drive.google.com/drive/folders/19qS_n7vKgDcPeTMnvDHQ5-n73xEbJz5D
+https://drive.google.com/file/d/1oC31By0A2bsBeHyUwBdQw1z4ng6yi9Za/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_candy.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_candy.txt
@@ -0,0 +1,2 @@
+https://drive.google.com/drive/folders/1m5rQ6UVH8Q9RQp_6c0CxkQ88-L-ScO7q
+https://drive.google.com/file/d/1wHz2qcmwcVG0C0CZ9MjQDQcmj4OY9_a3/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee.txt
@@ -0,0 +1,2 @@
+https://drive.google.com/drive/folders/1seQGay470nGQ-knBI5TjsTr8iL9Qws5q
+https://drive.google.com/file/d/1T89hSX5U99wLGvGTE7yUBaQPOpyj6Sai/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee_new.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee_new.txt
@@ -0,0 +1,2 @@
+https://drive.google.com/drive/folders/1t3eDc5Rg0DveyRe8oTm6Dia_FYU5mXyf
+https://drive.google.com/file/d/1TXFaduTakvS0ZWJqKCX-HIvYglum_5CY/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_cups_open.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_cups_open.txt
@@ -0,0 +1,2 @@
+https://drive.google.com/drive/folders/1Z9X3DNzd6LS0FFjQemNUMoMA5yk5VQOh
+https://drive.google.com/file/d/1Wlyc0vTkjXuWB6zbaVOWhEfD7BmPgUV_/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_fork_pick_up.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_fork_pick_up.txt
@@ -0,0 +1,53 @@
+https://drive.google.com/drive/folders/1DYgB4ifX4uIid9m9jnC0Zdz8Nf7ZC0fc
+https://drive.google.com/file/d/1Eb-NRNk_FmVleCbU_Ng5Y4dfcjTKN7Rv/view?usp=drive_link
+https://drive.google.com/file/d/1dkhjEADakT-44l9jf-nK4x89kr4yG_qb/view?usp=drive_link
+https://drive.google.com/file/d/14hDhgcZkVqNExGb4tIXpSjMshhqZETch/view?usp=drive_link
+https://drive.google.com/file/d/1zVMEHpHbuNyP5A_lYU7RPSLB-4V0yfZw/view?usp=drive_link
+https://drive.google.com/file/d/1JtgDjBvy7FnRpFzrx_foC3quorYQFAR-/view?usp=drive_link
+https://drive.google.com/file/d/1EHdneB6F-PP0dQlX8qPaXbxmKoBy_YwO/view?usp=drive_link
+https://drive.google.com/file/d/17Z0jjVBy1OPKREPu77_n_rQzorDiapji/view?usp=drive_link
+https://drive.google.com/file/d/1F4i23qPJ_qTf5jWjfLo4ARGJChznYWt3/view?usp=drive_link
+https://drive.google.com/file/d/1kZtXWM3uS0-rLblydBfJ0mMcVnMMXw9w/view?usp=drive_link
+https://drive.google.com/file/d/1mNODox87xFfY5Z_o5mcLsr8SHb39jDik/view?usp=drive_link
+https://drive.google.com/file/d/1Ob44VdmEUA93FKDECiRb5Ogz2xQg5IWp/view?usp=drive_link
+https://drive.google.com/file/d/1fdQLdjj3Cwv33R1wZhfrLz9Del8mqgHb/view?usp=drive_link
+https://drive.google.com/file/d/1Yu3L3ft21zP__XL8pCfhb788ZleuW1n5/view?usp=drive_link
+https://drive.google.com/file/d/1ozBBWXVZ9hXDh9ooHUNroHdYm8UDqnhJ/view?usp=drive_link
+https://drive.google.com/file/d/1o0TGqvfWw_Lunxb5ubKDS21Lr_WC0h75/view?usp=drive_link
+https://drive.google.com/file/d/1jZnd5eP5L6BH5l98BPN6OnoQx3fu8e9n/view?usp=drive_link
+https://drive.google.com/file/d/1S5sYbz8wcLYp0V67v13i4PRcBxodn4Hg/view?usp=drive_link
+https://drive.google.com/file/d/1rFeg_x6ftJYwPtBv34D3h2L2cpDLeR4G/view?usp=drive_link
+https://drive.google.com/file/d/1GvS3lcm4o6nm_scUk0XxKeVFNmzjucDZ/view?usp=drive_link
+https://drive.google.com/file/d/1-9i0riphC7NhhDahcQfD1QoBXP5gF90A/view?usp=drive_link
+https://drive.google.com/file/d/15p_IqGsMbKuvzMS872THAZr-3SBtb1Fr/view?usp=drive_link
+https://drive.google.com/file/d/1ToyYcBfJL8gbQn0q_59zPLsFmm7dmMJo/view?usp=drive_link
+https://drive.google.com/file/d/1e_7PNH7CYafE4pAebP7ZdI7XFbmEcy_i/view?usp=drive_link
+https://drive.google.com/file/d/1JoabvGVsIQdug2xOhUIhetEIyDM91y_Y/view?usp=drive_link
+https://drive.google.com/file/d/1kOMw1y0lmnVaCjwZICfzCsx6e0Z8MNGR/view?usp=drive_link
+https://drive.google.com/file/d/16it_wd1JOevUQTK2_CvF_pBACTgpIPgM/view?usp=drive_link
+https://drive.google.com/file/d/1IRcCj9HnJSfbyMgr5XEERGlEnWeZQwOc/view?usp=drive_link
+https://drive.google.com/file/d/1Z2dIJfq_S3liGmPN9Rphvkmucnmw7tlb/view?usp=drive_link
+https://drive.google.com/file/d/1J3NoAjzndGx9yNyaBOJHdNny1epzUoBt/view?usp=drive_link
+https://drive.google.com/file/d/18nOvxV1k8FSmBrhT4TPo2sKKSZXougyx/view?usp=drive_link
+https://drive.google.com/file/d/1CT8FxclafFMjSd7gCWVw3VSeryeiF04i/view?usp=drive_link
+https://drive.google.com/file/d/16M9KVqQMFfSsXfypK0bocFft8Nz3j2Rt/view?usp=drive_link
+https://drive.google.com/file/d/18QPVkw6bj6HW8LTPrQLWrrUX4R6RcF42/view?usp=drive_link
+https://drive.google.com/file/d/1hQTVtA5hBTE_StXpJafTZJ3tgt2VQQ_t/view?usp=drive_link
+https://drive.google.com/file/d/1Dn-d5g69H6EgAWgsFdrcbJKtz7ySsCQ8/view?usp=drive_link
+https://drive.google.com/file/d/13hMr16483P7ALYv73yMRUN37fJdVQM62/view?usp=drive_link
+https://drive.google.com/file/d/1848yN3XMN5zJMEgApt6KzrWgfRPfimtv/view?usp=drive_link
+https://drive.google.com/file/d/1oAD9kSnS0fTgj-CjD4u9VdZ5X67IOIMa/view?usp=drive_link
+https://drive.google.com/file/d/1ilzIWLCCG5b_KgF5s0wdN2I5-lFNpwC1/view?usp=drive_link
+https://drive.google.com/file/d/1rjsT2YBjnidxod1s9s-myAYz8boHr-WB/view?usp=drive_link
+https://drive.google.com/file/d/18Gg48HTub15bd8qzbhiCUufbVy0fbN5G/view?usp=drive_link
+https://drive.google.com/file/d/1WsSnQSqmMTVSRwrhT1Y-v782My2zcjLm/view?usp=drive_link
+https://drive.google.com/file/d/1ea9ZCvoyc-xqiFXgeDcA_mOWsw7VUuoi/view?usp=drive_link
+https://drive.google.com/file/d/1wv1v3-XhPgbNzp62BXbJTDzMPu2tlDUc/view?usp=drive_link
+https://drive.google.com/file/d/18-ikzt8LoZ83Gi3goKCELs4U4z8hrRoF/view?usp=drive_link
+https://drive.google.com/file/d/16Bjhp7JNCXkGuLvyNcZowAx3W-Y-15DV/view?usp=drive_link
+https://drive.google.com/file/d/1Gc-KRI-xwcp1fMR55ugbrLg_5y3SPde-/view?usp=drive_link
+https://drive.google.com/file/d/1oP72Q386Z4Sy5MMm-t5yNogIe5Van_9k/view?usp=drive_link
+https://drive.google.com/file/d/112T90eDUDVH-SyOV7UnZl5bscAH2hcfq/view?usp=drive_link
+https://drive.google.com/file/d/1y-uKOesRRhjgDtFbG_j65f4SGg0v8XDg/view?usp=drive_link
+https://drive.google.com/file/d/1LOP05OagoI3km-ZKQBrS204A85UVk7Ok/view?usp=drive_link
+https://drive.google.com/file/d/1QkHQKgasVzWsmdPvkXgGhWyQ84d93_Az/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pingpong_test.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pingpong_test.txt
@@ -0,0 +1 @@
+https://drive.google.com/drive/folders/1Ut2cv6o6Pkfgg46DgwVUM7Z5PkNG8eJ-
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pro_pencil.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pro_pencil.txt
@@ -0,0 +1 @@
+https://drive.google.com/drive/folders/1FqxPV0PgvgIu8XFjtvZSPSExuNcxVVAY
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_screw_driver.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_screw_driver.txt
@@ -0,0 +1,2 @@
+https://drive.google.com/drive/folders/1SKtG0ct9q0nVdYssJNMWSOjikcXliT58
+https://drive.google.com/file/d/1nchD21O30B3i3LDoqramo1zgW5YvpJIN/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_tape.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_tape.txt
@@ -0,0 +1,2 @@
+https://drive.google.com/drive/folders/1_4DHf2cma0xsChLQFghwigX6Ukti5-zQ
+https://drive.google.com/file/d/1_8vS4hDNDgUQY-SmekrNaa7dF67QJYU-/view?usp=drive_link
--- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_thread_velcro.txt
+++ b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_thread_velcro.txt
@@ -0,0 +1,2 @@
+https://drive.google.com/drive/folders/1_4DHf2cma0xsChLQFghwigX6Ukti5-zQ
+https://drive.google.com/file/d/1_8vS4hDNDgUQY-SmekrNaa7dF67QJYU-/view?usp=drive_link
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Michel Aractingi	14490148f3	added tdmpc2 to policy factory; shape fixes in tdmpc2	2024-11-26 11:58:29 +00:00
Michel Aractingi	16edbbdeee	fixes and updated comments	2024-11-26 09:46:59 +00:00
Michel Aractingi	15090c2544	config comments	2024-11-25 09:51:33 +00:00
Michel Aractingi	166c1fc776	updated configuration parameters	2024-11-22 17:11:47 +00:00
Michel Aractingi	31984645da	simplified estimate_value function in policy	2024-11-21 17:03:30 +00:00
Michel Aractingi	c41ec08ec1	remove self.model_target and added a target q ensemble only without the need to copy the entire policy	2024-11-21 15:00:03 +00:00
Michel Aractingi	a146544765	added new implementation of tdmpc2	2024-11-20 17:30:19 +00:00
				`@@ -1 +0,0 @@`
				`../../lerobot/common/robots/koch_follower/koch.mdx`
				`@@ -1 +0,0 @@`
				`../../lerobot/common/robots/lekiwi/lekiwi.mdx`
				`@@ -1 +0,0 @@`
				`../../lerobot/common/robots/so100_follower/so100.mdx`
				`@@ -1 +0,0 @@`
				`../../lerobot/common/robots/so101_follower/so101.mdx`
				`@@ -0,0 +1 @@`
				`https://drive.google.com/drive/folders/1S8eFg98IaGAIKVZ8QFWG1bx4mHa-O204`