Compare commits

9 Commits
hf-papers ... pre-commit

| Author | SHA1 | Date |
|---|---|---|
|  | 8d20ee7655 |  |
|  | 1537d0ab90 |  |
|  | 2be7f3a3ff |  |
|  | 0cf864870c |  |
|  | 1786916a16 |  |
|  | 0507ad4f68 |  |
|  | bed90e3a41 |  |
|  | 6163daaaa4 |  |
|  | 8e2a394442 |  |
24  .github/workflows/build-docker-images.yml  vendored
@@ -40,24 +40,24 @@ jobs:
          git lfs install

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
        with:
          cache-binary: false

      - name: Check out code
-        uses: actions/checkout@v4
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          lfs: true
          persist-credentials: false

      - name: Login to DockerHub
-        uses: docker/login-action@v3
+        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Build and Push CPU
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
        with:
          context: .
          file: ./docker/lerobot-cpu/Dockerfile

@@ -78,24 +78,24 @@ jobs:
          git lfs install

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
        with:
          cache-binary: false

      - name: Check out code
-        uses: actions/checkout@v4
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          lfs: true
          persist-credentials: false

      - name: Login to DockerHub
-        uses: docker/login-action@v3
+        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Build and Push GPU
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
        with:
          context: .
          file: ./docker/lerobot-gpu/Dockerfile

@@ -110,23 +110,23 @@ jobs:
      group: aws-general-8-plus
    steps:
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
        with:
          cache-binary: false

      - name: Check out code
-        uses: actions/checkout@v4
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false

      - name: Login to DockerHub
-        uses: docker/login-action@v3
+        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Build and Push GPU dev
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
        with:
          context: .
          file: ./docker/lerobot-gpu-dev/Dockerfile
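Every `uses:` bump in these hunks follows the same pattern: a floating tag (`@v3`, `@v4`, `@v5`) is replaced with the full 40-character commit SHA of that release, keeping the human-readable version as a trailing comment, so the workflow cannot silently pick up new code if the tag is moved. Tools such as zizmor flag unpinned `uses:` references. Purely as an illustration of that rule — this helper is not part of the PR — a small Python sketch that lists entries not pinned to a full SHA could look like:

```python
import re
from pathlib import Path

# A pinned reference looks like: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
PINNED = re.compile(r"uses:\s*\S+@[0-9a-f]{40}\b")
USES = re.compile(r"uses:\s*([^\s#]+)")

def unpinned_uses(workflow_dir: str = ".github/workflows") -> list[str]:
    """Return 'file:line action' entries whose `uses:` is not pinned to a full commit SHA.

    Rough heuristic only: it will also flag local `./` actions and docker:// references.
    """
    hits = []
    for path in sorted(Path(workflow_dir).glob("*.yml")):
        for lineno, line in enumerate(path.read_text().splitlines(), start=1):
            match = USES.search(line)
            if match and not PINNED.search(line):
                hits.append(f"{path}:{lineno} {match.group(1)}")
    return hits

if __name__ == "__main__":
    for hit in unpinned_uses():
        print("not pinned to a SHA:", hit)
```

Container images that intentionally stay on `:latest` are instead annotated with explicit zizmor ignore comments, as in the next file.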
4  .github/workflows/nightly-tests.yml  vendored
@@ -33,7 +33,7 @@ jobs:
    runs-on:
      group: aws-general-8-plus
    container:
-      image: huggingface/lerobot-cpu:latest
+      image: huggingface/lerobot-cpu:latest # zizmor: ignore[unpinned-images]
      options: --shm-size "16gb"
      credentials:
        username: ${{ secrets.DOCKERHUB_USERNAME }}

@@ -60,7 +60,7 @@ jobs:
      CUDA_VISIBLE_DEVICES: "0"
      TEST_TYPE: "single_gpu"
    container:
-      image: huggingface/lerobot-gpu:latest
+      image: huggingface/lerobot-gpu:latest # zizmor: ignore[unpinned-images]
      options: --gpus all --shm-size "16gb"
      credentials:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
8  .github/workflows/quality.yml  vendored
@@ -33,12 +33,12 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false

      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # v4.9.1
        with:
          python-version: ${{ env.PYTHON_VERSION }}

@@ -64,9 +64,9 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false

      - name: typos-action
-        uses: crate-ci/typos@v1.29.10
+        uses: crate-ci/typos@db35ee91e80fbb447f33b0e5fbddb24d2a1a884f # v1.29.10
8  .github/workflows/test-docker-build.yml  vendored
@@ -35,7 +35,7 @@ jobs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Check out code
-        uses: actions/checkout@v4
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false

@@ -64,17 +64,17 @@ jobs:
        docker-file: ${{ fromJson(needs.get_changed_files.outputs.matrix) }}
    steps:
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
        with:
          cache-binary: false

      - name: Check out code
-        uses: actions/checkout@v4
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false

      - name: Build Docker image
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
        with:
          file: ${{ matrix.docker-file }}
          context: .
12  .github/workflows/test.yml  vendored
@@ -50,7 +50,7 @@ jobs:
    env:
      MUJOCO_GL: egl
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          lfs: true # Ensure LFS files are pulled
          persist-credentials: false

@@ -62,7 +62,7 @@ jobs:
          sudo apt-get install -y libegl1-mesa-dev ffmpeg portaudio19-dev

      - name: Install uv and python
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
        with:
          enable-cache: true
          version: ${{ env.UV_VERSION }}

@@ -85,7 +85,7 @@ jobs:
    env:
      MUJOCO_GL: egl
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          lfs: true # Ensure LFS files are pulled
          persist-credentials: false

@@ -94,7 +94,7 @@ jobs:
        run: sudo apt-get update && sudo apt-get install -y ffmpeg

      - name: Install uv and python
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
        with:
          enable-cache: true
          version: ${{ env.UV_VERSION }}

@@ -117,7 +117,7 @@ jobs:
    env:
      MUJOCO_GL: egl
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          lfs: true # Ensure LFS files are pulled
          persist-credentials: false

@@ -129,7 +129,7 @@ jobs:
          sudo apt-get install -y libegl1-mesa-dev ffmpeg portaudio19-dev

      - name: Install uv and python
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
        with:
          enable-cache: true
          version: ${{ env.UV_VERSION }}
4  .github/workflows/trufflehog.yml  vendored
@@ -24,12 +24,12 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Secret Scanning
-        uses: trufflesecurity/trufflehog@main
+        uses: trufflesecurity/trufflehog@90694bf9af66e7536abc5824e7a87246dbf933cb # v3.88.35
        with:
          extra_args: --only-verified
@@ -37,18 +37,18 @@ repos:
     - id: trailing-whitespace

 - repo: https://github.com/adhtruong/mirrors-typos
-  rev: v1.31.1
+  rev: v1.32.0
   hooks:
     - id: typos
       args: [--force-exclude]

 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.19.1
+  rev: v3.20.0
   hooks:
     - id: pyupgrade

 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.11.5
+  rev: v0.11.12
   hooks:
     - id: ruff
       args: [--fix]

@@ -57,12 +57,12 @@ repos:

 ##### Security #####
 - repo: https://github.com/gitleaks/gitleaks
-  rev: v8.24.3
+  rev: v8.27.0
   hooks:
     - id: gitleaks

 - repo: https://github.com/woodruffw/zizmor-pre-commit
-  rev: v1.5.2
+  rev: v1.9.0
   hooks:
     - id: zizmor

@@ -360,7 +360,7 @@ with profile(
 If you want, you can cite this work with:
 ```bibtex
 @misc{cadene2024lerobot,
-    author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Gallouedec, Quentin and Zouitine, Adil and Wolf, Thomas},
+    author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Gallouedec, Quentin and Zouitine, Adil and Palma, Steven and Kooijmans, Pepijn and Aractingi, Michel and Shukor, Mustafa and Aubakirova, Dana and Russi, Martino and Capuano, Francesco and Pascale, Caroline and Choghari, Jade and Moss, Jess and Wolf, Thomas},
     title = {LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch},
     howpublished = "\url{https://github.com/huggingface/lerobot}",
     year = {2024}
@@ -55,7 +55,7 @@ conda install ffmpeg -c conda-forge

 Install 🤗 LeRobot:
 ```bash
-cd lerobot && pip install ".[feetech]"
+cd lerobot && pip install -e ".[feetech]"
 ```

 ## Troubleshooting
@@ -141,7 +141,7 @@ python lerobot/scripts/configure_motor.py \
   --ID 1
 ```

-Note: These motors are currently limitated. They can take values between 0 and 4096 only, which corresponds to a full turn. They can't turn more than that. 2048 is at the middle of this range, so we can take -2048 steps (180 degrees anticlockwise) and reach the maximum range, or take +2048 steps (180 degrees clockwise) and reach the maximum range. The configuration step also sets the homing offset to 0, so that if you misassembled the arm, you can always update the homing offset to account for a shift up to ± 2048 steps (± 180 degrees).
+Note: These motors are currently limited. They can take values between 0 and 4096 only, which corresponds to a full turn. They can't turn more than that. 2048 is at the middle of this range, so we can take -2048 steps (180 degrees anticlockwise) and reach the maximum range, or take +2048 steps (180 degrees clockwise) and reach the maximum range. The configuration step also sets the homing offset to 0, so that if you misassembled the arm, you can always update the homing offset to account for a shift up to ± 2048 steps (± 180 degrees).

 Then unplug your motor and plug the second motor and set its ID to 2.
 ```bash
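The note in this hunk reduces to simple unit arithmetic: 4096 steps per full turn, so ±2048 steps is ±180 degrees, and the homing offset can only absorb a shift inside that window. A tiny Python sketch of the conversion, for illustration only (not code from the diff):

```python
STEPS_PER_TURN = 4096                  # Feetech position range 0..4096 == one full turn
HALF_TURN_STEPS = STEPS_PER_TURN // 2  # 2048, the middle of the range

def steps_to_degrees(steps: int) -> float:
    """4096 steps == 360 degrees, so one step is ~0.088 degrees."""
    return steps * 360.0 / STEPS_PER_TURN

def homing_offset_in_range(offset_steps: int) -> bool:
    """The homing offset can only compensate a misassembly of up to +/-2048 steps (+/-180 degrees)."""
    return -HALF_TURN_STEPS <= offset_steps <= HALF_TURN_STEPS

assert steps_to_degrees(HALF_TURN_STEPS) == 180.0
assert homing_offset_in_range(-2048) and not homing_offset_in_range(3000)
```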
@@ -61,7 +61,7 @@ conda install ffmpeg -c conda-forge

 Install 🤗 LeRobot:
 ```bash
-cd lerobot && pip install ".[feetech]"
+cd lerobot && pip install -e ".[feetech]"
 ```

 > [!NOTE]
@@ -106,7 +106,7 @@ def worker_process(queue: queue.Queue, num_threads: int):
 class AsyncImageWriter:
     """
     This class abstract away the initialisation of processes or/and threads to
-    save images on disk asynchrounously, which is critical to control a robot and record data
+    save images on disk asynchronously, which is critical to control a robot and record data
     at a high frame rate.

     When `num_processes=0`, it creates a threads pool of size `num_threads`.
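The docstring describes the pattern: disk writes are handed to a pool of worker threads (or processes) through a queue, so the control loop never blocks on image I/O. A rough, self-contained sketch of the thread-pool case only — not the lerobot implementation, and it assumes numpy and Pillow are available:

```python
import queue
import threading
from pathlib import Path

import numpy as np
from PIL import Image

def _worker(q: "queue.Queue") -> None:
    # Each worker pops (image, path) jobs and writes them to disk until it sees the None sentinel.
    while True:
        item = q.get()
        if item is None:
            q.task_done()
            break
        image, path = item
        Image.fromarray(image).save(path)
        q.task_done()

class MinimalAsyncImageWriter:
    """Thread-pool image writer: saving happens off the control loop's critical path."""

    def __init__(self, num_threads: int = 4):
        self.queue: queue.Queue = queue.Queue()
        self.threads = [
            threading.Thread(target=_worker, args=(self.queue,), daemon=True) for _ in range(num_threads)
        ]
        for t in self.threads:
            t.start()

    def save_image(self, image: np.ndarray, path: Path) -> None:
        self.queue.put((image, path))  # returns immediately; a worker does the disk write

    def stop(self) -> None:
        for _ in self.threads:
            self.queue.put(None)  # one sentinel per worker
        for t in self.threads:
            t.join()
```

Calling `stop()` signals the workers to finish pending writes and joins them, which mirrors why `stop_image_writer` in the next hunk must run before the dataset is handed to a parallelized DataLoader.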
@@ -944,7 +944,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
     def stop_image_writer(self) -> None:
         """
         Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first to
-        remove the image_writer in order for the LeRobotDataset object to be pickleable and parallelized.
+        remove the image_writer in order for the LeRobotDataset object to be picklable and parallelized.
         """
         if self.image_writer is not None:
             self.image_writer.stop()
@@ -36,7 +36,7 @@ ALOHA_MOBILE_INFO = {
     "robot_config": AlohaRobotConfig(),
     "license": "mit",
     "url": "https://mobile-aloha.github.io/",
-    "paper": "https://huggingface.co/papers/2401.02117",
+    "paper": "https://arxiv.org/abs/2401.02117",
     "citation_bibtex": dedent(r"""
         @inproceedings{fu2024mobile,
         author = {Fu, Zipeng and Zhao, Tony Z. and Finn, Chelsea},

@@ -49,7 +49,7 @@ ALOHA_STATIC_INFO = {
     "robot_config": AlohaRobotConfig(),
     "license": "mit",
     "url": "https://tonyzhaozh.github.io/aloha/",
-    "paper": "https://huggingface.co/papers/2304.13705",
+    "paper": "https://arxiv.org/abs/2304.13705",
     "citation_bibtex": dedent(r"""
         @article{Zhao2023LearningFB,
         title={Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware},

@@ -57,13 +57,13 @@ ALOHA_STATIC_INFO = {
         journal={RSS},
         year={2023},
         volume={abs/2304.13705},
-        url={https://huggingface.co/papers/2304.13705}
+        url={https://arxiv.org/abs/2304.13705}
         }""").lstrip(),
 }
 PUSHT_INFO = {
     "license": "mit",
     "url": "https://diffusion-policy.cs.columbia.edu/",
-    "paper": "https://huggingface.co/papers/2303.04137v5",
+    "paper": "https://arxiv.org/abs/2303.04137v5",
     "citation_bibtex": dedent(r"""
         @article{chi2024diffusionpolicy,
         author = {Cheng Chi and Zhenjia Xu and Siyuan Feng and Eric Cousineau and Yilun Du and Benjamin Burchfiel and Russ Tedrake and Shuran Song},

@@ -75,7 +75,7 @@ PUSHT_INFO = {
 XARM_INFO = {
     "license": "mit",
     "url": "https://www.nicklashansen.com/td-mpc/",
-    "paper": "https://huggingface.co/papers/2203.04955",
+    "paper": "https://arxiv.org/abs/2203.04955",
     "citation_bibtex": dedent(r"""
         @inproceedings{Hansen2022tdmpc,
         title={Temporal Difference Learning for Model Predictive Control},

@@ -244,7 +244,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://ut-austin-rpl.github.io/BUDS-website/",
-        "paper": "https://huggingface.co/papers/2109.13841",
+        "paper": "https://arxiv.org/abs/2109.13841",
         "citation_bibtex": dedent(r"""
             @article{zhu2022bottom,
             title={Bottom-Up Skill Discovery From Unsegmented Demonstrations for Long-Horizon Robot Manipulation},

@@ -261,7 +261,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://ut-austin-rpl.github.io/sailor/",
-        "paper": "https://huggingface.co/papers/2210.11435",
+        "paper": "https://arxiv.org/abs/2210.11435",
         "citation_bibtex": dedent(r"""
             @inproceedings{nasiriany2022sailor,
             title={Learning and Retrieval from Prior Data for Skill-based Imitation Learning},

@@ -274,7 +274,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://ut-austin-rpl.github.io/sirius/",
-        "paper": "https://huggingface.co/papers/2211.08416",
+        "paper": "https://arxiv.org/abs/2211.08416",
         "citation_bibtex": dedent(r"""
             @inproceedings{liu2022robot,
             title = {Robot Learning on the Job: Human-in-the-Loop Autonomy and Learning During Deployment},

@@ -298,14 +298,14 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "cc-by-4.0",
         "url": "https://sites.google.com/view/cablerouting/home",
-        "paper": "https://huggingface.co/papers/2307.08927",
+        "paper": "https://arxiv.org/abs/2307.08927",
         "citation_bibtex": dedent(r"""
             @article{luo2023multistage,
             author = {Jianlan Luo and Charles Xu and Xinyang Geng and Gilbert Feng and Kuan Fang and Liam Tan and Stefan Schaal and Sergey Levine},
             title = {Multi-Stage Cable Routing through Hierarchical Imitation Learning},
             journal = {arXiv pre-print},
             year = {2023},
-            url = {https://huggingface.co/papers/2307.08927},
+            url = {https://arxiv.org/abs/2307.08927},
             }""").lstrip(),
     },
     "berkeley_fanuc_manipulation": {

@@ -322,7 +322,7 @@ DATASETS = {
     "berkeley_gnm_cory_hall": {
         "tasks_col": "language_instruction",
         "license": "mit",
-        "paper": "https://huggingface.co/papers/1709.10489",
+        "paper": "https://arxiv.org/abs/1709.10489",
         "citation_bibtex": dedent(r"""
             @inproceedings{kahn2018self,
             title={Self-supervised deep reinforcement learning with generalized computation graphs for robot navigation},

@@ -337,7 +337,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://sites.google.com/view/recon-robot",
-        "paper": "https://huggingface.co/papers/2104.05859",
+        "paper": "https://arxiv.org/abs/2104.05859",
         "citation_bibtex": dedent(r"""
             @inproceedings{shah2021rapid,
             title={Rapid Exploration for Open-World Navigation with Latent Goal Models},

@@ -351,7 +351,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://sites.google.com/view/SACSoN-review",
-        "paper": "https://huggingface.co/papers/2306.01874",
+        "paper": "https://arxiv.org/abs/2306.01874",
         "citation_bibtex": dedent(r"""
             @article{hirose2023sacson,
             title={SACSoN: Scalable Autonomous Data Collection for Social Navigation},

@@ -363,7 +363,7 @@ DATASETS = {
     "berkeley_mvp": {
         "tasks_col": "language_instruction",
         "license": "mit",
-        "paper": "https://huggingface.co/papers/2203.06173",
+        "paper": "https://arxiv.org/abs/2203.06173",
         "citation_bibtex": dedent(r"""
             @InProceedings{Radosavovic2022,
             title = {Real-World Robot Learning with Masked Visual Pre-training},

@@ -375,7 +375,7 @@ DATASETS = {
     "berkeley_rpt": {
         "tasks_col": "language_instruction",
         "license": "mit",
-        "paper": "https://huggingface.co/papers/2306.10007",
+        "paper": "https://arxiv.org/abs/2306.10007",
         "citation_bibtex": dedent(r"""
             @article{Radosavovic2023,
             title={Robot Learning with Sensorimotor Pre-training},

@@ -388,7 +388,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://human-world-model.github.io/",
-        "paper": "https://huggingface.co/papers/2308.10901",
+        "paper": "https://arxiv.org/abs/2308.10901",
         "citation_bibtex": dedent(r"""
             @inproceedings{mendonca2023structured,
             title={Structured World Models from Human Videos},

@@ -401,7 +401,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://play-fusion.github.io/",
-        "paper": "https://huggingface.co/papers/2312.04549",
+        "paper": "https://arxiv.org/abs/2312.04549",
         "citation_bibtex": dedent(r"""
             @inproceedings{chen2023playfusion,
             title={PlayFusion: Skill Acquisition via Diffusion from Language-Annotated Play},

@@ -414,7 +414,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://robo-affordances.github.io/",
-        "paper": "https://huggingface.co/papers/2304.08488",
+        "paper": "https://arxiv.org/abs/2304.08488",
         "citation_bibtex": dedent(r"""
             @inproceedings{bahl2023affordances,
             title={Affordances from Human Videos as a Versatile Representation for Robotics},

@@ -433,7 +433,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://diffusion-policy.cs.columbia.edu/",
-        "paper": "https://huggingface.co/papers/2303.04137",
+        "paper": "https://arxiv.org/abs/2303.04137v5",
         "citation_bibtex": dedent(r"""
             @inproceedings{chi2023diffusionpolicy,
             title={Diffusion Policy: Visuomotor Policy Learning via Action Diffusion},

@@ -505,7 +505,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://droid-dataset.github.io/",
-        "paper": "https://huggingface.co/papers/2403.12945",
+        "paper": "https://arxiv.org/abs/2403.12945",
         "citation_bibtex": dedent(r"""
             @article{khazatsky2024droid,
             title = {DROID: A Large-Scale In-The-Wild Robot Manipulation Dataset},

@@ -517,7 +517,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "cc-by-4.0",
         "url": "https://functional-manipulation-benchmark.github.io/",
-        "paper": "https://huggingface.co/papers/2401.08553",
+        "paper": "https://arxiv.org/abs/2401.08553",
         "citation_bibtex": dedent(r"""
             @article{luo2024fmb,
             title={FMB: a Functional Manipulation Benchmark for Generalizable Robotic Learning},

@@ -530,7 +530,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://openreview.net/forum?id=WuBv9-IGDUA",
-        "paper": "https://huggingface.co/papers/2401.14502",
+        "paper": "https://arxiv.org/abs/2401.14502",
         "citation_bibtex": dedent(r"""
             @inproceedings{saxena2023multiresolution,
             title={Multi-Resolution Sensing for Real-Time Control with Vision-Language Models},

@@ -575,7 +575,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://jyopari.github.io/VINN/",
-        "paper": "https://huggingface.co/papers/2112.01511",
+        "paper": "https://arxiv.org/abs/2112.01511",
         "citation_bibtex": dedent(r"""
             @misc{pari2021surprising,
             title={The Surprising Effectiveness of Representation Learning for Visual Imitation},

@@ -590,7 +590,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://play-to-policy.github.io/",
-        "paper": "https://huggingface.co/papers/2210.10047",
+        "paper": "https://arxiv.org/abs/2210.10047",
         "citation_bibtex": dedent(r"""
             @article{cui2022play,
             title = {From Play to Policy: Conditional Behavior Generation from Uncurated Robot Data},

@@ -603,7 +603,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://rot-robot.github.io/",
-        "paper": "https://huggingface.co/papers/2206.15469",
+        "paper": "https://arxiv.org/abs/2206.15469",
         "citation_bibtex": dedent(r"""
             @inproceedings{haldar2023watch,
             title={Watch and match: Supercharging imitation with regularized optimal transport},

@@ -633,7 +633,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://sites.google.com/view/hydra-il-2023",
-        "paper": "https://huggingface.co/papers/2306.17237",
+        "paper": "https://arxiv.org/abs/2306.17237",
         "citation_bibtex": dedent(r"""
             @article{belkhale2023hydra,
             title={HYDRA: Hybrid Robot Actions for Imitation Learning},

@@ -646,21 +646,21 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://sites.google.com/view/visionandtouch",
-        "paper": "https://huggingface.co/papers/1810.10191",
+        "paper": "https://arxiv.org/abs/1810.10191",
         "citation_bibtex": dedent(r"""
             @inproceedings{lee2019icra,
             title={Making sense of vision and touch: Self-supervised learning of multimodal representations for contact-rich tasks},
             author={Lee, Michelle A and Zhu, Yuke and Srinivasan, Krishnan and Shah, Parth and Savarese, Silvio and Fei-Fei, Li and Garg, Animesh and Bohg, Jeannette},
             booktitle={2019 IEEE International Conference on Robotics and Automation (ICRA)},
             year={2019},
-            url={https://huggingface.co/papers/1810.10191}
+            url={https://arxiv.org/abs/1810.10191}
             }""").lstrip(),
     },
     "stanford_robocook": {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://hshi74.github.io/robocook/",
-        "paper": "https://huggingface.co/papers/2306.14447",
+        "paper": "https://arxiv.org/abs/2306.14447",
         "citation_bibtex": dedent(r"""
             @article{shi2023robocook,
             title={RoboCook: Long-Horizon Elasto-Plastic Object Manipulation with Diverse Tools},

@@ -673,7 +673,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "cc-by-4.0",
         "url": "https://www.kaggle.com/datasets/oiermees/taco-robot",
-        "paper": "https://huggingface.co/papers/2209.08959, https://huggingface.co/papers/2210.01911",
+        "paper": "https://arxiv.org/abs/2209.08959, https://arxiv.org/abs/2210.01911",
         "citation_bibtex": dedent(r"""
             @inproceedings{rosete2022tacorl,
             author = {Erick Rosete-Beas and Oier Mees and Gabriel Kalweit and Joschka Boedecker and Wolfram Burgard},

@@ -693,7 +693,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "URL",
-        "paper": "https://huggingface.co/papers/2107.05842",
+        "paper": "https://arxiv.org/abs/2107.05842",
         "citation_bibtex": dedent(r"""
             @Article{Osa22,
             author = {Takayuki Osa},

@@ -709,7 +709,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://toto-benchmark.org/",
-        "paper": "https://huggingface.co/papers/2306.00942",
+        "paper": "https://arxiv.org/abs/2306.00942",
         "citation_bibtex": dedent(r"""
             @inproceedings{zhou2023train,
             author={Zhou, Gaoyue and Dean, Victoria and Srirama, Mohan Kumar and Rajeswaran, Aravind and Pari, Jyothish and Hatch, Kyle and Jain, Aryan and Yu, Tianhe and Abbeel, Pieter and Pinto, Lerrel and Finn, Chelsea and Gupta, Abhinav},

@@ -733,7 +733,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://owmcorl.github.io/#",
-        "paper": "https://huggingface.co/papers/2310.16029",
+        "paper": "https://arxiv.org/abs/2310.16029",
         "citation_bibtex": dedent(r"""
             @preprint{Feng2023Finetuning,
             title={Finetuning Offline World Models in the Real World},

@@ -745,7 +745,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://robopil.github.io/d3fields/",
-        "paper": "https://huggingface.co/papers/2309.16118",
+        "paper": "https://arxiv.org/abs/2309.16118",
         "citation_bibtex": dedent(r"""
             @article{wang2023d3field,
             title={D^3Field: Dynamic 3D Descriptor Fields for Generalizable Robotic Manipulation},

@@ -758,7 +758,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://uscresl.github.io/dmfd/",
-        "paper": "https://huggingface.co/papers/2207.10148",
+        "paper": "https://arxiv.org/abs/2207.10148",
         "citation_bibtex": dedent(r"""
             @article{salhotra2022dmfd,
             author={Salhotra, Gautam and Liu, I-Chun Arthur and Dominguez-Kuhne, Marcus and Sukhatme, Gaurav S.},

@@ -775,7 +775,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://ut-austin-rpl.github.io/MUTEX/",
-        "paper": "https://huggingface.co/papers/2309.14320",
+        "paper": "https://arxiv.org/abs/2309.14320",
         "citation_bibtex": dedent(r"""
             @inproceedings{shah2023mutex,
             title={{MUTEX}: Learning Unified Policies from Multimodal Task Specifications},

@@ -811,7 +811,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://saytap.github.io/",
-        "paper": "https://huggingface.co/papers/2306.07580",
+        "paper": "https://arxiv.org/abs/2306.07580",
         "citation_bibtex": dedent(r"""
             @article{saytap2023,
             author = {Yujin Tang and Wenhao Yu and Jie Tan and Heiga Zen and Aleksandra Faust and

@@ -847,7 +847,7 @@ DATASETS = {
         "tasks_col": "language_instruction",
         "license": "mit",
         "url": "https://ut-austin-rpl.github.io/VIOLA/",
-        "paper": "https://huggingface.co/papers/2210.11339",
+        "paper": "https://arxiv.org/abs/2210.11339",
         "citation_bibtex": dedent(r"""
             @article{zhu2022viola,
             title={VIOLA: Imitation Learning for Vision-Based Manipulation with Object Proposal Priors},
@@ -101,7 +101,7 @@ def decode_video_frames_torchvision(
     keyframes_only = False
     torchvision.set_video_backend(backend)
     if backend == "pyav":
-        keyframes_only = True  # pyav doesnt support accuracte seek
+        keyframes_only = True  # pyav doesn't support accurate seek

     # set a video stream reader
     # TODO(rcadene): also load audio stream at the same time
@@ -15,7 +15,7 @@
 # limitations under the License.
 """Action Chunking Transformer Policy

-As per Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware (https://huggingface.co/papers/2304.13705).
+As per Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware (https://arxiv.org/abs/2304.13705).
 The majority of changes here involve removing unused code, unifying naming, and adding helpful comments.
 """

@@ -41,7 +41,7 @@ from lerobot.common.policies.pretrained import PreTrainedPolicy
 class ACTPolicy(PreTrainedPolicy):
     """
     Action Chunking Transformer Policy as per Learning Fine-Grained Bimanual Manipulation with Low-Cost
-    Hardware (paper: https://huggingface.co/papers/2304.13705, code: https://github.com/tonyzhaozh/act)
+    Hardware (paper: https://arxiv.org/abs/2304.13705, code: https://github.com/tonyzhaozh/act)
     """

     config_class = ACTConfig
@@ -161,7 +161,7 @@ class ACTPolicy(PreTrainedPolicy):
             # Calculate Dₖₗ(latent_pdf || standard_normal). Note: After computing the KL-divergence for
             # each dimension independently, we sum over the latent dimension to get the total
             # KL-divergence per batch element, then take the mean over the batch.
-            # (See App. B of https://huggingface.co/papers/1312.6114 for more details).
+            # (See App. B of https://arxiv.org/abs/1312.6114 for more details).
             mean_kld = (
                 (-0.5 * (1 + log_sigma_x2_hat - mu_hat.pow(2) - (log_sigma_x2_hat).exp())).sum(-1).mean()
             )
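The comment and the `mean_kld` expression in this hunk are the standard closed-form KL divergence between a diagonal Gaussian and a standard normal (App. B of the VAE paper): per dimension, -0.5 * (1 + log σ² - μ² - σ²). A small standalone check of that formula against `torch.distributions` — an illustrative sketch, not code from this diff:

```python
import torch
from torch.distributions import Normal, kl_divergence

def kl_to_std_normal(mu: torch.Tensor, log_sigma_x2: torch.Tensor) -> torch.Tensor:
    """Closed-form D_KL(N(mu, sigma^2) || N(0, I)): sum over the latent dim, mean over the batch."""
    return (-0.5 * (1 + log_sigma_x2 - mu.pow(2) - log_sigma_x2.exp())).sum(-1).mean()

mu = 0.1 * torch.randn(4, 32)            # (batch, latent_dim)
log_sigma_x2 = 0.1 * torch.randn(4, 32)  # log of the variance

closed_form = kl_to_std_normal(mu, log_sigma_x2)
reference = kl_divergence(
    Normal(mu, (0.5 * log_sigma_x2).exp()),  # std = exp(0.5 * log variance)
    Normal(torch.zeros_like(mu), torch.ones_like(mu)),
).sum(-1).mean()
assert torch.allclose(closed_form, reference, atol=1e-5)
```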
@@ -175,7 +175,7 @@ class ACTPolicy(PreTrainedPolicy):

 class ACTTemporalEnsembler:
     def __init__(self, temporal_ensemble_coeff: float, chunk_size: int) -> None:
-        """Temporal ensembling as described in Algorithm 2 of https://huggingface.co/papers/2304.13705.
+        """Temporal ensembling as described in Algorithm 2 of https://arxiv.org/abs/2304.13705.

         The weights are calculated as wᵢ = exp(-temporal_ensemble_coeff * i) where w₀ is the oldest action.
         They are then normalized to sum to 1 by dividing by Σwᵢ. Here's some intuition around how the
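The docstring gives the whole rule: wᵢ = exp(-temporal_ensemble_coeff * i) with i = 0 for the oldest prediction, normalized by Σwᵢ, then used to average the overlapping action predictions for the current timestep. A minimal sketch of just that weighting (the shapes and the 0.01 coefficient are assumptions for illustration, not taken from this diff):

```python
import torch

def temporal_ensemble_weights(temporal_ensemble_coeff: float, num_predictions: int) -> torch.Tensor:
    """w_i = exp(-coeff * i), with i = 0 being the oldest prediction, normalized to sum to 1."""
    i = torch.arange(num_predictions, dtype=torch.float32)
    weights = torch.exp(-temporal_ensemble_coeff * i)
    return weights / weights.sum()

# Four chunks have each predicted an action for the current timestep of a 14-D robot.
overlapping_predictions = torch.randn(4, 14)
w = temporal_ensemble_weights(0.01, num_predictions=4)  # the oldest prediction gets the largest weight
ensembled_action = (w[:, None] * overlapping_predictions).sum(dim=0)
```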
@@ -81,7 +81,7 @@ class DiffusionConfig(PreTrainedConfig):
         n_groups: Number of groups used in the group norm of the Unet's convolutional blocks.
         diffusion_step_embed_dim: The Unet is conditioned on the diffusion timestep via a small non-linear
             network. This is the output dimension of that network, i.e., the embedding dimension.
-        use_film_scale_modulation: FiLM (https://huggingface.co/papers/1709.07871) is used for the Unet conditioning.
+        use_film_scale_modulation: FiLM (https://arxiv.org/abs/1709.07871) is used for the Unet conditioning.
             Bias modulation is used be default, while this parameter indicates whether to also use scale
             modulation.
         noise_scheduler_type: Name of the noise scheduler to use. Supported options: ["DDPM", "DDIM"].
@@ -48,7 +48,7 @@ from lerobot.common.policies.utils import (
 class DiffusionPolicy(PreTrainedPolicy):
     """
     Diffusion Policy as per "Diffusion Policy: Visuomotor Policy Learning via Action Diffusion"
-    (paper: https://huggingface.co/papers/2303.04137, code: https://github.com/real-stanford/diffusion_policy).
+    (paper: https://arxiv.org/abs/2303.04137, code: https://github.com/real-stanford/diffusion_policy).
     """

     config_class = DiffusionConfig
@@ -370,7 +370,7 @@ class DiffusionModel(nn.Module):
 class SpatialSoftmax(nn.Module):
     """
     Spatial Soft Argmax operation described in "Deep Spatial Autoencoders for Visuomotor Learning" by Finn et al.
-    (https://huggingface.co/papers/1509.06113). A minimal port of the robomimic implementation.
+    (https://arxiv.org/pdf/1509.06113). A minimal port of the robomimic implementation.

     At a high level, this takes 2D feature maps (from a convnet/ViT) and returns the "center of mass"
     of activations of each channel, i.e., keypoints in the image space for the policy to focus on.
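The docstring captures the operation: a softmax over each channel's spatial grid, then the expected (x, y) coordinate of that distribution — one soft keypoint per channel. A minimal self-contained version of the idea (a sketch with assumed shapes, not the robomimic port used here):

```python
import torch
import torch.nn.functional as F

def spatial_soft_argmax(features: torch.Tensor) -> torch.Tensor:
    """features: (B, C, H, W) -> keypoints: (B, C, 2), the softmax-weighted 'center of mass' per channel."""
    b, c, h, w = features.shape
    attention = F.softmax(features.view(b, c, h * w), dim=-1)  # one spatial distribution per channel
    ys, xs = torch.meshgrid(
        torch.linspace(-1.0, 1.0, h), torch.linspace(-1.0, 1.0, w), indexing="ij"
    )
    coords = torch.stack([xs.reshape(-1), ys.reshape(-1)], dim=-1)  # (H*W, 2), normalized to [-1, 1]
    return attention @ coords  # expected coordinates per channel

keypoints = spatial_soft_argmax(torch.randn(2, 32, 12, 12))  # -> (2, 32, 2)
```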
@@ -728,7 +728,7 @@ class DiffusionConditionalResidualBlock1d(nn.Module):

         self.conv1 = DiffusionConv1dBlock(in_channels, out_channels, kernel_size, n_groups=n_groups)

-        # FiLM modulation (https://huggingface.co/papers/1709.07871) outputs per-channel bias and (maybe) scale.
+        # FiLM modulation (https://arxiv.org/abs/1709.07871) outputs per-channel bias and (maybe) scale.
         cond_channels = out_channels * 2 if use_film_scale_modulation else out_channels
         self.cond_encoder = nn.Sequential(nn.Mish(), nn.Linear(cond_dim, cond_channels))

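As the comment says, FiLM turns the conditioning vector into a per-channel bias — and, when scale modulation is enabled, a per-channel scale as well — which is why `cond_channels` doubles in that case. A rough sketch of that modulation applied to a (batch, channels, time) feature map; the names and shapes are illustrative, not the lerobot block itself:

```python
import torch
from torch import nn

class FiLM1d(nn.Module):
    """Predict per-channel (scale, bias) from a conditioning vector and modulate a (B, C, T) feature map."""

    def __init__(self, cond_dim: int, channels: int, use_scale: bool = True):
        super().__init__()
        self.use_scale = use_scale
        out = channels * 2 if use_scale else channels  # scale + bias, or bias only
        self.cond_encoder = nn.Sequential(nn.Mish(), nn.Linear(cond_dim, out))

    def forward(self, x: torch.Tensor, cond: torch.Tensor) -> torch.Tensor:
        film = self.cond_encoder(cond).unsqueeze(-1)  # (B, out, 1), broadcast over the time axis
        if self.use_scale:
            scale, bias = film.chunk(2, dim=1)
            return scale * x + bias
        return x + film  # bias-only modulation

x = torch.randn(8, 64, 16)   # (batch, channels, horizon)
cond = torch.randn(8, 128)   # e.g. a diffusion-step embedding concatenated with global conditioning
y = FiLM1d(cond_dim=128, channels=64)(x, cond)
```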
@@ -357,7 +357,7 @@ class PI0Policy(PreTrainedPolicy):
         if self.config.resize_imgs_with_padding is not None:
             img = resize_with_pad(img, *self.config.resize_imgs_with_padding, pad_value=0)

-        # Normalize from range [0,1] to [-1,1] as expacted by siglip
+        # Normalize from range [0,1] to [-1,1] as expected by siglip
         img = img * 2.0 - 1.0

         bsize = img.shape[0]
@@ -17,7 +17,7 @@
 """
 π0+FAST: Efficient Action Tokenization for Vision-Language-Action Models

-[Paper](https://huggingface.co/papers/2501.09747)
+[Paper](https://arxiv.org/abs/2501.09747)
 [Jax code](https://github.com/Physical-Intelligence/openpi)

 Designed by Physical Intelligence. Ported from Jax by Hugging Face.
@@ -516,7 +516,7 @@ class PI0FAST(nn.Module):
             interpolate_like_pi=self.config.interpolate_like_pi,
         )

-        # Normalize from range [0,1] to [-1,1] as expacted by siglip
+        # Normalize from range [0,1] to [-1,1] as expected by siglip
         img = img * 2.0 - 1.0

         bsize = img.shape[0]
@@ -17,8 +17,8 @@
 """Implementation of Finetuning Offline World Models in the Real World.

 The comments in this code may sometimes refer to these references:
-TD-MPC paper: Temporal Difference Learning for Model Predictive Control (https://huggingface.co/papers/2203.04955)
-FOWM paper: Finetuning Offline World Models in the Real World (https://huggingface.co/papers/2310.16029)
+TD-MPC paper: Temporal Difference Learning for Model Predictive Control (https://arxiv.org/abs/2203.04955)
+FOWM paper: Finetuning Offline World Models in the Real World (https://arxiv.org/abs/2310.16029)
 """

 # ruff: noqa: N806
@@ -162,7 +162,7 @@ class VQBeTPolicy(PreTrainedPolicy):
 batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
 batch["observation.images"] = torch.stack([batch[key] for key in self.config.image_features], dim=-4)
 batch = self.normalize_targets(batch)
-# VQ-BeT discretizes action using VQ-VAE before training BeT (please refer to section 3.2 in the VQ-BeT paper https://huggingface.co/papers/2403.03181)
+# VQ-BeT discretizes action using VQ-VAE before training BeT (please refer to section 3.2 in the VQ-BeT paper https://arxiv.org/pdf/2403.03181)
 if not self.vqbet.action_head.vqvae_model.discretized.item():
 # loss: total loss of training RVQ
 # n_different_codes: how many of the total possible VQ codes are being used in single batch (how many of them have at least one encoder embedding as a nearest neighbor). This can be at most `vqvae_n_embed * number of layers of RVQ (=2)`.
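For readers following the `n_different_codes` comment above: the metric counts how many residual-VQ codebook entries are actually selected at least once in a batch, summed over RVQ layers. A minimal sketch of that bookkeeping (tensor names and shapes are illustrative, not the repository's exact code):

```python
import torch

# vq_code: (batch, num_rvq_layers) integer code indices returned by the residual VQ,
# e.g. 2 layers with vqvae_n_embed = 16 possible codes per layer.
vq_code = torch.randint(0, 16, (64, 2))

# Count distinct codes used per layer and sum; the upper bound is
# vqvae_n_embed * num_rvq_layers (here 16 * 2 = 32).
n_different_codes = sum(
    torch.unique(vq_code[:, layer]).numel() for layer in range(vq_code.shape[1])
)
print(n_different_codes)
```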
@@ -185,7 +185,7 @@ class VQBeTPolicy(PreTrainedPolicy):
 class SpatialSoftmax(nn.Module):
 """
 Spatial Soft Argmax operation described in "Deep Spatial Autoencoders for Visuomotor Learning" by Finn et al.
-(https://huggingface.co/papers/1509.06113). A minimal port of the robomimic implementation.
+(https://arxiv.org/pdf/1509.06113). A minimal port of the robomimic implementation.

 At a high level, this takes 2D feature maps (from a convnet/ViT) and returns the "center of mass"
 of activations of each channel, i.e., keypoints in the image space for the policy to focus on.
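The docstring above describes the spatial soft argmax: take a softmax over each channel's spatial activations, then the expected (x, y) coordinate under that distribution is the channel's keypoint. A minimal sketch of the operation, assuming normalized coordinates in [-1, 1] (illustrative only, not the repository's module):

```python
import torch
import torch.nn.functional as F

def spatial_soft_argmax(feature_maps: torch.Tensor) -> torch.Tensor:
    """feature_maps: (B, C, H, W) -> keypoints: (B, C, 2) expected (x, y) per channel."""
    b, c, h, w = feature_maps.shape
    # Softmax over all spatial positions of each channel.
    attention = F.softmax(feature_maps.reshape(b, c, h * w), dim=-1)
    # Pixel coordinate grid, normalized to [-1, 1].
    ys, xs = torch.meshgrid(
        torch.linspace(-1.0, 1.0, h), torch.linspace(-1.0, 1.0, w), indexing="ij"
    )
    grid = torch.stack([xs.reshape(-1), ys.reshape(-1)], dim=-1)  # (H*W, 2)
    # Expected coordinate = attention-weighted average of the grid ("center of mass").
    return attention @ grid  # (B, C, 2)

keypoints = spatial_soft_argmax(torch.randn(8, 32, 12, 12))
print(keypoints.shape)  # torch.Size([8, 32, 2])
```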
@@ -387,7 +387,7 @@ class VQBeTModel(nn.Module):

 # only extract the output tokens at the position of action query:
 # Behavior Transformer (BeT), and VQ-BeT are both sequence-to-sequence prediction models,
-# mapping sequential observation to sequential action (please refer to section 2.2 in BeT paper https://huggingface.co/papers/2206.11251).
+# mapping sequential observation to sequential action (please refer to section 2.2 in BeT paper https://arxiv.org/pdf/2206.11251).
 # Thus, it predicts a historical action sequence, in addition to current and future actions (predicting future actions : optional).
 if len_additional_action_token > 0:
 features = torch.cat(
@@ -824,8 +824,8 @@ class VqVae(nn.Module):
 return einops.rearrange(output, "N (T A) -> N T A", A=self.config.action_feature.shape[0])

 def get_code(self, state):
-# in phase 2 of VQ-BeT training, we need a `ground truth labels of action data` to calculate the Focal loss for code prediction head. (please refer to section 3.3 in the paper https://huggingface.co/papers/2403.03181)
-# this function outputs the `GT code` of given action using frozen encoder and quantization layers. (please refer to Figure 2. in the paper https://huggingface.co/papers/2403.03181)
+# in phase 2 of VQ-BeT training, we need a `ground truth labels of action data` to calculate the Focal loss for code prediction head. (please refer to section 3.3 in the paper https://arxiv.org/pdf/2403.03181)
+# this function outputs the `GT code` of given action using frozen encoder and quantization layers. (please refer to Figure 2. in the paper https://arxiv.org/pdf/2403.03181)
 state = einops.rearrange(state, "N T A -> N (T A)")
 with torch.no_grad():
 state_rep = self.encoder(state)
@@ -838,7 +838,7 @@ class VqVae(nn.Module):
 return state_vq, vq_code

 def vqvae_forward(self, state):
-# This function passes the given data through Residual VQ with Encoder and Decoder. Please refer to section 3.2 in the paper https://huggingface.co/papers/2403.03181).
+# This function passes the given data through Residual VQ with Encoder and Decoder. Please refer to section 3.2 in the paper https://arxiv.org/pdf/2403.03181).
 state = einops.rearrange(state, "N T A -> N (T A)")
 # We start with passing action (or action chunk) at:t+n through the encoder ϕ.
 state_rep = self.encoder(state)
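To make the two entry points above concrete: `vqvae_forward` is used while the action VQ-VAE is being trained (encode, quantize, decode), whereas `get_code` later reuses the frozen encoder and quantizer only to label action chunks with their ground-truth codes. A schematic sketch under assumed shapes, where a single nearest-neighbour codebook stands in for the residual VQ (not the repository's VqVae class):

```python
import torch
from torch import nn

class TinyActionVqVae(nn.Module):
    """Schematic stand-in: one nearest-neighbour codebook replaces the residual VQ."""

    def __init__(self, action_dim: int, latent_dim: int = 32, num_codes: int = 16):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(action_dim, 64), nn.ReLU(), nn.Linear(64, latent_dim))
        self.decoder = nn.Sequential(nn.Linear(latent_dim, 64), nn.ReLU(), nn.Linear(64, action_dim))
        self.codebook = nn.Parameter(torch.randn(num_codes, latent_dim))

    def quantize(self, z):
        idx = torch.cdist(z, self.codebook).argmin(dim=-1)
        z_q = self.codebook[idx]
        # Straight-through estimator so gradients still reach the encoder.
        return z + (z_q - z).detach(), idx

    def vqvae_forward(self, actions):
        # Phase 1: train encoder, decoder and codebook to reconstruct action chunks.
        z_q, codes = self.quantize(self.encoder(actions))
        return self.decoder(z_q), codes

    @torch.no_grad()
    def get_code(self, actions):
        # Phase 2: encoder and codebook are frozen; only the code labels are needed.
        _, codes = self.quantize(self.encoder(actions))
        return codes

model = TinyActionVqVae(action_dim=7 * 5)  # e.g. a flattened 5-step chunk of 7-D actions
recon, codes = model.vqvae_forward(torch.randn(4, 7 * 5))
print(recon.shape, codes.shape)  # torch.Size([4, 35]) torch.Size([4])
```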
@@ -336,7 +336,7 @@ class ResidualVQ(nn.Module):
 """
 Residual VQ is composed of multiple VectorQuantize layers.

-Follows Algorithm 1. in https://huggingface.co/papers/2107.03312
+Follows Algorithm 1. in https://arxiv.org/pdf/2107.03312.pdf
 "Residual Vector Quantizer (a.k.a. multi-stage vector quantizer [36]) cascades Nq layers of VQ as follows. The unquantized input vector is
 passed through a first VQ and quantization residuals are computed. The residuals are then iteratively quantized by a sequence of additional
 Nq -1 vector quantizers, as described in Algorithm 1."
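Algorithm 1 quoted above is short enough to sketch: each quantizer layer quantizes the residual left over by the previous layers, and the final quantized vector is the sum of the selected codebook entries. A hedged illustration with toy nearest-neighbour codebooks (not the repository's ResidualVQ class):

```python
import torch

def residual_vq(x: torch.Tensor, codebooks: list[torch.Tensor]):
    """x: (B, D); codebooks: list of (K, D) tensors, one per RVQ layer."""
    residual = x
    quantized = torch.zeros_like(x)
    indices = []
    for codebook in codebooks:
        # Nearest codebook entry for the current residual.
        dists = torch.cdist(residual, codebook)        # (B, K)
        idx = dists.argmin(dim=-1)                      # (B,)
        selected = codebook[idx]                        # (B, D)
        quantized = quantized + selected                # running sum of quantized layers
        residual = residual - selected                  # what is left for the next layer
        indices.append(idx)
    return quantized, torch.stack(indices, dim=-1)      # (B, D), (B, num_layers)

x = torch.randn(4, 8)
codebooks = [torch.randn(16, 8) for _ in range(2)]
quantized, codes = residual_vq(x, codebooks)
print(quantized.shape, codes.shape)  # torch.Size([4, 8]) torch.Size([4, 2])
```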
@@ -1006,7 +1006,7 @@ def gumbel_sample(
 if not straight_through or temperature <= 0.0 or not training:
 return ind, one_hot

-# use reinmax for better second-order accuracy - https://huggingface.co/papers/2304.08612
+# use reinmax for better second-order accuracy - https://arxiv.org/abs/2304.08612
 # algorithm 2

 if reinmax:
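For context on the sampling path touched above: with straight-through estimation the hard one-hot sample is used in the forward pass while gradients flow through the soft, temperature-scaled distribution; ReinMax then refines that estimator to second-order accuracy. A minimal sketch of plain straight-through Gumbel sampling, as an assumption-laden illustration rather than the repository's `gumbel_sample` (and not the ReinMax variant):

```python
import torch
import torch.nn.functional as F

def straight_through_gumbel_sample(logits: torch.Tensor, temperature: float = 1.0):
    """logits: (..., K). Returns hard indices and a one-hot carrying soft gradients."""
    # Gumbel(0, 1) noise via -log(-log(U)), clamped to avoid log(0).
    u = torch.rand_like(logits).clamp_min(1e-9)
    gumbel_noise = -torch.log(-torch.log(u).clamp_min(1e-9))
    ind = (logits / max(temperature, 1e-9) + gumbel_noise).argmax(dim=-1)
    one_hot = F.one_hot(ind, num_classes=logits.shape[-1]).type_as(logits)

    # Straight-through: forward uses the hard one-hot, backward uses the softmax gradient.
    soft = F.softmax(logits / max(temperature, 1e-9), dim=-1)
    one_hot = one_hot + soft - soft.detach()
    return ind, one_hot

ind, one_hot = straight_through_gumbel_sample(torch.randn(4, 16, requires_grad=True))
print(ind.shape, one_hot.shape)  # torch.Size([4]) torch.Size([4, 16])
```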
@@ -1156,7 +1156,7 @@ def batched_embedding(indices, embeds):


 def orthogonal_loss_fn(t):
-# eq (2) from https://huggingface.co/papers/2112.00384
+# eq (2) from https://arxiv.org/abs/2112.00384
 h, n = t.shape[:2]
 normed_codes = F.normalize(t, p=2, dim=-1)
 cosine_sim = einsum("h i d, h j d -> h i j", normed_codes, normed_codes)
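The eq. (2) regularizer referenced above pushes distinct codebook vectors toward orthogonality by penalizing their pairwise cosine similarity. One common way to finish that computation is sketched below; the exact normalization in the repository's `orthogonal_loss_fn` may differ, so treat this as an assumed form:

```python
import torch
import torch.nn.functional as F
from torch import einsum

def orthogonal_loss(codebook: torch.Tensor) -> torch.Tensor:
    """codebook: (heads, num_codes, dim)."""
    h, n = codebook.shape[:2]
    normed_codes = F.normalize(codebook, p=2, dim=-1)
    # Pairwise cosine similarities between codes within each head.
    cosine_sim = einsum("h i d, h j d -> h i j", normed_codes, normed_codes)
    # Mean squared similarity minus the diagonal contribution (each code has
    # similarity 1 with itself), i.e. ||C C^T - I||^2 up to a constant.
    return (cosine_sim ** 2).sum() / (h * n ** 2) - (1 / n)

loss = orthogonal_loss(torch.randn(2, 32, 8))
print(loss.item())
```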
@@ -243,6 +243,11 @@ def control_loop(

 timestamp = 0
 start_episode_t = time.perf_counter()
+
+# Controls starts, if policy is given it needs cleaning up
+if policy is not None:
+policy.reset()
+
 while timestamp < control_time_s:
 start_loop_t = time.perf_counter()

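The added lines ensure a stateful policy (for example one that queues action chunks or keeps recurrent state) is reset before each control episode starts, so leftovers from the previous episode cannot leak into the next one. A rough sketch of the pattern, using hypothetical robot and policy method names and a fixed control rate:

```python
import time

def run_episode(robot, policy, control_time_s: float, fps: float = 30.0):
    # Reset any per-episode state (action queues, recurrent state) before the loop.
    if policy is not None:
        policy.reset()

    start_episode_t = time.perf_counter()
    timestamp = 0.0
    while timestamp < control_time_s:
        start_loop_t = time.perf_counter()

        observation = robot.capture_observation()        # hypothetical API
        if policy is not None:
            action = policy.select_action(observation)
            robot.send_action(action)                     # hypothetical API

        # Sleep the remainder of the control period to hold a fixed rate.
        dt = time.perf_counter() - start_loop_t
        time.sleep(max(1 / fps - dt, 0.0))
        timestamp = time.perf_counter() - start_episode_t
```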
@@ -63,13 +63,13 @@ dependencies = [
 "opencv-python-headless>=4.9.0",
 "packaging>=24.2",
 "av>=14.2.0",
-"pymunk>=6.6.0",
+"pymunk>=6.6.0,<7.0.0",
 "pynput>=1.7.7",
 "pyzmq>=26.2.1",
 "rerun-sdk>=0.21.0",
 "termcolor>=2.4.0",
-"torch>=2.2.1,<2.7",
-"torchcodec==0.2.1; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')",
+"torch>=2.2.1",
+"torchcodec>=0.2.1; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')",
 "torchvision>=0.21.0",
 "wandb>=0.16.3",
 "zarr>=2.17.0",
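The `torchcodec` requirement above is gated by a PEP 508 environment marker, so whether it gets installed depends on the platform evaluating it. If you want to check how the marker resolves on a given machine, the `packaging` library (already listed as a dependency above) can evaluate it; a small illustration:

```python
from packaging.markers import Marker

marker = Marker(
    "sys_platform != 'win32' and (sys_platform != 'linux' or "
    "(platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) "
    "and (sys_platform != 'darwin' or platform_machine != 'x86_64')"
)
# True on e.g. x86_64 Linux; False on Windows, Intel macOS, or ARM Linux.
print(marker.evaluate())
```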
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0389a716d51c1c615fb2a3bfa386d89f00b0deca08c4fa21b23e020a939d0213
+oid sha256:6b1e600768a8771c5fe650e038a1193597e3810f032041b2a0d021e4496381c1
 size 3686488
@@ -28,7 +28,7 @@ from lerobot.common.datasets.transforms import (
 from lerobot.common.utils.random_utils import seeded_context

 ARTIFACT_DIR = Path("tests/artifacts/image_transforms")
-DATASET_REPO_ID = "lerobot/aloha_mobile_shrimp"
+DATASET_REPO_ID = "lerobot/aloha_static_cups_open"


 def save_default_config_transform(original_frame: torch.Tensor, output_dir: Path):
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0dc691503e7d90b2086bb408e89a65f772ce5ee6e3562ef8c127bcb09bd90851
+oid sha256:9d4ebab73eabddc58879a4e770289d19e00a1a4cf2fa5fa33cd3a3246992bc90
 size 40551392

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc67af1d60f95d84c98d6c9ebd648990e0f0705368bd6b72d2b39533950b0179
+oid sha256:f3e4c8e85e146b043fd4e4984947c2a6f01627f174a19f18b5914cf690579d77
 size 5104

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:64518cf652105d15f5fd2cfc13d0681f66a4ec4797dc5d5dc2f7b0d91fe5dfd6
+oid sha256:1a7a8b1a457149109f843c32bcbb047d09de2201847b9b79f7501b447f77ecf4
 size 31672

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32b6d14fab4244b5140adb345e47f662b6739c04974e04b21c3127caa988abbb
+oid sha256:5e6ce85296b2009e7c2060d336c0429b1c7197d9adb159e7df0ba18003067b36
 size 68

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1904ef0338f7b6efdec70ec235ee931b5751008bf4eb433edb0b3fa0838a4f1
+oid sha256:9b5f557e30aead3731c38cbd85af8c706395d8689a918ad88805b5a886245603
 size 33400

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa544a97f00bf46393a09b006b44c2499bbf7d177782360a8c21cacbf200c07a
+oid sha256:2e6625cabfeb4800abc80252cf9112a9271c154edd01eb291658f143c951610b
 size 515400

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83c7a8ae912300b5cedba31904f7ba22542059fd60dd86548a95e415713f719e
+oid sha256:224b5fa4828aa88171b68c036e8919c1eae563e2113f03b6461eadf5bf8525a6
 size 31672

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a010633237b3a1141603c65174c551daa9e7b4c474af5a1376d73e5425bfb5d
+oid sha256:016d2fa8fe5f58017dfd46f4632fdc19dfd751e32a2c7cde2077c6f95546d6bd
 size 68

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec8b5c440e9fcec190c9be48b28ebb79f82ae63626afe7c811e4bb0c3dd08842
+oid sha256:021562ee3e4814425e367ed0c144d6fbe2eb28838247085716cf0b58fd69a075
 size 33400
@@ -16,6 +16,7 @@

 import pytest
 import torch
+from packaging import version
 from safetensors.torch import load_file
 from torchvision.transforms import v2
 from torchvision.transforms.v2 import functional as F  # noqa: N812
@@ -253,7 +254,14 @@ def test_backward_compatibility_single_transforms(


 @require_x86_64_kernel
+@pytest.mark.skipif(
+version.parse(torch.__version__) < version.parse("2.7.0"),
+reason="Test artifacts were generated with PyTorch >= 2.7.0 which has different multinomial behavior",
+)
 def test_backward_compatibility_default_config(img_tensor, default_transforms):
+# NOTE: PyTorch versions have different randomness, it might break this test.
+# See this PR: https://github.com/huggingface/lerobot/pull/1127.
+
 cfg = ImageTransformsConfig(enable=True)
 default_tf = ImageTransforms(cfg)

@@ -37,7 +37,6 @@ def test_diffuser_scheduler(optimizer):
 "base_lrs": [0.001],
 "last_epoch": 1,
 "lr_lambdas": [None],
-"verbose": False,
 }
 assert scheduler.state_dict() == expected_state_dict

@@ -56,7 +55,6 @@ def test_vqbet_scheduler(optimizer):
 "base_lrs": [0.001],
 "last_epoch": 1,
 "lr_lambdas": [None],
-"verbose": False,
 }
 assert scheduler.state_dict() == expected_state_dict

@@ -77,7 +75,6 @@ def test_cosine_decay_with_warmup_scheduler(optimizer):
 "base_lrs": [0.001],
 "last_epoch": 1,
 "lr_lambdas": [None],
-"verbose": False,
 }
 assert scheduler.state_dict() == expected_state_dict

@@ -20,6 +20,7 @@ from pathlib import Path
 import einops
 import pytest
 import torch
+from packaging import version
 from safetensors.torch import load_file

 from lerobot import available_policies
@@ -408,7 +409,16 @@ def test_backward_compatibility(ds_repo_id: str, policy_name: str, policy_kwargs
 4. Check that this test now passes.
 5. Remember to restore `tests/scripts/save_policy_to_safetensors.py` to its original state.
 6. Remember to stage and commit the resulting changes to `tests/artifacts`.
+
+NOTE: If the test does not pass, and you don't change the policy, it is likely that the test artifact
+is out of date. For example, some PyTorch versions have different randomness, see this PR:
+https://github.com/huggingface/lerobot/pull/1127.
+
 """
+# NOTE: ACT policy has different randomness, after PyTorch 2.7.0
+if policy_name == "act" and version.parse(torch.__version__) < version.parse("2.7.0"):
+pytest.skip(f"Skipping act policy test with PyTorch {torch.__version__}. Requires PyTorch >= 2.7.0")
+
 ds_name = ds_repo_id.split("/")[-1]
 artifact_dir = Path("tests/artifacts/policies") / f"{ds_name}_{policy_name}_{file_name_extra}"
 saved_output_dict = load_file(artifact_dir / "output_dict.safetensors")