diff --git a/.github/workflows/pr_style_bot.yml b/.github/workflows/pr_style_bot.yml new file mode 100644 index 00000000..68530645 --- /dev/null +++ b/.github/workflows/pr_style_bot.yml @@ -0,0 +1,161 @@ +# Adapted from https://github.com/huggingface/diffusers/blob/main/.github/workflows/pr_style_bot.yml +name: PR Style Bot + +on: + issue_comment: + types: [created] + +permissions: {} + +env: + PYTHON_VERSION: "3.10" + +jobs: + check-permissions: + if: > + contains(github.event.comment.body, '@bot /style') && + github.event.issue.pull_request != null + runs-on: ubuntu-latest + outputs: + is_authorized: ${{ steps.check_user_permission.outputs.has_permission }} + steps: + - name: Check user permission + id: check_user_permission + uses: actions/github-script@v6 + with: + script: | + const comment_user = context.payload.comment.user.login; + const { data: permission } = await github.rest.repos.getCollaboratorPermissionLevel({ + owner: context.repo.owner, + repo: context.repo.repo, + username: comment_user + }); + + const authorized = + permission.permission === 'admin' || + permission.permission === 'write'; + + console.log( + `User ${comment_user} has permission level: ${permission.permission}, ` + + `authorized: ${authorized} (admins & maintainers allowed)` + ); + + core.setOutput('has_permission', authorized); + + run-style-bot: + needs: check-permissions + if: needs.check-permissions.outputs.is_authorized == 'true' + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - name: Extract PR details + id: pr_info + uses: actions/github-script@v6 + with: + script: | + const prNumber = context.payload.issue.number; + const { data: pr } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: prNumber + }); + + // We capture both the branch ref and the "full_name" of the head repo + // so that we can check out the correct repository & branch (including forks). 
+ core.setOutput("prNumber", prNumber); + core.setOutput("headRef", pr.head.ref); + core.setOutput("headRepoFullName", pr.head.repo.full_name); + + - name: Check out PR branch + uses: actions/checkout@v4 + env: + HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }} + HEADREF: ${{ steps.pr_info.outputs.headRef }} + with: + persist-credentials: true + # Instead of checking out the base repo, use the contributor's repo name + repository: ${{ env.HEADREPOFULLNAME }} + ref: ${{ env.HEADREF }} + # You may need fetch-depth: 0 for being able to push + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Debug + env: + HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }} + HEADREF: ${{ steps.pr_info.outputs.headRef }} + PRNUMBER: ${{ steps.pr_info.outputs.prNumber }} + run: | + echo "PR number: ${PRNUMBER}" + echo "Head Ref: ${HEADREF}" + echo "Head Repo Full Name: ${HEADREPOFULLNAME}" + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Get Ruff Version from pre-commit-config.yaml + id: get-ruff-version + run: | + RUFF_VERSION=$(awk '/repo: https:\/\/github.com\/astral-sh\/ruff-pre-commit/{flag=1;next}/rev:/{if(flag){print $2;exit}}' .pre-commit-config.yaml) + echo "ruff_version=${RUFF_VERSION}" >> $GITHUB_OUTPUT + + - name: Install Ruff + env: + RUFF_VERSION: ${{ steps.get-ruff-version.outputs.ruff_version }} + run: python -m pip install "ruff==${RUFF_VERSION}" + + - name: Ruff check + run: ruff check --fix + + - name: Ruff format + run: ruff format + + - name: Commit and push changes + id: commit_and_push + env: + HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }} + HEADREF: ${{ steps.pr_info.outputs.headRef }} + PRNUMBER: ${{ steps.pr_info.outputs.prNumber }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + echo "HEADREPOFULLNAME: ${HEADREPOFULLNAME}, HEADREF: ${HEADREF}" + # Configure git with the Actions bot user + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git config --local lfs.https://github.com/.locksverify false + + # Make sure your 'origin' remote is set to the contributor's fork + git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${HEADREPOFULLNAME}.git" + + # If there are changes after running style/quality, commit them + if [ -n "$(git status --porcelain)" ]; then + git add . + git commit -m "Apply style fixes" + # Push to the original contributor's forked branch + git push origin HEAD:${HEADREF} + echo "changes_pushed=true" >> $GITHUB_OUTPUT + else + echo "No changes to commit." + echo "changes_pushed=false" >> $GITHUB_OUTPUT + fi + + - name: Comment on PR with workflow run link + if: steps.commit_and_push.outputs.changes_pushed == 'true' + uses: actions/github-script@v6 + with: + script: | + const prNumber = parseInt(process.env.prNumber, 10); + const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}` + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: `Style fixes have been applied. 
[View the workflow run here](${runUrl}).` + }); + env: + prNumber: ${{ steps.pr_info.outputs.prNumber }} diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index b42539e6..f785d52f 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -32,13 +32,27 @@ jobs: id: get-ruff-version run: | RUFF_VERSION=$(awk '/repo: https:\/\/github.com\/astral-sh\/ruff-pre-commit/{flag=1;next}/rev:/{if(flag){print $2;exit}}' .pre-commit-config.yaml) - echo "RUFF_VERSION=${RUFF_VERSION}" >> $GITHUB_ENV + echo "ruff_version=${RUFF_VERSION}" >> $GITHUB_OUTPUT - name: Install Ruff - run: python -m pip install "ruff==${{ env.RUFF_VERSION }}" + env: + RUFF_VERSION: ${{ steps.get-ruff-version.outputs.ruff_version }} + run: python -m pip install "ruff==${RUFF_VERSION}" - name: Ruff check run: ruff check --output-format=github - name: Ruff format run: ruff format --diff + + typos: + name: Typos + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: typos-action + uses: crate-ci/typos@v1.29.10 diff --git a/.github/workflows/test-docker-build.yml b/.github/workflows/test-docker-build.yml index 4d6e9ce5..3ee84a27 100644 --- a/.github/workflows/test-docker-build.yml +++ b/.github/workflows/test-docker-build.yml @@ -43,7 +43,7 @@ jobs: needs: get_changed_files runs-on: group: aws-general-8-plus - if: ${{ needs.get_changed_files.outputs.matrix }} != '' + if: needs.get_changed_files.outputs.matrix != '' strategy: fail-fast: false matrix: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 00b538e8..dba87705 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,7 @@ exclude: ^(tests/data) default_language_version: python: python3.10 repos: + ##### Style / Misc. ##### - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: @@ -13,21 +14,34 @@ repos: - id: check-toml - id: end-of-file-fixer - id: trailing-whitespace + - repo: https://github.com/crate-ci/typos + rev: v1.30.0 + hooks: + - id: typos + args: [--force-exclude] - repo: https://github.com/asottile/pyupgrade rev: v3.19.1 hooks: - id: pyupgrade - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.6 + rev: v0.9.9 hooks: - id: ruff args: [--fix] - id: ruff-format + + ##### Security ##### - repo: https://github.com/gitleaks/gitleaks - rev: v8.23.3 + rev: v8.24.0 hooks: - id: gitleaks - repo: https://github.com/woodruffw/zizmor-pre-commit - rev: v1.3.1 + rev: v1.4.1 hooks: - id: zizmor + - repo: https://github.com/PyCQA/bandit + rev: 1.8.3 + hooks: + - id: bandit + args: ["-c", "pyproject.toml"] + additional_dependencies: ["bandit[toml]"] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 61fa2eb9..8aff26d8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -228,7 +228,7 @@ Follow these steps to start contributing: git commit ``` - Note, if you already commited some changes that have a wrong formatting, you can use: + Note, if you already committed some changes that have a wrong formatting, you can use: ```bash pre-commit run --all-files ``` diff --git a/README.md b/README.md index 5125ace5..59929341 100644 --- a/README.md +++ b/README.md @@ -210,7 +210,7 @@ A `LeRobotDataset` is serialised using several widespread file formats for each - videos are stored in mp4 format to save space - metadata are stored in plain json/jsonl files -Dataset can be uploaded/downloaded from the HuggingFace hub seamlessly. 
To work on a local dataset, you can use the `local_files_only` argument and specify its location with the `root` argument if it's not in the default `~/.cache/huggingface/lerobot` location. +Dataset can be uploaded/downloaded from the HuggingFace hub seamlessly. To work on a local dataset, you can specify its location with the `root` argument if it's not in the default `~/.cache/huggingface/lerobot` location. ### Evaluate a pretrained policy diff --git a/benchmarks/video/README.md b/benchmarks/video/README.md index 56cd1d1e..49e49811 100644 --- a/benchmarks/video/README.md +++ b/benchmarks/video/README.md @@ -114,7 +114,7 @@ We tried to measure the most impactful parameters for both encoding and decoding Additional encoding parameters exist that are not included in this benchmark. In particular: - `-preset` which allows for selecting encoding presets. This represents a collection of options that will provide a certain encoding speed to compression ratio. By leaving this parameter unspecified, it is considered to be `medium` for libx264 and libx265 and `8` for libsvtav1. -- `-tune` which allows to optimize the encoding for certains aspects (e.g. film quality, fast decoding, etc.). +- `-tune` which allows to optimize the encoding for certain aspects (e.g. film quality, fast decoding, etc.). See the documentation mentioned above for more detailed info on these settings and for a more comprehensive list of other parameters. diff --git a/docker/lerobot-cpu/Dockerfile b/docker/lerobot-cpu/Dockerfile index 06673092..13a45d24 100644 --- a/docker/lerobot-cpu/Dockerfile +++ b/docker/lerobot-cpu/Dockerfile @@ -1,33 +1,29 @@ # Configure image ARG PYTHON_VERSION=3.10 - FROM python:${PYTHON_VERSION}-slim -ARG PYTHON_VERSION -ARG DEBIAN_FRONTEND=noninteractive -# Install apt dependencies +# Configure environment variables +ARG PYTHON_VERSION +ENV DEBIAN_FRONTEND=noninteractive +ENV MUJOCO_GL="egl" +ENV PATH="/opt/venv/bin:$PATH" + +# Install dependencies and set up Python in a single layer RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential cmake git git-lfs \ + build-essential cmake git \ libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \ speech-dispatcher libgeos-dev \ - && apt-get clean && rm -rf /var/lib/apt/lists/* + && ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python \ + && python -m venv /opt/venv \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && echo "source /opt/venv/bin/activate" >> /root/.bashrc -# Create virtual environment -RUN ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python -RUN python -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" -RUN echo "source /opt/venv/bin/activate" >> /root/.bashrc - -# Install LeRobot -RUN git lfs install -RUN git clone https://github.com/huggingface/lerobot.git /lerobot +# Clone repository and install LeRobot in a single layer +COPY . 
/lerobot WORKDIR /lerobot -RUN pip install --upgrade --no-cache-dir pip -RUN pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]" \ - --extra-index-url https://download.pytorch.org/whl/cpu - -# Set EGL as the rendering backend for MuJoCo -ENV MUJOCO_GL="egl" +RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \ + && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]" \ + --extra-index-url https://download.pytorch.org/whl/cpu # Execute in bash shell rather than python CMD ["/bin/bash"] diff --git a/docker/lerobot-gpu/Dockerfile b/docker/lerobot-gpu/Dockerfile index 65ca4377..642a8ded 100644 --- a/docker/lerobot-gpu/Dockerfile +++ b/docker/lerobot-gpu/Dockerfile @@ -1,31 +1,24 @@ FROM nvidia/cuda:12.4.1-base-ubuntu22.04 -# Configure image +# Configure environment variables ARG PYTHON_VERSION=3.10 -ARG DEBIAN_FRONTEND=noninteractive +ENV DEBIAN_FRONTEND=noninteractive +ENV MUJOCO_GL="egl" +ENV PATH="/opt/venv/bin:$PATH" - -# Install apt dependencies +# Install dependencies and set up Python in a single layer RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential cmake git git-lfs \ + build-essential cmake git \ libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \ speech-dispatcher libgeos-dev \ python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ - && apt-get clean && rm -rf /var/lib/apt/lists/* + && ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python \ + && python -m venv /opt/venv \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && echo "source /opt/venv/bin/activate" >> /root/.bashrc - -# Create virtual environment -RUN ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python -RUN python -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" -RUN echo "source /opt/venv/bin/activate" >> /root/.bashrc - -# Install LeRobot -RUN git lfs install -RUN git clone https://github.com/huggingface/lerobot.git /lerobot +# Clone repository and install LeRobot in a single layer +COPY . /lerobot WORKDIR /lerobot -RUN pip install --upgrade --no-cache-dir pip -RUN pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]" - -# Set EGL as the rendering backend for MuJoCo -ENV MUJOCO_GL="egl" +RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \ + && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]" diff --git a/examples/10_use_so100.md b/examples/10_use_so100.md index 7912884c..b39a0239 100644 --- a/examples/10_use_so100.md +++ b/examples/10_use_so100.md @@ -335,7 +335,7 @@ python lerobot/scripts/control_robot.py \ --control.push_to_hub=true ``` -Note: You can resume recording by adding `--control.resume=true`. Also if you didn't push your dataset yet, add `--control.local_files_only=true`. +Note: You can resume recording by adding `--control.resume=true`. ## H. Visualize a dataset @@ -344,7 +344,7 @@ If you uploaded your dataset to the hub with `--control.push_to_hub=true`, you c echo ${HF_USER}/so100_test ``` -If you didn't upload with `--control.push_to_hub=false`, you can also visualize it locally with: +If you didn't upload with `--control.push_to_hub=false`, you can also visualize it locally with (a window can be opened in the browser `http://127.0.0.1:9090` with the visualization tool): ```bash python lerobot/scripts/visualize_dataset_html.py \ --repo-id ${HF_USER}/so100_test \ @@ -363,8 +363,6 @@ python lerobot/scripts/control_robot.py \ --control.episode=0 ``` -Note: If you didn't push your dataset yet, add `--control.local_files_only=true`. - ## J. 
Train a policy To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command: @@ -378,8 +376,6 @@ python lerobot/scripts/train.py \ --wandb.enable=true ``` -Note: If you didn't push your dataset yet, add `--control.local_files_only=true`. - Let's explain it: 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/so100_test`. 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor sates, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset. @@ -416,4 +412,4 @@ As you can see, it's almost the same command as previously used to record your t Follow this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth tutorial on controlling real robots with LeRobot. > [!TIP] -> If you have any questions or need help, please reach out on Discord in the channel [`#so100-arm`](https://discord.com/channels/1216765309076115607/1237741463832363039). +> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb) in the channel [`#so100-arm`](https://discord.com/channels/1216765309076115607/1237741463832363039). diff --git a/examples/11_use_lekiwi.md b/examples/11_use_lekiwi.md new file mode 100644 index 00000000..224a1854 --- /dev/null +++ b/examples/11_use_lekiwi.md @@ -0,0 +1,463 @@ +# Using the [LeKiwi](https://github.com/SIGRobotics-UIUC/LeKiwi) Robot with LeRobot + +## Table of Contents + + - [A. Source the parts](#a-source-the-parts) + - [B. Install software Pi](#b-install-software-on-pi) + - [C. Setup LeRobot laptop/pc](#c-install-lerobot-on-laptop) + - [D. Assemble the arms](#d-assembly) + - [E. Calibrate](#e-calibration) + - [F. Teleoperate](#f-teleoperate) + - [G. Record a dataset](#g-record-a-dataset) + - [H. Visualize a dataset](#h-visualize-a-dataset) + - [I. Replay an episode](#i-replay-an-episode) + - [J. Train a policy](#j-train-a-policy) + - [K. Evaluate your policy](#k-evaluate-your-policy) + +> [!TIP] +> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb) in the channel [`#mobile-so-100-arm`](https://discord.com/channels/1216765309076115607/1318390825528332371). + +## A. Source the parts + +Follow this [README](https://github.com/SIGRobotics-UIUC/LeKiwi). It contains the bill of materials, with a link to source the parts, as well as the instructions to 3D print the parts, and advice if it's your first time printing or if you don't own a 3D printer. + +Before assembling, you will first need to configure your motors. To this end, we provide a nice script, so let's first install LeRobot. After configuration, we will also guide you through assembly. + +## B. Install software on Pi +Now we have to setup the remote PC that will run on the LeKiwi Robot. This is normally a Raspberry Pi, but can be any PC that can run on 5V and has enough usb ports (2 or more) for the cameras and motor control board. + +### Install OS +For setting up the Raspberry Pi and its SD-card see: [Setup PI](https://www.raspberrypi.com/documentation/computers/getting-started.html). 
It explains how to download the [Imager](https://www.raspberrypi.com/software/) to install Raspberry Pi OS or Ubuntu.
+
+### Setup SSH
+After setting up your Pi, you should enable and set up [SSH](https://www.raspberrypi.com/news/coding-on-raspberry-pi-remotely-with-visual-studio-code/) (Secure Shell Protocol) so you can log in to the Pi from your laptop without requiring a screen, keyboard, and mouse connected to the Pi. A great tutorial on how to do this can be found [here](https://www.raspberrypi.com/documentation/computers/remote-access.html#ssh). You can log into your Pi from your Command Prompt (cmd) or, if you use VSCode, with [this](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-ssh) extension.
+
+### Install LeRobot
+
+On your Raspberry Pi:
+
+#### 1. [Install Miniconda](https://docs.anaconda.com/miniconda/install/#quick-command-line-install):
+
+#### 2. Restart shell
+Copy-paste into your shell: `source ~/.bashrc`, or for Mac: `source ~/.bash_profile`, or `source ~/.zshrc` if you're using zsh
+
+#### 3. Create and activate a fresh conda environment for lerobot
+
+
+Video install instructions
+
+
+```bash
+conda create -y -n lerobot python=3.10
+```
+
+Then activate your conda environment (do this each time you open a shell to use lerobot!):
+```bash
+conda activate lerobot
+```
+
+#### 4. Clone LeRobot:
+```bash
+git clone https://github.com/huggingface/lerobot.git ~/lerobot
+```
+
+#### 5. Install LeRobot with dependencies for the feetech motors:
+```bash
+cd ~/lerobot && pip install -e ".[feetech]"
+```
+
+## C. Install LeRobot on laptop
+If you have already installed LeRobot on your laptop, you can skip this step; otherwise, please follow along as we do the same steps we did on the Pi.
+
+> [!TIP]
+> We use the Command Prompt (cmd) quite a lot. If you are not comfortable using cmd or want to brush up on using the command line, you can have a look here: [Command line crash course](https://developer.mozilla.org/en-US/docs/Learn_web_development/Getting_started/Environment_setup/Command_line)
+
+On your computer:
+
+#### 1. [Install Miniconda](https://docs.anaconda.com/miniconda/install/#quick-command-line-install):
+
+#### 2. Restart shell
+Copy-paste into your shell: `source ~/.bashrc`, or for Mac: `source ~/.bash_profile`, or `source ~/.zshrc` if you're using zsh
+
+#### 3. Create and activate a fresh conda environment for lerobot
+
+
+Video install instructions
+
+
+```bash
+conda create -y -n lerobot python=3.10
+```
+
+Then activate your conda environment (do this each time you open a shell to use lerobot!):
+```bash
+conda activate lerobot
+```
+
+#### 4. Clone LeRobot:
+```bash
+git clone https://github.com/huggingface/lerobot.git ~/lerobot
+```
+
+#### 5. Install LeRobot with dependencies for the feetech motors:
+```bash
+cd ~/lerobot && pip install -e ".[feetech]"
+```
+
+*EXTRA: For Linux only (not Mac)*: install extra dependencies for recording datasets:
+```bash
+conda install -y -c conda-forge ffmpeg
+pip uninstall -y opencv-python
+conda install -y -c conda-forge "opencv>=4.10.0"
+```
+Great :hugs:! You are now done installing LeRobot and we can begin assembling the SO100 arms and mobile base :robot:.
+Every time you want to use LeRobot, you can go to the `~/lerobot` folder where we installed it and run one of the commands.
+
+# D. Assembly
+
+First we will assemble the two SO100 arms: one to attach to the mobile base and one for teleoperation. Then we will assemble the mobile base.
+
+## SO100 Arms
+### Configure motors
+The instructions for configuring the motors can be found [here](https://github.com/huggingface/lerobot/blob/main/examples/10_use_so100.md#c-configure-the-motors) in step C of the SO100 tutorial. Besides the IDs for the arm motors, we also need to set the motor IDs for the mobile base. These need to be in a specific order to work. Below is an image of the motor IDs and motor mounting positions for the mobile base. Note that we only use one motor control board on LeKiwi. This means the motor IDs for the wheels are 7, 8 and 9.
+
+Motor ID's for mobile robot
+
+### Assemble arms
+[Assemble arms instruction](https://github.com/huggingface/lerobot/blob/main/examples/10_use_so100.md#d-assemble-the-arms)
+
+## Mobile base (LeKiwi)
+[Assemble LeKiwi](https://github.com/SIGRobotics-UIUC/LeKiwi)
+
+### Update config
+The config files in the LeRobot install on the LeKiwi (Raspberry Pi) and on the laptop should be the same. First we should find the IP address of the Raspberry Pi of the mobile manipulator. This is the same IP address used for SSH. We also need the USB port of the control board of the leader arm on the laptop and the port of the control board on LeKiwi. We can find these ports with the following script.
+
+#### a. Run the script to find port
+
+
+Video finding port
+ +To find the port for each bus servo adapter, run the utility script: +```bash +python lerobot/scripts/find_motors_bus_port.py +``` + +#### b. Example outputs + +Example output when identifying the leader arm's port (e.g., `/dev/tty.usbmodem575E0031751` on Mac, or possibly `/dev/ttyACM0` on Linux): +``` +Finding all available ports for the MotorBus. +['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751'] +Remove the usb cable from your DynamixelMotorsBus and press Enter when done. + +[...Disconnect leader arm and press Enter...] + +The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0031751 +Reconnect the usb cable. +``` +Example output when identifying the follower arm's port (e.g., `/dev/tty.usbmodem575E0032081`, or possibly `/dev/ttyACM1` on Linux): +``` +Finding all available ports for the MotorBus. +['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751'] +Remove the usb cable from your DynamixelMotorsBus and press Enter when done. + +[...Disconnect follower arm and press Enter...] + +The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0032081 +Reconnect the usb cable. +``` + +#### c. Troubleshooting +On Linux, you might need to give access to the USB ports by running: +```bash +sudo chmod 666 /dev/ttyACM0 +sudo chmod 666 /dev/ttyACM1 +``` + +#### d. Update config file + +IMPORTANTLY: Now that you have your ports of leader and follower arm and ip address of the mobile-so100, update the **ip** in Network configuration, **port** in leader_arms and **port** in lekiwi. In the [`LeKiwiRobotConfig`](../lerobot/common/robot_devices/robots/configs.py) file. Where you will find something like: +```python +@RobotConfig.register_subclass("lekiwi") +@dataclass +class LeKiwiRobotConfig(RobotConfig): + # `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes. + # Set this to a positive scalar to have the same value for all motors, or a list that is the same length as + # the number of motors in your follower arms. + max_relative_target: int | None = None + + # Network Configuration + ip: str = "172.17.133.91" + port: int = 5555 + video_port: int = 5556 + + cameras: dict[str, CameraConfig] = field( + default_factory=lambda: { + "mobile": OpenCVCameraConfig(camera_index="/dev/video0", fps=30, width=640, height=480), + "mobile2": OpenCVCameraConfig(camera_index="/dev/video2", fps=30, width=640, height=480), + } + ) + + calibration_dir: str = ".cache/calibration/lekiwi" + + leader_arms: dict[str, MotorsBusConfig] = field( + default_factory=lambda: { + "main": FeetechMotorsBusConfig( + port="/dev/tty.usbmodem585A0077581", + motors={ + # name: (index, model) + "shoulder_pan": [1, "sts3215"], + "shoulder_lift": [2, "sts3215"], + "elbow_flex": [3, "sts3215"], + "wrist_flex": [4, "sts3215"], + "wrist_roll": [5, "sts3215"], + "gripper": [6, "sts3215"], + }, + ), + } + ) + + follower_arms: dict[str, MotorsBusConfig] = field( + default_factory=lambda: { + "main": FeetechMotorsBusConfig( + port="/dev/ttyACM0", + motors={ + # name: (index, model) + "shoulder_pan": [1, "sts3215"], + "shoulder_lift": [2, "sts3215"], + "elbow_flex": [3, "sts3215"], + "wrist_flex": [4, "sts3215"], + "wrist_roll": [5, "sts3215"], + "gripper": [6, "sts3215"], + "left_wheel": (7, "sts3215"), + "back_wheel": (8, "sts3215"), + "right_wheel": (9, "sts3215"), + }, + ), + } + ) + + mock: bool = False +``` + +# E. Calibration +Now we have to calibrate the leader arm and the follower arm. The wheel motors don't have to be calibrated. 
+ + +### Calibrate follower arm (on mobile base) +> [!IMPORTANT] +> Contrarily to step 6 of the [assembly video](https://youtu.be/FioA2oeFZ5I?t=724) which illustrates the auto calibration, we will actually do manual calibration of follower for now. + +You will need to move the follower arm to these positions sequentially: + +| 1. Zero position | 2. Rotated position | 3. Rest position | +|---|---|---| +| SO-100 follower arm zero position | SO-100 follower arm rotated position | SO-100 follower arm rest position | + +Make sure the arm is connected to the Raspberry Pi and run this script (on the Raspberry Pi) to launch manual calibration: +```bash +python lerobot/scripts/control_robot.py \ + --robot.type=lekiwi \ + --robot.cameras='{}' \ + --control.type=calibrate \ + --control.arms='["main_follower"]' +``` + +### Calibrate leader arm +Then to calibrate the leader arm (which is attached to the laptop/pc). You will need to move the leader arm to these positions sequentially: + +| 1. Zero position | 2. Rotated position | 3. Rest position | +|---|---|---| +| SO-100 leader arm zero position | SO-100 leader arm rotated position | SO-100 leader arm rest position | + +Run this script (on your laptop/pc) to launch manual calibration: +```bash +python lerobot/scripts/control_robot.py \ + --robot.type=lekiwi \ + --robot.cameras='{}' \ + --control.type=calibrate \ + --control.arms='["main_leader"]' +``` + +# F. Teleoperate +To teleoperate SSH into your Raspberry Pi, and run `conda activate lerobot` and this script: +```bash +python lerobot/scripts/control_robot.py \ + --robot.type=lekiwi \ + --control.type=remote_robot +``` + +Then on your laptop, also run `conda activate lerobot` and this script: +```bash +python lerobot/scripts/control_robot.py \ + --robot.type=lekiwi \ + --control.type=teleoperate \ + --control.fps=30 +``` + +You should see on your laptop something like this: ```[INFO] Connected to remote robot at tcp://172.17.133.91:5555 and video stream at tcp://172.17.133.91:5556.``` Now you can move the leader arm and use the keyboard (w,a,s,d) to drive forward, left, backwards, right. And use (z,x) to turn left or turn right. You can use (r,f) to increase and decrease the speed of the mobile robot. There are three speed modes, see the table below: +| Speed Mode | Linear Speed (m/s) | Rotation Speed (deg/s) | +|------------|-------------------|-----------------------| +| Fast | 0.4 | 90 | +| Medium | 0.25 | 60 | +| Slow | 0.1 | 30 | + + +| Key | Action | +|------|--------------------------------| +| W | Move forward | +| A | Move left | +| S | Move backward | +| D | Move right | +| Z | Turn left | +| X | Turn right | +| R | Increase speed | +| F | Decrease speed | + +> [!TIP] +> If you use a different keyboard you can change the keys for each command in the [`LeKiwiRobotConfig`](../lerobot/common/robot_devices/robots/configs.py). + +## Troubleshoot communication + +If you are having trouble connecting to the Mobile SO100, follow these steps to diagnose and resolve the issue. + +### 1. Verify IP Address Configuration +Make sure that the correct ip for the Pi is set in the configuration file. To check the Raspberry Pi's IP address, run (on the Pi command line): +```bash +hostname -I +``` + +### 2. Check if Pi is reachable from laptop/pc +Try pinging the Raspberry Pi from your laptop: +```bach +ping +``` + +If the ping fails: +- Ensure the Pi is powered on and connected to the same network. +- Check if SSH is enabled on the Pi. + +### 3. 
Try SSH connection +If you can't SSH into the Pi, it might not be properly connected. Use: +```bash +ssh @ +``` +If you get a connection error: +- Ensure SSH is enabled on the Pi by running: + ```bash + sudo raspi-config + ``` + Then navigate to: **Interfacing Options -> SSH** and enable it. + +### 4. Same config file +Make sure the configuration file on both your laptop/pc and the Raspberry Pi is the same. + +# G. Record a dataset +Once you're familiar with teleoperation, you can record your first dataset with LeKiwi. + +If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens): +```bash +huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential +``` + +Store your Hugging Face repository name in a variable to run these commands: +```bash +HF_USER=$(huggingface-cli whoami | head -n 1) +echo $HF_USER +``` + +Record 2 episodes and upload your dataset to the hub: +```bash +python lerobot/scripts/control_robot.py \ + --robot.type=lekiwi \ + --control.type=record \ + --control.fps=30 \ + --control.single_task="Grasp a lego block and put it in the bin." \ + --control.repo_id=${HF_USER}/lekiwi_test \ + --control.tags='["tutorial"]' \ + --control.warmup_time_s=5 \ + --control.episode_time_s=30 \ + --control.reset_time_s=30 \ + --control.num_episodes=2 \ + --control.push_to_hub=true +``` + +Note: You can resume recording by adding `--control.resume=true`. + +# H. Visualize a dataset + +If you uploaded your dataset to the hub with `--control.push_to_hub=true`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id given by: +```bash +echo ${HF_USER}/lekiwi_test +``` + +If you didn't upload with `--control.push_to_hub=false`, you can also visualize it locally with (a window can be opened in the browser `http://127.0.0.1:9090` with the visualization tool): +```bash +python lerobot/scripts/visualize_dataset_html.py \ + --repo-id ${HF_USER}/lekiwi_test \ + --local-files-only 1 +``` + +# I. Replay an episode +Now try to replay the first episode on your robot: +```bash +python lerobot/scripts/control_robot.py \ + --robot.type=lekiwi \ + --control.type=replay \ + --control.fps=30 \ + --control.repo_id=${HF_USER}/lekiwi_test \ + --control.episode=0 +``` + +## J. Train a policy + +To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command: +```bash +python lerobot/scripts/train.py \ + --dataset.repo_id=${HF_USER}/lekiwi_test \ + --policy.type=act \ + --output_dir=outputs/train/act_lekiwi_test \ + --job_name=act_lekiwi_test \ + --device=cuda \ + --wandb.enable=true +``` + +Let's explain it: +1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/lekiwi_test`. +2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor sates, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset. +4. We provided `device=cuda` since we are training on a Nvidia GPU, but you could use `device=mps` to train on Apple silicon. +5. 
We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`. + +Training should take several hours. You will find checkpoints in `outputs/train/act_lekiwi_test/checkpoints`. + +## K. Evaluate your policy + +You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes: +```bash +python lerobot/scripts/control_robot.py \ + --robot.type=lekiwi \ + --control.type=record \ + --control.fps=30 \ + --control.single_task="Drive to the red block and pick it up" \ + --control.repo_id=${HF_USER}/eval_act_lekiwi_test \ + --control.tags='["tutorial"]' \ + --control.warmup_time_s=5 \ + --control.episode_time_s=30 \ + --control.reset_time_s=30 \ + --control.num_episodes=10 \ + --control.push_to_hub=true \ + --control.policy.path=outputs/train/act_lekiwi_test/checkpoints/last/pretrained_model +``` + +As you can see, it's almost the same command as previously used to record your training dataset. Two things changed: +1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint with (e.g. `outputs/train/eval_act_lekiwi_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_lekiwi_test`). +2. The name of dataset begins by `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_lekiwi_test`). diff --git a/examples/11_use_moss.md b/examples/11_use_moss.md index e35ba9b2..67f8157e 100644 --- a/examples/11_use_moss.md +++ b/examples/11_use_moss.md @@ -2,7 +2,7 @@ This tutorial explains how to use [Moss v1](https://github.com/jess-moss/moss-ro ## Source the parts -Follow this [README](https://github.com/jess-moss/moss-robot-arms). It contains the bill of materials, with link to source the parts, as well as the instructions to 3D print the parts, and advices if it's your first time printing or if you don't own a 3D printer already. +Follow this [README](https://github.com/jess-moss/moss-robot-arms). It contains the bill of materials with link to source the parts, as well as the instructions to 3D print the parts and advice if it's your first time printing or if you don't own a 3D printer already. **Important**: Before assembling, you will first need to configure your motors. To this end, we provide a nice script, so let's first install LeRobot. After configuration, we will also guide you through assembly. @@ -256,7 +256,7 @@ python lerobot/scripts/control_robot.py \ --control.push_to_hub=true ``` -Note: You can resume recording by adding `--control.resume=true`. Also if you didn't push your dataset yet, add `--control.local_files_only=true`. +Note: You can resume recording by adding `--control.resume=true`. ## Visualize a dataset @@ -284,8 +284,6 @@ python lerobot/scripts/control_robot.py \ --control.episode=0 ``` -Note: If you didn't push your dataset yet, add `--control.local_files_only=true`. - ## Train a policy To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command: @@ -299,8 +297,6 @@ python lerobot/scripts/train.py \ --wandb.enable=true ``` -Note: If you didn't push your dataset yet, add `--control.local_files_only=true`. 
- Let's explain it: 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/moss_test`. 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor sates, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset. diff --git a/examples/7_get_started_with_real_robot.md b/examples/7_get_started_with_real_robot.md index e57d783a..f4bef69c 100644 --- a/examples/7_get_started_with_real_robot.md +++ b/examples/7_get_started_with_real_robot.md @@ -398,7 +398,7 @@ And here are the corresponding positions for the leader arm: You can watch a [video tutorial of the calibration procedure](https://youtu.be/8drnU9uRY24) for more details. -During calibration, we count the number of full 360-degree rotations your motors have made since they were first used. That's why we ask yo to move to this arbitrary "zero" position. We don't actually "set" the zero position, so you don't need to be accurate. After calculating these "offsets" to shift the motor values around 0, we need to assess the rotation direction of each motor, which might differ. That's why we ask you to rotate all motors to roughly 90 degrees, to mesure if the values changed negatively or positively. +During calibration, we count the number of full 360-degree rotations your motors have made since they were first used. That's why we ask yo to move to this arbitrary "zero" position. We don't actually "set" the zero position, so you don't need to be accurate. After calculating these "offsets" to shift the motor values around 0, we need to assess the rotation direction of each motor, which might differ. That's why we ask you to rotate all motors to roughly 90 degrees, to measure if the values changed negatively or positively. Finally, the rest position ensures that the follower and leader arms are roughly aligned after calibration, preventing sudden movements that could damage the motors when starting teleoperation. @@ -626,7 +626,7 @@ Finally, run this code to instantiate and connectyour camera: from lerobot.common.robot_devices.cameras.configs import OpenCVCameraConfig from lerobot.common.robot_devices.cameras.opencv import OpenCVCamera -camera_config = OpenCVCameraConfig(camera_index=0) +config = OpenCVCameraConfig(camera_index=0) camera = OpenCVCamera(config) camera.connect() color_image = camera.read() @@ -663,18 +663,20 @@ camera.disconnect() **Instantiate your robot with cameras** -Additionaly, you can set up your robot to work with your cameras. +Additionally, you can set up your robot to work with your cameras. 
Modify the following Python code with the appropriate camera names and configurations: ```python robot = ManipulatorRobot( - leader_arms={"main": leader_arm}, - follower_arms={"main": follower_arm}, - calibration_dir=".cache/calibration/koch", - cameras={ - "laptop": OpenCVCameraConfig(0, fps=30, width=640, height=480), - "phone": OpenCVCameraConfig(1, fps=30, width=640, height=480), - }, + KochRobotConfig( + leader_arms={"main": leader_arm}, + follower_arms={"main": follower_arm}, + calibration_dir=".cache/calibration/koch", + cameras={ + "laptop": OpenCVCameraConfig(0, fps=30, width=640, height=480), + "phone": OpenCVCameraConfig(1, fps=30, width=640, height=480), + }, + ) ) robot.connect() ``` @@ -711,7 +713,7 @@ python lerobot/scripts/control_robot.py \ You will see a lot of lines appearing like this one: ``` -INFO 2024-08-10 11:15:03 ol_robot.py:209 dt: 5.12 (195.1hz) dtRlead: 4.93 (203.0hz) dtRfoll: 0.19 (5239.0hz) +INFO 2024-08-10 11:15:03 ol_robot.py:209 dt: 5.12 (195.1hz) dtRlead: 4.93 (203.0hz) dtWfoll: 0.19 (5239.0hz) ``` It contains @@ -768,7 +770,7 @@ You can use the `record` function from [`lerobot/scripts/control_robot.py`](../l 1. Frames from cameras are saved on disk in threads, and encoded into videos at the end of each episode recording. 2. Video streams from cameras are displayed in window so that you can verify them. 3. Data is stored with [`LeRobotDataset`](../lerobot/common/datasets/lerobot_dataset.py) format which is pushed to your Hugging Face page (unless `--control.push_to_hub=false` is provided). -4. Checkpoints are done during recording, so if any issue occurs, you can resume recording by re-running the same command again with `--control.resume=true`. You might need to add `--control.local_files_only=true` if your dataset was not uploaded to hugging face hub. Also you will need to manually delete the dataset directory to start recording from scratch. +4. Checkpoints are done during recording, so if any issue occurs, you can resume recording by re-running the same command again with `--control.resume=true`. You will need to manually delete the dataset directory if you want to start recording from scratch. 5. Set the flow of data recording using command line arguments: - `--control.warmup_time_s=10` defines the number of seconds before starting data collection. It allows the robot devices to warmup and synchronize (10 seconds by default). - `--control.episode_time_s=60` defines the number of seconds for data recording for each episode (60 seconds by default). @@ -823,8 +825,8 @@ It contains: - `dtRlead: 5.06 (197.5hz)` which is the delta time of reading the present position of the leader arm. - `dtWfoll: 0.25 (3963.7hz)` which is the delta time of writing the goal position on the follower arm ; writing is asynchronous so it takes less time than reading. - `dtRfoll: 6.22 (160.7hz)` which is the delta time of reading the present position on the follower arm. -- `dtRlaptop:32.57 (30.7hz) ` which is the delta time of capturing an image from the laptop camera in the thread running asynchrously. -- `dtRphone:33.84 (29.5hz)` which is the delta time of capturing an image from the phone camera in the thread running asynchrously. +- `dtRlaptop:32.57 (30.7hz) ` which is the delta time of capturing an image from the laptop camera in the thread running asynchronously. +- `dtRphone:33.84 (29.5hz)` which is the delta time of capturing an image from the phone camera in the thread running asynchronously. 
Troubleshooting: - On Linux, if you encounter a hanging issue when using cameras, uninstall opencv and re-install it with conda: @@ -844,7 +846,7 @@ At the end of data recording, your dataset will be uploaded on your Hugging Face echo https://huggingface.co/datasets/${HF_USER}/koch_test ``` -### b. Advices for recording dataset +### b. Advice for recording dataset Once you're comfortable with data recording, it's time to create a larger dataset for training. A good starting task is grasping an object at different locations and placing it in a bin. We suggest recording at least 50 episodes, with 10 episodes per location. Keep the cameras fixed and maintain consistent grasping behavior throughout the recordings. @@ -883,8 +885,6 @@ python lerobot/scripts/control_robot.py \ --control.episode=0 ``` -Note: You might need to add `--control.local_files_only=true` if your dataset was not uploaded to hugging face hub. - Your robot should replicate movements similar to those you recorded. For example, check out [this video](https://x.com/RemiCadene/status/1793654950905680090) where we use `replay` on a Aloha robot from [Trossen Robotics](https://www.trossenrobotics.com). ## 4. Train a policy on your data @@ -902,8 +902,6 @@ python lerobot/scripts/train.py \ --wandb.enable=true ``` -Note: You might need to add `--dataset.local_files_only=true` if your dataset was not uploaded to hugging face hub. - Let's explain it: 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/koch_test`. 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor sates, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset. diff --git a/examples/8_use_stretch.md b/examples/8_use_stretch.md index 2f8c0ffb..802ea718 100644 --- a/examples/8_use_stretch.md +++ b/examples/8_use_stretch.md @@ -98,7 +98,7 @@ python lerobot/scripts/control_robot.py \ ``` This is equivalent to running `stretch_robot_home.py` -> **Note:** If you run any of the LeRobot scripts below and Stretch is not poperly homed, it will automatically home/calibrate first. +> **Note:** If you run any of the LeRobot scripts below and Stretch is not properly homed, it will automatically home/calibrate first. **Teleoperate** Before trying teleoperation, you need activate the gamepad controller by pressing the middle button. For more info, see Stretch's [doc](https://docs.hello-robot.com/0.3/getting_started/hello_robot/#gamepad-teleoperation). diff --git a/examples/9_use_aloha.md b/examples/9_use_aloha.md index d74c8b7a..055551f0 100644 --- a/examples/9_use_aloha.md +++ b/examples/9_use_aloha.md @@ -2,7 +2,7 @@ This tutorial explains how to use [Aloha and Aloha 2 stationary](https://www.tro ## Setup -Follow the [documentation from Trossen Robotics](https://docs.trossenrobotics.com/aloha_docs/getting_started/stationary/hardware_setup.html) for setting up the hardware and plugging the 4 arms and 4 cameras to your computer. +Follow the [documentation from Trossen Robotics](https://docs.trossenrobotics.com/aloha_docs/2.0/getting_started/stationary/hardware_setup.html) for setting up the hardware and plugging the 4 arms and 4 cameras to your computer. ## Install LeRobot @@ -172,10 +172,10 @@ python lerobot/scripts/control_robot.py \ As you can see, it's almost the same command as previously used to record your training dataset. 
Two things changed: 1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint with (e.g. `outputs/train/eval_act_aloha_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_aloha_test`). 2. The name of dataset begins by `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_aloha_test`). -3. We use `--control.num_image_writer_processes=1` instead of the default value (`0`). On our computer, using a dedicated process to write images from the 4 cameras on disk allows to reach constent 30 fps during inference. Feel free to explore different values for `--control.num_image_writer_processes`. +3. We use `--control.num_image_writer_processes=1` instead of the default value (`0`). On our computer, using a dedicated process to write images from the 4 cameras on disk allows to reach constant 30 fps during inference. Feel free to explore different values for `--control.num_image_writer_processes`. ## More -Follow this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth explaination. +Follow this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth explanation. If you have any question or need help, please reach out on Discord in the channel `#aloha-arm`. diff --git a/examples/port_datasets/pusht_zarr.py b/examples/port_datasets/pusht_zarr.py index 53855ec8..eac6f63d 100644 --- a/examples/port_datasets/pusht_zarr.py +++ b/examples/port_datasets/pusht_zarr.py @@ -2,7 +2,6 @@ import shutil from pathlib import Path import numpy as np -import torch from huggingface_hub import HfApi from lerobot.common.constants import HF_LEROBOT_HOME @@ -91,9 +90,9 @@ def calculate_coverage(zarr_data): num_frames = len(block_pos) - coverage = np.zeros((num_frames,)) + coverage = np.zeros((num_frames,), dtype=np.float32) # 8 keypoints with 2 coords each - keypoints = np.zeros((num_frames, 16)) + keypoints = np.zeros((num_frames, 16), dtype=np.float32) # Set x, y, theta (in radians) goal_pos_angle = np.array([256, 256, np.pi / 4]) @@ -119,7 +118,7 @@ def calculate_coverage(zarr_data): intersection_area = goal_geom.intersection(block_geom).area goal_area = goal_geom.area coverage[i] = intersection_area / goal_area - keypoints[i] = torch.from_numpy(PushTEnv.get_keypoints(block_shapes).flatten()) + keypoints[i] = PushTEnv.get_keypoints(block_shapes).flatten() return coverage, keypoints @@ -181,20 +180,21 @@ def main(raw_dir: Path, repo_id: str, mode: str = "video", push_to_hub: bool = T for frame_idx in range(num_frames): i = from_idx + frame_idx + idx = i + (frame_idx < num_frames - 1) frame = { - "action": torch.from_numpy(action[i]), + "action": action[i], # Shift reward and success by +1 until the last item of the episode - "next.reward": reward[i + (frame_idx < num_frames - 1)], - "next.success": success[i + (frame_idx < num_frames - 1)], + "next.reward": reward[idx : idx + 1], + "next.success": success[idx : idx + 1], "task": PUSHT_TASK, } - frame["observation.state"] = torch.from_numpy(agent_pos[i]) + frame["observation.state"] = agent_pos[i] if mode == "keypoints": - frame["observation.environment_state"] = torch.from_numpy(keypoints[i]) + frame["observation.environment_state"] = keypoints[i] else: - frame["observation.image"] = 
torch.from_numpy(image[i]) + frame["observation.image"] = image[i] dataset.add_frame(frame) diff --git a/lerobot/common/datasets/backward_compatibility.py b/lerobot/common/datasets/backward_compatibility.py index aa814549..d1b8926a 100644 --- a/lerobot/common/datasets/backward_compatibility.py +++ b/lerobot/common/datasets/backward_compatibility.py @@ -33,8 +33,22 @@ If you encounter a problem, contact LeRobot maintainers on [Discord](https://dis or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose). """ +FUTURE_MESSAGE = """ +The dataset you requested ({repo_id}) is only available in {version} format. +As we cannot ensure forward compatibility with it, please update your current version of lerobot. +""" -class BackwardCompatibilityError(Exception): + +class CompatibilityError(Exception): ... + + +class BackwardCompatibilityError(CompatibilityError): def __init__(self, repo_id: str, version: packaging.version.Version): message = V2_MESSAGE.format(repo_id=repo_id, version=version) super().__init__(message) + + +class ForwardCompatibilityError(CompatibilityError): + def __init__(self, repo_id: str, version: packaging.version.Version): + message = FUTURE_MESSAGE.format(repo_id=repo_id, version=version) + super().__init__(message) diff --git a/lerobot/common/datasets/compute_stats.py b/lerobot/common/datasets/compute_stats.py index a029f892..1149ec83 100644 --- a/lerobot/common/datasets/compute_stats.py +++ b/lerobot/common/datasets/compute_stats.py @@ -92,7 +92,7 @@ def compute_episode_stats(episode_data: dict[str, list[str] | np.ndarray], featu axes_to_reduce = (0, 2, 3) # keep channel dim keepdims = True else: - ep_ft_array = data # data is alreay a np.ndarray + ep_ft_array = data # data is already a np.ndarray axes_to_reduce = 0 # compute stats over the first axis keepdims = data.ndim == 1 # keep as np.array diff --git a/lerobot/common/datasets/factory.py b/lerobot/common/datasets/factory.py index fb1fe6d6..38c01b42 100644 --- a/lerobot/common/datasets/factory.py +++ b/lerobot/common/datasets/factory.py @@ -83,10 +83,13 @@ def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDatas ) if isinstance(cfg.dataset.repo_id, str): - ds_meta = LeRobotDatasetMetadata(cfg.dataset.repo_id, revision=cfg.dataset.revision) + ds_meta = LeRobotDatasetMetadata( + cfg.dataset.repo_id, root=cfg.dataset.root, revision=cfg.dataset.revision + ) delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta) dataset = LeRobotDataset( cfg.dataset.repo_id, + root=cfg.dataset.root, episodes=cfg.dataset.episodes, delta_timestamps=delta_timestamps, image_transforms=image_transforms, diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index a5ebc475..5414c76d 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import contextlib import logging import shutil from pathlib import Path @@ -20,13 +21,14 @@ from typing import Callable import datasets import numpy as np +import packaging.version import PIL.Image import torch import torch.utils -from datasets import load_dataset +from datasets import concatenate_datasets, load_dataset from huggingface_hub import HfApi, snapshot_download from huggingface_hub.constants import REPOCARD_NAME -from packaging import version +from huggingface_hub.errors import RevisionNotFoundError from lerobot.common.constants import HF_LEROBOT_HOME from lerobot.common.datasets.compute_stats import aggregate_stats, compute_episode_stats @@ -43,12 +45,14 @@ from lerobot.common.datasets.utils import ( check_version_compatibility, create_empty_dataset_info, create_lerobot_dataset_card, + embed_images, get_delta_indices, get_episode_data_index, get_features_from_robot, get_hf_features_from_features, - get_safe_revision, + get_safe_version, hf_transform_to_torch, + is_valid_version, load_episodes, load_episodes_stats, load_info, @@ -60,7 +64,6 @@ from lerobot.common.datasets.utils import ( write_episode_stats, write_info, write_json, - write_parquet, ) from lerobot.common.datasets.video_utils import ( VideoFrame, @@ -70,7 +73,6 @@ from lerobot.common.datasets.video_utils import ( ) from lerobot.common.robot_devices.robots.utils import Robot -# For maintainers, see lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md CODEBASE_VERSION = "v2.1" @@ -91,18 +93,19 @@ class LeRobotDatasetMetadata: raise FileNotFoundError self.load_metadata() except (FileNotFoundError, NotADirectoryError): + if is_valid_version(self.revision): + self.revision = get_safe_version(self.repo_id, self.revision) + (self.root / "meta").mkdir(exist_ok=True, parents=True) - self.revision = get_safe_revision(self.repo_id, self.revision) self.pull_from_repo(allow_patterns="meta/") self.load_metadata() - check_version_compatibility(self.repo_id, self._version, CODEBASE_VERSION) - def load_metadata(self): self.info = load_info(self.root) + check_version_compatibility(self.repo_id, self._version, CODEBASE_VERSION) self.tasks, self.task_to_task_index = load_tasks(self.root) self.episodes = load_episodes(self.root) - if version.parse(self._version) < version.parse("v2.1"): + if self._version < packaging.version.parse("v2.1"): self.stats = load_stats(self.root) self.episodes_stats = backward_compatible_episodes_stats(self.stats, self.episodes) else: @@ -124,9 +127,9 @@ class LeRobotDatasetMetadata: ) @property - def _version(self) -> str: + def _version(self) -> packaging.version.Version: """Codebase version used to create this dataset.""" - return self.info["codebase_version"] + return packaging.version.parse(self.info["codebase_version"]) def get_data_file_path(self, ep_index: int) -> Path: ep_chunk = self.get_episode_chunk(ep_index) @@ -225,7 +228,7 @@ class LeRobotDatasetMetadata: def add_task(self, task: str): """ - Given a task in natural language, add it to the dictionnary of tasks. + Given a task in natural language, add it to the dictionary of tasks. """ if task in self.task_to_task_index: raise ValueError(f"The task '{task}' already exists and can't be added twice.") @@ -388,7 +391,7 @@ class LeRobotDataset(torch.utils.data.Dataset): - info contains various information about the dataset like shapes, keys, fps etc. 
- stats stores the dataset statistics of the different modalities for normalization - tasks contains the prompts for each task of the dataset, which can be used for - task-conditionned training. + task-conditioned training. - hf_dataset (from datasets.Dataset), which will read any values from parquet files. - videos (optional) from which frames are loaded to be synchronous with data from parquet files. @@ -483,7 +486,7 @@ class LeRobotDataset(torch.utils.data.Dataset): self.meta = LeRobotDatasetMetadata( self.repo_id, self.root, self.revision, force_cache_sync=force_cache_sync ) - if self.episodes is not None and version.parse(self.meta._version) >= version.parse("v2.1"): + if self.episodes is not None and self.meta._version >= packaging.version.parse("v2.1"): episodes_stats = [self.meta.episodes_stats[ep_idx] for ep_idx in self.episodes] self.stats = aggregate_stats(episodes_stats) @@ -494,14 +497,17 @@ class LeRobotDataset(torch.utils.data.Dataset): assert all((self.root / fpath).is_file() for fpath in self.get_episodes_file_paths()) self.hf_dataset = self.load_hf_dataset() except (AssertionError, FileNotFoundError, NotADirectoryError): - self.revision = get_safe_revision(self.repo_id, self.revision) + self.revision = get_safe_version(self.repo_id, self.revision) self.download_episodes(download_videos) self.hf_dataset = self.load_hf_dataset() self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes) # Check timestamps - check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) + timestamps = torch.stack(self.hf_dataset["timestamp"]).numpy() + episode_indices = torch.stack(self.hf_dataset["episode_index"]).numpy() + ep_data_index_np = {k: t.numpy() for k, t in self.episode_data_index.items()} + check_timestamps_sync(timestamps, episode_indices, ep_data_index_np, self.fps, self.tolerance_s) # Setup delta_indices if self.delta_timestamps is not None: @@ -513,6 +519,7 @@ class LeRobotDataset(torch.utils.data.Dataset): branch: str | None = None, tags: list | None = None, license: str | None = "apache-2.0", + tag_version: bool = True, push_videos: bool = True, private: bool = False, allow_patterns: list[str] | str | None = None, @@ -558,6 +565,11 @@ class LeRobotDataset(torch.utils.data.Dataset): ) card.push_to_hub(repo_id=self.repo_id, repo_type="dataset", revision=branch) + if tag_version: + with contextlib.suppress(RevisionNotFoundError): + hub_api.delete_tag(self.repo_id, tag=CODEBASE_VERSION, repo_type="dataset") + hub_api.create_tag(self.repo_id, tag=CODEBASE_VERSION, revision=branch, repo_type="dataset") + def pull_from_repo( self, allow_patterns: list[str] | str | None = None, @@ -611,7 +623,15 @@ class LeRobotDataset(torch.utils.data.Dataset): # TODO(aliberts): hf_dataset.set_format("torch") hf_dataset.set_transform(hf_transform_to_torch) + return hf_dataset + def create_hf_dataset(self) -> datasets.Dataset: + features = get_hf_features_from_features(self.features) + ft_dict = {col: [] for col in features} + hf_dataset = datasets.Dataset.from_dict(ft_dict, features=features, split="train") + + # TODO(aliberts): hf_dataset.set_format("torch") + hf_dataset.set_transform(hf_transform_to_torch) return hf_dataset @property @@ -836,7 +856,7 @@ class LeRobotDataset(torch.utils.data.Dataset): episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length) episode_buffer["episode_index"] = np.full((episode_length,), episode_index) - # Add new tasks to the tasks dictionnary + # Add new tasks to the 
tasks dictionary for task in episode_tasks: task_index = self.meta.get_task_index(task) if task_index is None: @@ -864,9 +884,15 @@ class LeRobotDataset(torch.utils.data.Dataset): # `meta.save_episode` be executed after encoding the videos self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats) - self.hf_dataset = self.load_hf_dataset() - self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes) - check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) + ep_data_index = get_episode_data_index(self.meta.episodes, [episode_index]) + ep_data_index_np = {k: t.numpy() for k, t in ep_data_index.items()} + check_timestamps_sync( + episode_buffer["timestamp"], + episode_buffer["episode_index"], + ep_data_index_np, + self.fps, + self.tolerance_s, + ) video_files = list(self.root.rglob("*.mp4")) assert len(video_files) == self.num_episodes * len(self.meta.video_keys) @@ -885,9 +911,12 @@ class LeRobotDataset(torch.utils.data.Dataset): def _save_episode_table(self, episode_buffer: dict, episode_index: int) -> None: episode_dict = {key: episode_buffer[key] for key in self.hf_features} ep_dataset = datasets.Dataset.from_dict(episode_dict, features=self.hf_features, split="train") + ep_dataset = embed_images(ep_dataset) + self.hf_dataset = concatenate_datasets([self.hf_dataset, ep_dataset]) + self.hf_dataset.set_transform(hf_transform_to_torch) ep_data_path = self.root / self.meta.get_data_file_path(ep_index=episode_index) ep_data_path.parent.mkdir(parents=True, exist_ok=True) - write_parquet(ep_dataset, ep_data_path) + ep_dataset.to_parquet(ep_data_path) def clear_episode_buffer(self) -> None: episode_index = self.episode_buffer["episode_index"] @@ -995,7 +1024,7 @@ class LeRobotDataset(torch.utils.data.Dataset): obj.episode_buffer = obj.create_episode_buffer() obj.episodes = None - obj.hf_dataset = None + obj.hf_dataset = obj.create_hf_dataset() obj.image_transforms = None obj.delta_timestamps = None obj.delta_indices = None diff --git a/lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md b/lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md deleted file mode 100644 index 8fcc8bbe..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md +++ /dev/null @@ -1,56 +0,0 @@ -## Using / Updating `CODEBASE_VERSION` (for maintainers) - -Since our dataset pushed to the hub are decoupled with the evolution of this repo, we ensure compatibility of -the datasets with our code, we use a `CODEBASE_VERSION` (defined in -lerobot/common/datasets/lerobot_dataset.py) variable. - -For instance, [`lerobot/pusht`](https://huggingface.co/datasets/lerobot/pusht) has many versions to maintain backward compatibility between LeRobot codebase versions: -- [v1.0](https://huggingface.co/datasets/lerobot/pusht/tree/v1.0) -- [v1.1](https://huggingface.co/datasets/lerobot/pusht/tree/v1.1) -- [v1.2](https://huggingface.co/datasets/lerobot/pusht/tree/v1.2) -- [v1.3](https://huggingface.co/datasets/lerobot/pusht/tree/v1.3) -- [v1.4](https://huggingface.co/datasets/lerobot/pusht/tree/v1.4) -- [v1.5](https://huggingface.co/datasets/lerobot/pusht/tree/v1.5) -- [v1.6](https://huggingface.co/datasets/lerobot/pusht/tree/v1.6) <-- last version -- [main](https://huggingface.co/datasets/lerobot/pusht/tree/main) <-- points to the last version - -Starting with v1.6, every dataset pushed to the hub or saved locally also have this version number in their -`info.json` metadata. 
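(The branch-based workflow documented in this removed file is superseded by version tags: `push_to_hub` now calls `create_tag` with `CODEBASE_VERSION`, as added above. Tagging an existing dataset by hand can be sketched as follows; the repo id and `revision="main"` are illustrative.)

```python
from huggingface_hub import HfApi

from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION

hub_api = HfApi()
# Mirrors what LeRobotDataset.push_to_hub(tag_version=True) now does after uploading:
hub_api.create_tag("lerobot/pusht", tag=CODEBASE_VERSION, revision="main", repo_type="dataset")
```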
- -### Uploading a new dataset -If you are pushing a new dataset, you don't need to worry about any of the instructions below, nor to be -compatible with previous codebase versions. The `push_dataset_to_hub.py` script will automatically tag your -dataset with the current `CODEBASE_VERSION`. - -### Updating an existing dataset -If you want to update an existing dataset, you need to change the `CODEBASE_VERSION` from `lerobot_dataset.py` -before running `push_dataset_to_hub.py`. This is especially useful if you introduce a breaking change -intentionally or not (i.e. something not backward compatible such as modifying the reward functions used, -deleting some frames at the end of an episode, etc.). That way, people running a previous version of the -codebase won't be affected by your change and backward compatibility is maintained. - -However, you will need to update the version of ALL the other datasets so that they have the new -`CODEBASE_VERSION` as a branch in their hugging face dataset repository. Don't worry, there is an easy way -that doesn't require to run `push_dataset_to_hub.py`. You can just "branch-out" from the `main` branch on HF -dataset repo by running this script which corresponds to a `git checkout -b` (so no copy or upload needed): - -```python -from huggingface_hub import HfApi - -from lerobot import available_datasets -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION - -api = HfApi() - -for repo_id in available_datasets: - dataset_info = api.list_repo_refs(repo_id, repo_type="dataset") - branches = [b.name for b in dataset_info.branches] - if CODEBASE_VERSION in branches: - print(f"{repo_id} already @{CODEBASE_VERSION}, skipping.") - continue - else: - # Now create a branch named after the new version by branching out from "main" - # which is expected to be the preceding version - api.create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION, revision="main") - print(f"{repo_id} successfully updated @{CODEBASE_VERSION}") -``` diff --git a/lerobot/common/datasets/push_dataset_to_hub/_download_raw.py b/lerobot/common/datasets/push_dataset_to_hub/_download_raw.py index edeaf093..cc291cea 100644 --- a/lerobot/common/datasets/push_dataset_to_hub/_download_raw.py +++ b/lerobot/common/datasets/push_dataset_to_hub/_download_raw.py @@ -152,7 +152,7 @@ def download_raw(raw_dir: Path, repo_id: str): stacklevel=1, ) - # Send warning if raw_dir isn't well formated + # Send warning if raw_dir isn't well formatted if raw_dir.parts[-2] != user_id or raw_dir.parts[-1] != dataset_id: warnings.warn( f"""`raw_dir` ({raw_dir}) doesn't contain a community or user id `/` the name of the dataset that diff --git a/lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py b/lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py index 4968e002..acf820bf 100644 --- a/lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py +++ b/lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py @@ -68,9 +68,9 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod modality_df, on="timestamp_utc", # "nearest" is the best option over "backward", since the latter can desynchronizes camera timestamps by - # matching timestamps that are too far appart, in order to fit the backward constraints. It's not the case for "nearest". + # matching timestamps that are too far apart, in order to fit the backward constraints. It's not the case for "nearest". 
# However, note that "nearest" might synchronize the reference camera with other cameras on slightly future timestamps. - # are too far appart. + # are too far apart. direction="nearest", tolerance=pd.Timedelta(f"{1 / fps} seconds"), ) @@ -126,7 +126,7 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod videos_dir.parent.mkdir(parents=True, exist_ok=True) videos_dir.symlink_to((raw_dir / "videos").absolute()) - # sanity check the video paths are well formated + # sanity check the video paths are well formatted for key in df: if "observation.images." not in key: continue @@ -143,7 +143,7 @@ def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episod # it is the case for video_frame dictionary = [{"path": ..., "timestamp": ...}] data_dict[key] = [video_frame[0] for video_frame in df[key].values] - # sanity check the video path is well formated + # sanity check the video path is well formatted video_path = videos_dir.parent / data_dict[key][0]["path"] if not video_path.exists(): raise ValueError(f"Video file not found in {video_path}") diff --git a/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py b/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py index 1f8a5d14..2ffb8369 100644 --- a/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py +++ b/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py @@ -17,7 +17,7 @@ For all datasets in the RLDS format. For https://github.com/google-deepmind/open_x_embodiment (OPENX) datasets. -NOTE: You need to install tensorflow and tensorflow_datsets before running this script. +NOTE: You need to install tensorflow and tensorflow_datasets before running this script. Example: python lerobot/scripts/push_dataset_to_hub.py \ diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 6125480c..7e297b35 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -27,15 +27,19 @@ from typing import Any import datasets import jsonlines import numpy as np -import pyarrow.compute as pc +import packaging.version import torch from datasets.table import embed_table_storage from huggingface_hub import DatasetCard, DatasetCardData, HfApi -from packaging import version +from huggingface_hub.errors import RevisionNotFoundError from PIL import Image as PILImage from torchvision import transforms -from lerobot.common.datasets.backward_compatibility import V21_MESSAGE, BackwardCompatibilityError +from lerobot.common.datasets.backward_compatibility import ( + V21_MESSAGE, + BackwardCompatibilityError, + ForwardCompatibilityError, +) from lerobot.common.robot_devices.robots.utils import Robot from lerobot.common.utils.utils import is_valid_numpy_dtype_string from lerobot.configs.types import DictLike, FeatureType, PolicyFeature @@ -129,13 +133,13 @@ def serialize_dict(stats: dict[str, torch.Tensor | np.ndarray | dict]) -> dict: return unflatten_dict(serialized_dict) -def write_parquet(dataset: datasets.Dataset, fpath: Path) -> None: +def embed_images(dataset: datasets.Dataset) -> datasets.Dataset: # Embed image bytes into the table before saving to parquet format = dataset.format dataset = dataset.with_format("arrow") dataset = dataset.map(embed_table_storage, batched=False) dataset = dataset.with_format(**format) - dataset.to_parquet(fpath) + return dataset def load_json(fpath: Path) -> Any: @@ -219,7 +223,7 @@ def load_episodes(local_dir: Path) -> dict: def write_episode_stats(episode_index: int, episode_stats: dict, 
local_dir: Path): - # We wrap episode_stats in a dictionnary since `episode_stats["episode_index"]` + # We wrap episode_stats in a dictionary since `episode_stats["episode_index"]` # is a dictionary of stats and not an integer. episode_stats = {"episode_index": episode_index, "stats": serialize_dict(episode_stats)} append_jsonlines(episode_stats, local_dir / EPISODES_STATS_PATH) @@ -269,38 +273,91 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): return items_dict +def is_valid_version(version: str) -> bool: + try: + packaging.version.parse(version) + return True + except packaging.version.InvalidVersion: + return False + + def check_version_compatibility( - repo_id: str, version_to_check: str, current_version: str, enforce_breaking_major: bool = True + repo_id: str, + version_to_check: str | packaging.version.Version, + current_version: str | packaging.version.Version, + enforce_breaking_major: bool = True, ) -> None: - v_check = version.parse(version_to_check) - v_current = version.parse(current_version) + v_check = ( + packaging.version.parse(version_to_check) + if not isinstance(version_to_check, packaging.version.Version) + else version_to_check + ) + v_current = ( + packaging.version.parse(current_version) + if not isinstance(current_version, packaging.version.Version) + else current_version + ) if v_check.major < v_current.major and enforce_breaking_major: raise BackwardCompatibilityError(repo_id, v_check) elif v_check.minor < v_current.minor: - logging.warning(V21_MESSAGE.format(repo_id=repo_id, version=version_to_check)) + logging.warning(V21_MESSAGE.format(repo_id=repo_id, version=v_check)) -def get_repo_versions(repo_id: str) -> list[version.Version]: +def get_repo_versions(repo_id: str) -> list[packaging.version.Version]: """Returns available valid versions (branches and tags) on given repo.""" api = HfApi() repo_refs = api.list_repo_refs(repo_id, repo_type="dataset") repo_refs = [b.name for b in repo_refs.branches + repo_refs.tags] repo_versions = [] for ref in repo_refs: - with contextlib.suppress(version.InvalidVersion): - repo_versions.append(version.parse(ref)) + with contextlib.suppress(packaging.version.InvalidVersion): + repo_versions.append(packaging.version.parse(ref)) return repo_versions -def get_safe_revision(repo_id: str, revision: str) -> str: - """Returns the version if available on repo, otherwise return the latest available.""" - api = HfApi() - if api.revision_exists(repo_id, revision, repo_type="dataset"): - return revision - +def get_safe_version(repo_id: str, version: str | packaging.version.Version) -> str: + """ + Returns the version if available on repo or the latest compatible one. + Otherwise, will throw a `CompatibilityError`. + """ + target_version = ( + packaging.version.parse(version) if not isinstance(version, packaging.version.Version) else version + ) hub_versions = get_repo_versions(repo_id) - return f"v{max(hub_versions)}" + + if not hub_versions: + raise RevisionNotFoundError( + f"""Your dataset must be tagged with a codebase version. 
+ Assuming _version_ is the codebase_version value in the info.json, you can run this: + ```python + from huggingface_hub import HfApi + + hub_api = HfApi() + hub_api.create_tag("{repo_id}", tag="_version_", repo_type="dataset") + ``` + """ + ) + + if target_version in hub_versions: + return f"v{target_version}" + + compatibles = [ + v for v in hub_versions if v.major == target_version.major and v.minor <= target_version.minor + ] + if compatibles: + return_version = max(compatibles) + if return_version < target_version: + logging.warning(f"Revision {version} for {repo_id} not found, using version v{return_version}") + return f"v{return_version}" + + lower_major = [v for v in hub_versions if v.major < target_version.major] + if lower_major: + raise BackwardCompatibilityError(repo_id, max(lower_major)) + + upper_versions = [v for v in hub_versions if v > target_version] + assert len(upper_versions) > 0 + raise ForwardCompatibilityError(repo_id, min(upper_versions)) def get_hf_features_from_features(features: dict) -> datasets.Features: @@ -402,82 +459,79 @@ def get_episode_data_index( if episodes is not None: episode_lengths = {ep_idx: episode_lengths[ep_idx] for ep_idx in episodes} - cumulative_lenghts = list(accumulate(episode_lengths.values())) + cumulative_lengths = list(accumulate(episode_lengths.values())) return { - "from": torch.LongTensor([0] + cumulative_lenghts[:-1]), - "to": torch.LongTensor(cumulative_lenghts), - } - - -def calculate_total_episode( - hf_dataset: datasets.Dataset, raise_if_not_contiguous: bool = True -) -> dict[str, torch.Tensor]: - episode_indices = sorted(hf_dataset.unique("episode_index")) - total_episodes = len(episode_indices) - if raise_if_not_contiguous and episode_indices != list(range(total_episodes)): - raise ValueError("episode_index values are not sorted and contiguous.") - return total_episodes - - -def calculate_episode_data_index(hf_dataset: datasets.Dataset) -> dict[str, torch.Tensor]: - episode_lengths = [] - table = hf_dataset.data.table - total_episodes = calculate_total_episode(hf_dataset) - for ep_idx in range(total_episodes): - ep_table = table.filter(pc.equal(table["episode_index"], ep_idx)) - episode_lengths.insert(ep_idx, len(ep_table)) - - cumulative_lenghts = list(accumulate(episode_lengths)) - return { - "from": torch.LongTensor([0] + cumulative_lenghts[:-1]), - "to": torch.LongTensor(cumulative_lenghts), + "from": torch.LongTensor([0] + cumulative_lengths[:-1]), + "to": torch.LongTensor(cumulative_lengths), } def check_timestamps_sync( - hf_dataset: datasets.Dataset, - episode_data_index: dict[str, torch.Tensor], + timestamps: np.ndarray, + episode_indices: np.ndarray, + episode_data_index: dict[str, np.ndarray], fps: int, tolerance_s: float, raise_value_error: bool = True, ) -> bool: """ - This check is to make sure that each timestamps is separated to the next by 1/fps +/- tolerance to - account for possible numerical error. - """ - timestamps = torch.stack(hf_dataset["timestamp"]) - diffs = torch.diff(timestamps) - within_tolerance = torch.abs(diffs - 1 / fps) <= tolerance_s + This check is to make sure that each timestamp is separated from the next by (1/fps) +/- tolerance + to account for possible numerical error. - # We mask differences between the timestamp at the end of an episode - # and the one at the start of the next episode since these are expected - # to be outside tolerance. 
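To summarize the resolution order implemented by `get_safe_version` above, a hedged usage sketch (the repo id is illustrative):

```python
from lerobot.common.datasets.utils import get_safe_version

# Resolution order, per the implementation above:
#   1. no version tags/branches on the repo        -> RevisionNotFoundError with tagging advice
#   2. exact match on the hub                      -> returned as "v{version}"
#   3. same major, lower-or-equal minor available  -> latest compatible version, with a warning
#   4. only lower majors available                 -> BackwardCompatibilityError
#   5. only newer versions available               -> ForwardCompatibilityError
revision = get_safe_version("lerobot/pusht", "v2.1")  # e.g. "v2.1" when that tag exists
```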
- mask = torch.ones(len(diffs), dtype=torch.bool) - ignored_diffs = episode_data_index["to"][:-1] - 1 + Args: + timestamps (np.ndarray): Array of timestamps in seconds. + episode_indices (np.ndarray): Array indicating the episode index for each timestamp. + episode_data_index (dict[str, np.ndarray]): A dictionary that includes 'to', + which identifies indices for the end of each episode. + fps (int): Frames per second. Used to check the expected difference between consecutive timestamps. + tolerance_s (float): Allowed deviation from the expected (1/fps) difference. + raise_value_error (bool): Whether to raise a ValueError if the check fails. + + Returns: + bool: True if all checked timestamp differences lie within tolerance, False otherwise. + + Raises: + ValueError: If the check fails and `raise_value_error` is True. + """ + if timestamps.shape != episode_indices.shape: + raise ValueError( + "timestamps and episode_indices should have the same shape. " + f"Found {timestamps.shape=} and {episode_indices.shape=}." + ) + + # Consecutive differences + diffs = np.diff(timestamps) + within_tolerance = np.abs(diffs - (1.0 / fps)) <= tolerance_s + + # Mask to ignore differences at the boundaries between episodes + mask = np.ones(len(diffs), dtype=bool) + ignored_diffs = episode_data_index["to"][:-1] - 1 # indices at the end of each episode mask[ignored_diffs] = False filtered_within_tolerance = within_tolerance[mask] - if not torch.all(filtered_within_tolerance): + # Check if all remaining diffs are within tolerance + if not np.all(filtered_within_tolerance): # Track original indices before masking - original_indices = torch.arange(len(diffs)) + original_indices = np.arange(len(diffs)) filtered_indices = original_indices[mask] - outside_tolerance_filtered_indices = torch.nonzero(~filtered_within_tolerance) # .squeeze() + outside_tolerance_filtered_indices = np.nonzero(~filtered_within_tolerance)[0] outside_tolerance_indices = filtered_indices[outside_tolerance_filtered_indices] - episode_indices = torch.stack(hf_dataset["episode_index"]) outside_tolerances = [] for idx in outside_tolerance_indices: entry = { "timestamps": [timestamps[idx], timestamps[idx + 1]], "diff": diffs[idx], - "episode_index": episode_indices[idx].item(), + "episode_index": episode_indices[idx].item() + if hasattr(episode_indices[idx], "item") + else episode_indices[idx], } outside_tolerances.append(entry) if raise_value_error: raise ValueError( f"""One or several timestamps unexpectedly violate the tolerance inside episode range. - This might be due to synchronization issues with timestamps during data collection. + This might be due to synchronization issues during data collection. 
\n{pformat(outside_tolerances)}""" ) return False diff --git a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py index 4cd93a2d..99ab2cbf 100644 --- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py @@ -31,6 +31,7 @@ from lerobot.common.robot_devices.robots.configs import AlohaRobotConfig LOCAL_DIR = Path("data/") +# spellchecker:off ALOHA_MOBILE_INFO = { "robot_config": AlohaRobotConfig(), "license": "mit", @@ -856,6 +857,7 @@ DATASETS = { }""").lstrip(), }, } +# spellchecker:on def batch_convert(): diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index 943e94f0..acf0282f 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -17,7 +17,7 @@ """ This script will help you convert any LeRobot dataset already pushed to the hub from codebase version 1.6 to 2.0. You will be required to provide the 'tasks', which is a short but accurate description in plain English -for each of the task performed in the dataset. This will allow to easily train models with task-conditionning. +for each of the task performed in the dataset. This will allow to easily train models with task-conditioning. We support 3 different scenarios for these tasks (see instructions below): 1. Single task dataset: all episodes of your dataset have the same single task. @@ -130,7 +130,7 @@ from lerobot.common.datasets.utils import ( create_branch, create_lerobot_dataset_card, flatten_dict, - get_safe_revision, + get_safe_version, load_json, unflatten_dict, write_json, @@ -443,7 +443,7 @@ def convert_dataset( test_branch: str | None = None, **card_kwargs, ): - v1 = get_safe_revision(repo_id, V16) + v1 = get_safe_version(repo_id, V16) v1x_dir = local_dir / V16 / repo_id v20_dir = local_dir / V20 / repo_id v1x_dir.mkdir(parents=True, exist_ok=True) diff --git a/lerobot/common/datasets/v21/_remove_language_instruction.py b/lerobot/common/datasets/v21/_remove_language_instruction.py new file mode 100644 index 00000000..dd4604cf --- /dev/null +++ b/lerobot/common/datasets/v21/_remove_language_instruction.py @@ -0,0 +1,73 @@ +import logging +import traceback +from pathlib import Path + +from datasets import get_dataset_config_info +from huggingface_hub import HfApi + +from lerobot import available_datasets +from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata +from lerobot.common.datasets.utils import INFO_PATH, write_info +from lerobot.common.datasets.v21.convert_dataset_v20_to_v21 import V20, SuppressWarnings + +LOCAL_DIR = Path("data/") + +hub_api = HfApi() + + +def fix_dataset(repo_id: str) -> str: + if not hub_api.revision_exists(repo_id, V20, repo_type="dataset"): + return f"{repo_id}: skipped (not in {V20})." 
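Since `check_timestamps_sync` now takes plain NumPy arrays rather than an `hf_dataset`, a self-contained check can be sketched with synthetic data (two back-to-back 3-frame episodes at 30 fps):

```python
import numpy as np

from lerobot.common.datasets.utils import check_timestamps_sync

fps = 30
# Timestamps restart at 0 for each episode; the jump at the episode boundary is masked out.
timestamps = np.array([0.0, 1 / fps, 2 / fps, 0.0, 1 / fps, 2 / fps])
episode_indices = np.array([0, 0, 0, 1, 1, 1])
episode_data_index = {"from": np.array([0, 3]), "to": np.array([3, 6])}

assert check_timestamps_sync(timestamps, episode_indices, episode_data_index, fps, tolerance_s=1e-4)
```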
+ + dataset_info = get_dataset_config_info(repo_id, "default") + with SuppressWarnings(): + lerobot_metadata = LeRobotDatasetMetadata(repo_id, revision=V20, force_cache_sync=True) + + meta_features = {key for key, ft in lerobot_metadata.features.items() if ft["dtype"] != "video"} + parquet_features = set(dataset_info.features) + + diff_parquet_meta = parquet_features - meta_features + diff_meta_parquet = meta_features - parquet_features + + if diff_parquet_meta: + raise ValueError(f"In parquet not in info.json: {parquet_features - meta_features}") + + if not diff_meta_parquet: + return f"{repo_id}: skipped (no diff)" + + if diff_meta_parquet: + logging.warning(f"In info.json not in parquet: {meta_features - parquet_features}") + assert diff_meta_parquet == {"language_instruction"} + lerobot_metadata.features.pop("language_instruction") + write_info(lerobot_metadata.info, lerobot_metadata.root) + commit_info = hub_api.upload_file( + path_or_fileobj=lerobot_metadata.root / INFO_PATH, + path_in_repo=INFO_PATH, + repo_id=repo_id, + repo_type="dataset", + revision=V20, + commit_message="Remove 'language_instruction'", + create_pr=True, + ) + return f"{repo_id}: success - PR: {commit_info.pr_url}" + + +def batch_fix(): + status = {} + LOCAL_DIR.mkdir(parents=True, exist_ok=True) + logfile = LOCAL_DIR / "fix_features_v20.txt" + for num, repo_id in enumerate(available_datasets): + print(f"\nConverting {repo_id} ({num}/{len(available_datasets)})") + print("---------------------------------------------------------") + try: + status = fix_dataset(repo_id) + except Exception: + status = f"{repo_id}: failed\n {traceback.format_exc()}" + + logging.info(status) + with open(logfile, "a") as file: + file.write(status + "\n") + + +if __name__ == "__main__": + batch_fix() diff --git a/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py b/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py index cee9da16..cc9272a8 100644 --- a/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py +++ b/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py @@ -21,8 +21,10 @@ This script is for internal use to convert all datasets under the 'lerobot' hub import traceback from pathlib import Path +from huggingface_hub import HfApi + from lerobot import available_datasets -from lerobot.common.datasets.v21.convert_dataset_v20_to_v21 import convert_dataset +from lerobot.common.datasets.v21.convert_dataset_v20_to_v21 import V21, convert_dataset LOCAL_DIR = Path("data/") @@ -31,19 +33,21 @@ def batch_convert(): status = {} LOCAL_DIR.mkdir(parents=True, exist_ok=True) logfile = LOCAL_DIR / "conversion_log_v21.txt" + hub_api = HfApi() for num, repo_id in enumerate(available_datasets): print(f"\nConverting {repo_id} ({num}/{len(available_datasets)})") print("---------------------------------------------------------") try: - convert_dataset(repo_id) - status = f"{repo_id}: success." - with open(logfile, "a") as file: - file.write(status + "\n") + if hub_api.revision_exists(repo_id, V21, repo_type="dataset"): + status = f"{repo_id}: success (already in {V21})." + else: + convert_dataset(repo_id) + status = f"{repo_id}: success." 
except Exception: status = f"{repo_id}: failed\n {traceback.format_exc()}" - with open(logfile, "a") as file: - file.write(status + "\n") - continue + + with open(logfile, "a") as file: + file.write(status + "\n") if __name__ == "__main__": diff --git a/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py b/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py index f55c13c1..163a6003 100644 --- a/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py +++ b/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py @@ -48,7 +48,7 @@ def convert_dataset( dataset = LeRobotDataset(repo_id, revision=V20, force_cache_sync=True) if (dataset.root / EPISODES_STATS_PATH).is_file(): - raise FileExistsError("episodes_stats.jsonl already exists.") + (dataset.root / EPISODES_STATS_PATH).unlink() convert_stats(dataset, num_workers=num_workers) ref_stats = load_stats(dataset.root) @@ -57,7 +57,7 @@ def convert_dataset( dataset.meta.info["codebase_version"] = CODEBASE_VERSION write_info(dataset.meta.info, dataset.root) - dataset.push_to_hub(branch=branch, allow_patterns="meta/") + dataset.push_to_hub(branch=branch, tag_version=False, allow_patterns="meta/") # delete old stats.json file if (dataset.root / STATS_PATH).is_file: diff --git a/lerobot/common/datasets/v21/convert_stats.py b/lerobot/common/datasets/v21/convert_stats.py index b13e0e19..cbf584b7 100644 --- a/lerobot/common/datasets/v21/convert_stats.py +++ b/lerobot/common/datasets/v21/convert_stats.py @@ -65,7 +65,7 @@ def check_aggregate_stats( dataset: LeRobotDataset, reference_stats: dict[str, dict[str, np.ndarray]], video_rtol_atol: tuple[float] = (1e-2, 1e-2), - default_rtol_atol: tuple[float] = (5e-6, 0.0), + default_rtol_atol: tuple[float] = (5e-6, 6e-5), ): """Verifies that the aggregated stats from episodes_stats are close to reference stats.""" agg_stats = aggregate_stats(list(dataset.meta.episodes_stats.values())) diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 3d9269a2..dcd60b0b 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -73,7 +73,7 @@ def decode_video_frames_torchvision( last_ts = max(timestamps) # access closest key frame of the first requested frame - # Note: closest key frame timestamp is usally smaller than `first_ts` (e.g. key frame can be the first frame of the video) + # Note: closest key frame timestamp is usually smaller than `first_ts` (e.g. key frame can be the first frame of the video) # for details on what `seek` is doing see: https://pyav.basswood-io.com/docs/stable/api/container.html?highlight=inputcontainer#av.container.InputContainer.seek reader.seek(first_ts, keyframes_only=keyframes_only) diff --git a/lerobot/common/envs/factory.py b/lerobot/common/envs/factory.py index 49239363..8450f84b 100644 --- a/lerobot/common/envs/factory.py +++ b/lerobot/common/envs/factory.py @@ -37,12 +37,12 @@ def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> g Args: cfg (EnvConfig): the config of the environment to instantiate. n_envs (int, optional): The number of parallelized env to return. Defaults to 1. - use_async_envs (bool, optional): Wether to return an AsyncVectorEnv or a SyncVectorEnv. Defaults to + use_async_envs (bool, optional): Whether to return an AsyncVectorEnv or a SyncVectorEnv. Defaults to False. 
Raises: ValueError: if n_envs < 1 - ModuleNotFoundError: If the requested env package is not intalled + ModuleNotFoundError: If the requested env package is not installed Returns: gym.vector.VectorEnv: The parallelized gym.env instance. diff --git a/lerobot/common/policies/act/configuration_act.py b/lerobot/common/policies/act/configuration_act.py index 4f724c12..7a5819b7 100644 --- a/lerobot/common/policies/act/configuration_act.py +++ b/lerobot/common/policies/act/configuration_act.py @@ -64,7 +64,7 @@ class ACTConfig(PreTrainedConfig): output_normalization_modes: Similar dictionary as `normalize_input_modes`, but to unnormalize to the original scale. Note that this is also used for normalizing the training targets. vision_backbone: Name of the torchvision resnet backbone to use for encoding images. - pretrained_backbone_weights: Pretrained weights from torchvision to initalize the backbone. + pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone. `None` means no pretrained weights. replace_final_stride_with_dilation: Whether to replace the ResNet's final 2x2 stride with a dilated convolution. diff --git a/lerobot/common/policies/diffusion/configuration_diffusion.py b/lerobot/common/policies/diffusion/configuration_diffusion.py index d571e152..e73c65fe 100644 --- a/lerobot/common/policies/diffusion/configuration_diffusion.py +++ b/lerobot/common/policies/diffusion/configuration_diffusion.py @@ -68,7 +68,7 @@ class DiffusionConfig(PreTrainedConfig): within the image size. If None, no cropping is done. crop_is_random: Whether the crop should be random at training time (it's always a center crop in eval mode). - pretrained_backbone_weights: Pretrained weights from torchvision to initalize the backbone. + pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone. `None` means no pretrained weights. use_group_norm: Whether to replace batch normalization with group normalization in the backbone. The group sizes are set to be about 16 (to be precise, feature_dim // 16). @@ -99,7 +99,7 @@ class DiffusionConfig(PreTrainedConfig): num_inference_steps: Number of reverse diffusion steps to use at inference time (steps are evenly spaced). If not provided, this defaults to be the same as `num_train_timesteps`. do_mask_loss_for_padding: Whether to mask the loss when there are copy-padded actions. See - `LeRobotDataset` and `load_previous_and_future_frames` for mor information. Note, this defaults + `LeRobotDataset` and `load_previous_and_future_frames` for more information. Note, this defaults to False as the original Diffusion Policy implementation does the same. """ diff --git a/lerobot/common/policies/pi0/conversion_scripts/convert_pi0_to_hf_lerobot.py b/lerobot/common/policies/pi0/conversion_scripts/convert_pi0_to_hf_lerobot.py index f85437a5..dd8622dd 100644 --- a/lerobot/common/policies/pi0/conversion_scripts/convert_pi0_to_hf_lerobot.py +++ b/lerobot/common/policies/pi0/conversion_scripts/convert_pi0_to_hf_lerobot.py @@ -2,7 +2,7 @@ Convert pi0 parameters from Jax to Pytorch Follow [README of openpi](https://github.com/Physical-Intelligence/openpi) to create a new environment -and install the required librairies. +and install the required libraries. 
```bash cd ~/code/openpi diff --git a/lerobot/common/policies/tdmpc/configuration_tdmpc.py b/lerobot/common/policies/tdmpc/configuration_tdmpc.py index c3e8aee6..3fce01df 100644 --- a/lerobot/common/policies/tdmpc/configuration_tdmpc.py +++ b/lerobot/common/policies/tdmpc/configuration_tdmpc.py @@ -76,7 +76,7 @@ class TDMPCConfig(PreTrainedConfig): n_pi_samples: Number of samples to draw from the policy / world model rollout every CEM iteration. Can be zero. uncertainty_regularizer_coeff: Coefficient for the uncertainty regularization used when estimating - trajectory values (this is the λ coeffiecient in eqn 4 of FOWM). + trajectory values (this is the λ coefficient in eqn 4 of FOWM). n_elites: The number of elite samples to use for updating the gaussian parameters every CEM iteration. elite_weighting_temperature: The temperature to use for softmax weighting (by trajectory value) of the elites, when updating the gaussian parameters for CEM. @@ -165,7 +165,7 @@ class TDMPCConfig(PreTrainedConfig): """Input validation (not exhaustive).""" if self.n_gaussian_samples <= 0: raise ValueError( - f"The number of guassian samples for CEM should be non-zero. Got `{self.n_gaussian_samples=}`" + f"The number of gaussian samples for CEM should be non-zero. Got `{self.n_gaussian_samples=}`" ) if self.normalization_mapping["ACTION"] is not NormalizationMode.MIN_MAX: raise ValueError( diff --git a/lerobot/common/policies/vqbet/configuration_vqbet.py b/lerobot/common/policies/vqbet/configuration_vqbet.py index 59389d6e..28e9c433 100644 --- a/lerobot/common/policies/vqbet/configuration_vqbet.py +++ b/lerobot/common/policies/vqbet/configuration_vqbet.py @@ -66,7 +66,7 @@ class VQBeTConfig(PreTrainedConfig): within the image size. If None, no cropping is done. crop_is_random: Whether the crop should be random at training time (it's always a center crop in eval mode). - pretrained_backbone_weights: Pretrained weights from torchvision to initalize the backbone. + pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone. `None` means no pretrained weights. use_group_norm: Whether to replace batch normalization with group normalization in the backbone. The group sizes are set to be about 16 (to be precise, feature_dim // 16). diff --git a/lerobot/common/policies/vqbet/modeling_vqbet.py b/lerobot/common/policies/vqbet/modeling_vqbet.py index 1f70b186..97a08e2f 100644 --- a/lerobot/common/policies/vqbet/modeling_vqbet.py +++ b/lerobot/common/policies/vqbet/modeling_vqbet.py @@ -485,7 +485,7 @@ class VQBeTHead(nn.Module): def forward(self, x, **kwargs) -> dict: # N is the batch size, and T is number of action query tokens, which are process through same GPT N, T, _ = x.shape - # we calculate N and T side parallely. Thus, the dimensions would be + # we calculate N and T side parallelly. Thus, the dimensions would be # (batch size * number of action query tokens, action chunk size, action dimension) x = einops.rearrange(x, "N T WA -> (N T) WA") @@ -772,7 +772,7 @@ class VqVae(nn.Module): Encoder and decoder are MLPs consisting of an input, output layer, and hidden layer, respectively. The vq_layer uses residual VQs. - This class contains functions for training the encoder and decoder along with the residual VQ layer (for trainign phase 1), + This class contains functions for training the encoder and decoder along with the residual VQ layer (for training phase 1), as well as functions to help BeT training part in training phase 2. 
""" diff --git a/lerobot/common/policies/vqbet/vqbet_utils.py b/lerobot/common/policies/vqbet/vqbet_utils.py index a2bd2df3..139d119e 100644 --- a/lerobot/common/policies/vqbet/vqbet_utils.py +++ b/lerobot/common/policies/vqbet/vqbet_utils.py @@ -38,7 +38,7 @@ from lerobot.common.policies.vqbet.configuration_vqbet import VQBeTConfig This file is part of a VQ-BeT that utilizes code from the following repositories: - Vector Quantize PyTorch code is licensed under the MIT License: - Origianl source: https://github.com/lucidrains/vector-quantize-pytorch + Original source: https://github.com/lucidrains/vector-quantize-pytorch - nanoGPT part is an adaptation of Andrej Karpathy's nanoGPT implementation in PyTorch. Original source: https://github.com/karpathy/nanoGPT @@ -289,7 +289,7 @@ class GPT(nn.Module): This file is a part for Residual Vector Quantization that utilizes code from the following repository: - Phil Wang's vector-quantize-pytorch implementation in PyTorch. - Origianl source: https://github.com/lucidrains/vector-quantize-pytorch + Original source: https://github.com/lucidrains/vector-quantize-pytorch - The vector-quantize-pytorch code is licensed under the MIT License: @@ -1349,9 +1349,9 @@ class EuclideanCodebook(nn.Module): # calculate distributed variance - variance_numer = reduce((data - batch_mean) ** 2, "h n d -> h 1 d", "sum") - distributed.all_reduce(variance_numer) - batch_variance = variance_numer / num_vectors + variance_number = reduce((data - batch_mean) ** 2, "h n d -> h 1 d", "sum") + distributed.all_reduce(variance_number) + batch_variance = variance_number / num_vectors self.update_with_decay("batch_variance", batch_variance, self.affine_param_batch_decay) diff --git a/lerobot/common/robot_devices/control_configs.py b/lerobot/common/robot_devices/control_configs.py index d7d03ac0..2ef1b44b 100644 --- a/lerobot/common/robot_devices/control_configs.py +++ b/lerobot/common/robot_devices/control_configs.py @@ -66,7 +66,7 @@ class RecordControlConfig(ControlConfig): private: bool = False # Add tags to your dataset on the hub. tags: list[str] | None = None - # Number of subprocesses handling the saving of frames as PNGs. Set to 0 to use threads only; + # Number of subprocesses handling the saving of frames as PNG. Set to 0 to use threads only; # set to ≥1 to use subprocesses, each using threads to write images. The best number of processes # and threads depends on your system. We recommend 4 threads per camera with 0 processes. # If fps is unstable, adjust the thread count. If still unstable, try using 1 or more subprocesses. 
@@ -127,6 +127,12 @@ class ReplayControlConfig(ControlConfig): play_sounds: bool = True +@ControlConfig.register_subclass("remote_robot") +@dataclass +class RemoteRobotConfig(ControlConfig): + log_interval: int = 100 + + @dataclass class ControlPipelineConfig: robot: RobotConfig diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py index 6c97d0cb..d2361a64 100644 --- a/lerobot/common/robot_devices/control_utils.py +++ b/lerobot/common/robot_devices/control_utils.py @@ -12,7 +12,6 @@ from functools import cache import cv2 import torch -import tqdm from deepdiff import DeepDiff from termcolor import colored @@ -276,24 +275,18 @@ def control_loop( break -def reset_environment(robot, events, reset_time_s): +def reset_environment(robot, events, reset_time_s, fps): # TODO(rcadene): refactor warmup_record and reset_environment - # TODO(alibets): allow for teleop during reset if has_method(robot, "teleop_safety_stop"): robot.teleop_safety_stop() - timestamp = 0 - start_vencod_t = time.perf_counter() - - # Wait if necessary - with tqdm.tqdm(total=reset_time_s, desc="Waiting") as pbar: - while timestamp < reset_time_s: - time.sleep(1) - timestamp = time.perf_counter() - start_vencod_t - pbar.update(1) - if events["exit_early"]: - events["exit_early"] = False - break + control_loop( + robot=robot, + control_time_s=reset_time_s, + events=events, + fps=fps, + teleoperate=True, + ) def stop_recording(robot, listener, display_cameras): diff --git a/lerobot/common/robot_devices/motors/dynamixel.py b/lerobot/common/robot_devices/motors/dynamixel.py index 54836d8e..17ea933d 100644 --- a/lerobot/common/robot_devices/motors/dynamixel.py +++ b/lerobot/common/robot_devices/motors/dynamixel.py @@ -242,7 +242,7 @@ class DriveMode(enum.Enum): class CalibrationMode(enum.Enum): # Joints with rotational motions are expressed in degrees in nominal range of [-180, 180] DEGREE = 0 - # Joints with linear motions (like gripper of Aloha) are experessed in nominal range of [0, 100] + # Joints with linear motions (like gripper of Aloha) are expressed in nominal range of [0, 100] LINEAR = 1 @@ -610,7 +610,7 @@ class DynamixelMotorsBus: # 0-centered resolution range (e.g. [-2048, 2048] for resolution=4096) values[i] = values[i] / HALF_TURN_DEGREE * (resolution // 2) - # Substract the homing offsets to come back to actual motor range of values + # Subtract the homing offsets to come back to actual motor range of values # which can be arbitrary. values[i] -= homing_offset diff --git a/lerobot/common/robot_devices/motors/feetech.py b/lerobot/common/robot_devices/motors/feetech.py index a59db7df..cec36d37 100644 --- a/lerobot/common/robot_devices/motors/feetech.py +++ b/lerobot/common/robot_devices/motors/feetech.py @@ -221,7 +221,7 @@ class DriveMode(enum.Enum): class CalibrationMode(enum.Enum): # Joints with rotational motions are expressed in degrees in nominal range of [-180, 180] DEGREE = 0 - # Joints with linear motions (like gripper of Aloha) are experessed in nominal range of [0, 100] + # Joints with linear motions (like gripper of Aloha) are expressed in nominal range of [0, 100] LINEAR = 1 @@ -591,7 +591,7 @@ class FeetechMotorsBus: # 0-centered resolution range (e.g. [-2048, 2048] for resolution=4096) values[i] = values[i] / HALF_TURN_DEGREE * (resolution // 2) - # Substract the homing offsets to come back to actual motor range of values + # Subtract the homing offsets to come back to actual motor range of values # which can be arbitrary. 
values[i] -= homing_offset @@ -632,7 +632,7 @@ class FeetechMotorsBus: track["prev"][idx] = values[i] continue - # Detect a full rotation occured + # Detect a full rotation occurred if abs(track["prev"][idx] - values[i]) > 2048: # Position went below 0 and got reset to 4095 if track["prev"][idx] < values[i]: diff --git a/lerobot/common/robot_devices/robots/configs.py b/lerobot/common/robot_devices/robots/configs.py index a976f601..88cb4e6f 100644 --- a/lerobot/common/robot_devices/robots/configs.py +++ b/lerobot/common/robot_devices/robots/configs.py @@ -514,3 +514,86 @@ class StretchRobotConfig(RobotConfig): ) mock: bool = False + + +@RobotConfig.register_subclass("lekiwi") +@dataclass +class LeKiwiRobotConfig(RobotConfig): + # `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes. + # Set this to a positive scalar to have the same value for all motors, or a list that is the same length as + # the number of motors in your follower arms. + max_relative_target: int | None = None + + # Network Configuration + ip: str = "192.168.0.193" + port: int = 5555 + video_port: int = 5556 + + cameras: dict[str, CameraConfig] = field( + default_factory=lambda: { + "front": OpenCVCameraConfig( + camera_index="/dev/video0", fps=30, width=640, height=480, rotation=90 + ), + "wrist": OpenCVCameraConfig( + camera_index="/dev/video2", fps=30, width=640, height=480, rotation=180 + ), + } + ) + + calibration_dir: str = ".cache/calibration/lekiwi" + + leader_arms: dict[str, MotorsBusConfig] = field( + default_factory=lambda: { + "main": FeetechMotorsBusConfig( + port="/dev/tty.usbmodem585A0077581", + motors={ + # name: (index, model) + "shoulder_pan": [1, "sts3215"], + "shoulder_lift": [2, "sts3215"], + "elbow_flex": [3, "sts3215"], + "wrist_flex": [4, "sts3215"], + "wrist_roll": [5, "sts3215"], + "gripper": [6, "sts3215"], + }, + ), + } + ) + + follower_arms: dict[str, MotorsBusConfig] = field( + default_factory=lambda: { + "main": FeetechMotorsBusConfig( + port="/dev/ttyACM0", + motors={ + # name: (index, model) + "shoulder_pan": [1, "sts3215"], + "shoulder_lift": [2, "sts3215"], + "elbow_flex": [3, "sts3215"], + "wrist_flex": [4, "sts3215"], + "wrist_roll": [5, "sts3215"], + "gripper": [6, "sts3215"], + "left_wheel": (7, "sts3215"), + "back_wheel": (8, "sts3215"), + "right_wheel": (9, "sts3215"), + }, + ), + } + ) + + teleop_keys: dict[str, str] = field( + default_factory=lambda: { + # Movement + "forward": "w", + "backward": "s", + "left": "a", + "right": "d", + "rotate_left": "z", + "rotate_right": "x", + # Speed control + "speed_up": "r", + "speed_down": "f", + # quit teleop + "quit": "q", + } + ) + + mock: bool = False diff --git a/lerobot/common/robot_devices/robots/dynamixel_calibration.py b/lerobot/common/robot_devices/robots/dynamixel_calibration.py index 5c4932d2..142d5794 100644 --- a/lerobot/common/robot_devices/robots/dynamixel_calibration.py +++ b/lerobot/common/robot_devices/robots/dynamixel_calibration.py @@ -87,7 +87,7 @@ def run_arm_calibration(arm: MotorsBus, robot_type: str, arm_name: str, arm_type # For instance, if the motor rotates 90 degree, and its value is -90 after applying the homing offset, then we know its rotation direction # is inverted. However, for the calibration being successful, we need everyone to follow the same target position. # Sometimes, there is only one possible rotation direction. For instance, if the gripper is closed, there is only one direction which - # corresponds to opening the gripper. 
When the rotation direction is ambiguous, we arbitrarely rotate clockwise from the point of view + # corresponds to opening the gripper. When the rotation direction is ambiguous, we arbitrarily rotate clockwise from the point of view # of the previous motor in the kinetic chain. print("\nMove arm to rotated target position") print("See: " + URL_TEMPLATE.format(robot=robot_type, arm=arm_type, position="rotated")) @@ -115,7 +115,7 @@ def run_arm_calibration(arm: MotorsBus, robot_type: str, arm_name: str, arm_type # TODO(rcadene): make type of joints (DEGREE or LINEAR) configurable from yaml? if robot_type in ["aloha"] and "gripper" in arm.motor_names: - # Joints with linear motions (like gripper of Aloha) are experessed in nominal range of [0, 100] + # Joints with linear motions (like gripper of Aloha) are expressed in nominal range of [0, 100] calib_idx = arm.motor_names.index("gripper") calib_mode[calib_idx] = CalibrationMode.LINEAR.name diff --git a/lerobot/common/robot_devices/robots/feetech_calibration.py b/lerobot/common/robot_devices/robots/feetech_calibration.py index b015951a..d779cd44 100644 --- a/lerobot/common/robot_devices/robots/feetech_calibration.py +++ b/lerobot/common/robot_devices/robots/feetech_calibration.py @@ -443,7 +443,7 @@ def run_arm_manual_calibration(arm: MotorsBus, robot_type: str, arm_name: str, a # For instance, if the motor rotates 90 degree, and its value is -90 after applying the homing offset, then we know its rotation direction # is inverted. However, for the calibration being successful, we need everyone to follow the same target position. # Sometimes, there is only one possible rotation direction. For instance, if the gripper is closed, there is only one direction which - # corresponds to opening the gripper. When the rotation direction is ambiguous, we arbitrarely rotate clockwise from the point of view + # corresponds to opening the gripper. When the rotation direction is ambiguous, we arbitrarily rotate clockwise from the point of view # of the previous motor in the kinetic chain. 
print("\nMove arm to rotated target position") print("See: " + URL_TEMPLATE.format(robot=robot_type, arm=arm_type, position="rotated")) diff --git a/lerobot/common/robot_devices/robots/lekiwi_remote.py b/lerobot/common/robot_devices/robots/lekiwi_remote.py new file mode 100644 index 00000000..fd9491fa --- /dev/null +++ b/lerobot/common/robot_devices/robots/lekiwi_remote.py @@ -0,0 +1,210 @@ +import base64 +import json +import threading +import time +from pathlib import Path + +import cv2 +import zmq + +from lerobot.common.robot_devices.robots.mobile_manipulator import LeKiwi + + +def setup_zmq_sockets(config): + context = zmq.Context() + cmd_socket = context.socket(zmq.PULL) + cmd_socket.setsockopt(zmq.CONFLATE, 1) + cmd_socket.bind(f"tcp://*:{config.port}") + + video_socket = context.socket(zmq.PUSH) + video_socket.setsockopt(zmq.CONFLATE, 1) + video_socket.bind(f"tcp://*:{config.video_port}") + + return context, cmd_socket, video_socket + + +def run_camera_capture(cameras, images_lock, latest_images_dict, stop_event): + while not stop_event.is_set(): + local_dict = {} + for name, cam in cameras.items(): + frame = cam.async_read() + ret, buffer = cv2.imencode(".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), 90]) + if ret: + local_dict[name] = base64.b64encode(buffer).decode("utf-8") + else: + local_dict[name] = "" + with images_lock: + latest_images_dict.update(local_dict) + time.sleep(0.01) + + +def calibrate_follower_arm(motors_bus, calib_dir_str): + """ + Calibrates the follower arm. Attempts to load an existing calibration file; + if not found, runs manual calibration and saves the result. + """ + calib_dir = Path(calib_dir_str) + calib_dir.mkdir(parents=True, exist_ok=True) + calib_file = calib_dir / "main_follower.json" + try: + from lerobot.common.robot_devices.robots.feetech_calibration import run_arm_manual_calibration + except ImportError: + print("[WARNING] Calibration function not available. Skipping calibration.") + return + + if calib_file.exists(): + with open(calib_file) as f: + calibration = json.load(f) + print(f"[INFO] Loaded calibration from {calib_file}") + else: + print("[INFO] Calibration file not found. Running manual calibration...") + calibration = run_arm_manual_calibration(motors_bus, "lekiwi", "follower_arm", "follower") + print(f"[INFO] Calibration complete. Saving to {calib_file}") + with open(calib_file, "w") as f: + json.dump(calibration, f) + try: + motors_bus.set_calibration(calibration) + print("[INFO] Applied calibration for follower arm.") + except Exception as e: + print(f"[WARNING] Could not apply calibration: {e}") + + +def run_lekiwi(robot_config): + """ + Runs the LeKiwi robot: + - Sets up cameras and connects them. + - Initializes the follower arm motors. + - Calibrates the follower arm if necessary. + - Creates ZeroMQ sockets for receiving commands and streaming observations. + - Processes incoming commands (arm and wheel commands) and sends back sensor and camera data. + """ + # Import helper functions and classes + from lerobot.common.robot_devices.cameras.utils import make_cameras_from_configs + from lerobot.common.robot_devices.motors.feetech import FeetechMotorsBus, TorqueMode + + # Initialize cameras from the robot configuration. + cameras = make_cameras_from_configs(robot_config.cameras) + for cam in cameras.values(): + cam.connect() + + # Initialize the motors bus using the follower arm configuration. 
+ motor_config = robot_config.follower_arms.get("main") + if motor_config is None: + print("[ERROR] Follower arm 'main' configuration not found.") + return + motors_bus = FeetechMotorsBus(motor_config) + motors_bus.connect() + + # Calibrate the follower arm. + calibrate_follower_arm(motors_bus, robot_config.calibration_dir) + + # Create the LeKiwi robot instance. + robot = LeKiwi(motors_bus) + + # Define the expected arm motor IDs. + arm_motor_ids = ["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll", "gripper"] + + # Disable torque for each arm motor. + for motor in arm_motor_ids: + motors_bus.write("Torque_Enable", TorqueMode.DISABLED.value, motor) + + # Set up ZeroMQ sockets. + context, cmd_socket, video_socket = setup_zmq_sockets(robot_config) + + # Start the camera capture thread. + latest_images_dict = {} + images_lock = threading.Lock() + stop_event = threading.Event() + cam_thread = threading.Thread( + target=run_camera_capture, args=(cameras, images_lock, latest_images_dict, stop_event), daemon=True + ) + cam_thread.start() + + last_cmd_time = time.time() + print("LeKiwi robot server started. Waiting for commands...") + + try: + while True: + loop_start_time = time.time() + + # Process incoming commands (non-blocking). + while True: + try: + msg = cmd_socket.recv_string(zmq.NOBLOCK) + except zmq.Again: + break + try: + data = json.loads(msg) + # Process arm position commands. + if "arm_positions" in data: + arm_positions = data["arm_positions"] + if not isinstance(arm_positions, list): + print(f"[ERROR] Invalid arm_positions: {arm_positions}") + elif len(arm_positions) < len(arm_motor_ids): + print( + f"[WARNING] Received {len(arm_positions)} arm positions, expected {len(arm_motor_ids)}" + ) + else: + for motor, pos in zip(arm_motor_ids, arm_positions, strict=False): + motors_bus.write("Goal_Position", pos, motor) + # Process wheel (base) commands. + if "raw_velocity" in data: + raw_command = data["raw_velocity"] + # Expect keys: "left_wheel", "back_wheel", "right_wheel". + command_speeds = [ + int(raw_command.get("left_wheel", 0)), + int(raw_command.get("back_wheel", 0)), + int(raw_command.get("right_wheel", 0)), + ] + robot.set_velocity(command_speeds) + last_cmd_time = time.time() + except Exception as e: + print(f"[ERROR] Parsing message failed: {e}") + + # Watchdog: stop the robot if no command is received for over 0.5 seconds. + now = time.time() + if now - last_cmd_time > 0.5: + robot.stop() + last_cmd_time = now + + # Read current wheel speeds from the robot. + current_velocity = robot.read_velocity() + + # Read the follower arm state from the motors bus. + follower_arm_state = [] + for motor in arm_motor_ids: + try: + pos = motors_bus.read("Present_Position", motor) + # Convert the position to a float (or use as is if already numeric). + follower_arm_state.append(float(pos) if not isinstance(pos, (int, float)) else pos) + except Exception as e: + print(f"[ERROR] Reading motor {motor} failed: {e}") + + # Get the latest camera images. + with images_lock: + images_dict_copy = dict(latest_images_dict) + + # Build the observation dictionary. + observation = { + "images": images_dict_copy, + "present_speed": current_velocity, + "follower_arm_state": follower_arm_state, + } + # Send the observation over the video socket. + video_socket.send_string(json.dumps(observation)) + + # Ensure a short sleep to avoid overloading the CPU. 
+ elapsed = time.time() - loop_start_time + time.sleep( + max(0.033 - elapsed, 0) + ) # If robot jitters increase the sleep and monitor cpu load with `top` in cmd + except KeyboardInterrupt: + print("Shutting down LeKiwi server.") + finally: + stop_event.set() + cam_thread.join() + robot.stop() + motors_bus.disconnect() + cmd_socket.close() + video_socket.close() + context.term() diff --git a/lerobot/common/robot_devices/robots/manipulator.py b/lerobot/common/robot_devices/robots/manipulator.py index e7f7cbb1..62e5416e 100644 --- a/lerobot/common/robot_devices/robots/manipulator.py +++ b/lerobot/common/robot_devices/robots/manipulator.py @@ -44,7 +44,7 @@ class ManipulatorRobot: # TODO(rcadene): Implement force feedback """This class allows to control any manipulator robot of various number of motors. - Non exaustive list of robots: + Non exhaustive list of robots: - [Koch v1.0](https://github.com/AlexanderKoch-Koch/low_cost_robot), with and without the wrist-to-elbow expansion, developed by Alexander Koch from [Tau Robotics](https://tau-robotics.com) - [Koch v1.1](https://github.com/jess-moss/koch-v1-1) developed by Jess Moss @@ -55,7 +55,7 @@ class ManipulatorRobot: robot = ManipulatorRobot(KochRobotConfig()) ``` - Example of overwritting motors during instantiation: + Example of overwriting motors during instantiation: ```python # Defines how to communicate with the motors of the leader and follower arms leader_arms = { @@ -90,7 +90,7 @@ class ManipulatorRobot: robot = ManipulatorRobot(robot_config) ``` - Example of overwritting cameras during instantiation: + Example of overwriting cameras during instantiation: ```python # Defines how to communicate with 2 cameras connected to the computer. # Here, the webcam of the laptop and the phone (connected in USB to the laptop) @@ -229,7 +229,7 @@ class ManipulatorRobot: if self.robot_type in ["koch", "koch_bimanual", "aloha"]: from lerobot.common.robot_devices.motors.dynamixel import TorqueMode - elif self.robot_type in ["so100", "moss"]: + elif self.robot_type in ["so100", "moss", "lekiwi"]: from lerobot.common.robot_devices.motors.feetech import TorqueMode # We assume that at connection time, arms are in a rest position, and torque can @@ -246,7 +246,7 @@ class ManipulatorRobot: self.set_koch_robot_preset() elif self.robot_type == "aloha": self.set_aloha_robot_preset() - elif self.robot_type in ["so100", "moss"]: + elif self.robot_type in ["so100", "moss", "lekiwi"]: self.set_so100_robot_preset() # Enable torque on all motors of the follower arms @@ -299,7 +299,7 @@ class ManipulatorRobot: calibration = run_arm_calibration(arm, self.robot_type, name, arm_type) - elif self.robot_type in ["so100", "moss"]: + elif self.robot_type in ["so100", "moss", "lekiwi"]: from lerobot.common.robot_devices.robots.feetech_calibration import ( run_arm_manual_calibration, ) @@ -348,7 +348,7 @@ class ManipulatorRobot: set_operating_mode_(self.follower_arms[name]) # Set better PID values to close the gap between recorded states and actions - # TODO(rcadene): Implement an automatic procedure to set optimial PID values for each motor + # TODO(rcadene): Implement an automatic procedure to set optimal PID values for each motor self.follower_arms[name].write("Position_P_Gain", 1500, "elbow_flex") self.follower_arms[name].write("Position_I_Gain", 0, "elbow_flex") self.follower_arms[name].write("Position_D_Gain", 600, "elbow_flex") @@ -500,7 +500,7 @@ class ManipulatorRobot: self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"] 
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t - # Populate output dictionnaries + # Populate output dictionaries obs_dict, action_dict = {}, {} obs_dict["observation.state"] = state action_dict["action"] = action @@ -540,7 +540,7 @@ class ManipulatorRobot: self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"] self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t - # Populate output dictionnaries and format to pytorch + # Populate output dictionaries and format to pytorch obs_dict = {} obs_dict["observation.state"] = state for name in self.cameras: diff --git a/lerobot/common/robot_devices/robots/mobile_manipulator.py b/lerobot/common/robot_devices/robots/mobile_manipulator.py new file mode 100644 index 00000000..b20c61f7 --- /dev/null +++ b/lerobot/common/robot_devices/robots/mobile_manipulator.py @@ -0,0 +1,691 @@ +import base64 +import json +import os +import sys +from pathlib import Path + +import cv2 +import numpy as np +import torch +import zmq + +from lerobot.common.robot_devices.cameras.utils import make_cameras_from_configs +from lerobot.common.robot_devices.motors.feetech import TorqueMode +from lerobot.common.robot_devices.motors.utils import MotorsBus, make_motors_buses_from_configs +from lerobot.common.robot_devices.robots.configs import LeKiwiRobotConfig +from lerobot.common.robot_devices.robots.feetech_calibration import run_arm_manual_calibration +from lerobot.common.robot_devices.robots.utils import get_arm_id +from lerobot.common.robot_devices.utils import RobotDeviceNotConnectedError + +PYNPUT_AVAILABLE = True +try: + # Only import if there's a valid X server or if we're not on a Pi + if ("DISPLAY" not in os.environ) and ("linux" in sys.platform): + print("No DISPLAY set. Skipping pynput import.") + raise ImportError("pynput blocked intentionally due to no display.") + + from pynput import keyboard +except ImportError: + keyboard = None + PYNPUT_AVAILABLE = False +except Exception as e: + keyboard = None + PYNPUT_AVAILABLE = False + print(f"Could not import pynput: {e}") + + +class MobileManipulator: + """ + MobileManipulator is a class for connecting to and controlling a remote mobile manipulator robot. + The robot includes a three omniwheel mobile base and a remote follower arm. + The leader arm is connected locally (on the laptop) and its joint positions are recorded and then + forwarded to the remote follower arm (after applying a safety clamp). + In parallel, keyboard teleoperation is used to generate raw velocity commands for the wheels. + """ + + def __init__(self, config: LeKiwiRobotConfig): + """ + Expected keys in config: + - ip, port, video_port for the remote connection. + - calibration_dir, leader_arms, follower_arms, max_relative_target, etc. + """ + self.robot_type = config.type + self.config = config + self.remote_ip = config.ip + self.remote_port = config.port + self.remote_port_video = config.video_port + self.calibration_dir = Path(self.config.calibration_dir) + self.logs = {} + + self.teleop_keys = self.config.teleop_keys + + # For teleoperation, the leader arm (local) is used to record the desired arm pose. 
+ self.leader_arms = make_motors_buses_from_configs(self.config.leader_arms) + + self.follower_arms = make_motors_buses_from_configs(self.config.follower_arms) + + self.cameras = make_cameras_from_configs(self.config.cameras) + + self.is_connected = False + + self.last_frames = {} + self.last_present_speed = {} + self.last_remote_arm_state = torch.zeros(6, dtype=torch.float32) + + # Define three speed levels and a current index + self.speed_levels = [ + {"xy": 0.1, "theta": 30}, # slow + {"xy": 0.2, "theta": 60}, # medium + {"xy": 0.3, "theta": 90}, # fast + ] + self.speed_index = 0 # Start at slow + + # ZeroMQ context and sockets. + self.context = None + self.cmd_socket = None + self.video_socket = None + + # Keyboard state for base teleoperation. + self.running = True + self.pressed_keys = { + "forward": False, + "backward": False, + "left": False, + "right": False, + "rotate_left": False, + "rotate_right": False, + } + + if PYNPUT_AVAILABLE: + print("pynput is available - enabling local keyboard listener.") + self.listener = keyboard.Listener( + on_press=self.on_press, + on_release=self.on_release, + ) + self.listener.start() + else: + print("pynput not available - skipping local keyboard listener.") + self.listener = None + + def get_motor_names(self, arms: dict[str, MotorsBus]) -> list: + return [f"{arm}_{motor}" for arm, bus in arms.items() for motor in bus.motors] + + @property + def camera_features(self) -> dict: + cam_ft = {} + for cam_key, cam in self.cameras.items(): + key = f"observation.images.{cam_key}" + cam_ft[key] = { + "shape": (cam.height, cam.width, cam.channels), + "names": ["height", "width", "channels"], + "info": None, + } + return cam_ft + + @property + def motor_features(self) -> dict: + follower_arm_names = [ + "shoulder_pan", + "shoulder_lift", + "elbow_flex", + "wrist_flex", + "wrist_roll", + "gripper", + ] + observations = ["x_mm", "y_mm", "theta"] + combined_names = follower_arm_names + observations + return { + "action": { + "dtype": "float32", + "shape": (len(combined_names),), + "names": combined_names, + }, + "observation.state": { + "dtype": "float32", + "shape": (len(combined_names),), + "names": combined_names, + }, + } + + @property + def features(self): + return {**self.motor_features, **self.camera_features} + + @property + def has_camera(self): + return len(self.cameras) > 0 + + @property + def num_cameras(self): + return len(self.cameras) + + @property + def available_arms(self): + available = [] + for name in self.leader_arms: + available.append(get_arm_id(name, "leader")) + for name in self.follower_arms: + available.append(get_arm_id(name, "follower")) + return available + + def on_press(self, key): + try: + # Movement + if key.char == self.teleop_keys["forward"]: + self.pressed_keys["forward"] = True + elif key.char == self.teleop_keys["backward"]: + self.pressed_keys["backward"] = True + elif key.char == self.teleop_keys["left"]: + self.pressed_keys["left"] = True + elif key.char == self.teleop_keys["right"]: + self.pressed_keys["right"] = True + elif key.char == self.teleop_keys["rotate_left"]: + self.pressed_keys["rotate_left"] = True + elif key.char == self.teleop_keys["rotate_right"]: + self.pressed_keys["rotate_right"] = True + + # Quit teleoperation + elif key.char == self.teleop_keys["quit"]: + self.running = False + return False + + # Speed control + elif key.char == self.teleop_keys["speed_up"]: + self.speed_index = min(self.speed_index + 1, 2) + print(f"Speed index increased to {self.speed_index}") + elif key.char == 
self.teleop_keys["speed_down"]: + self.speed_index = max(self.speed_index - 1, 0) + print(f"Speed index decreased to {self.speed_index}") + + except AttributeError: + # e.g., if key is special like Key.esc + if key == keyboard.Key.esc: + self.running = False + return False + + def on_release(self, key): + try: + if hasattr(key, "char"): + if key.char == self.teleop_keys["forward"]: + self.pressed_keys["forward"] = False + elif key.char == self.teleop_keys["backward"]: + self.pressed_keys["backward"] = False + elif key.char == self.teleop_keys["left"]: + self.pressed_keys["left"] = False + elif key.char == self.teleop_keys["right"]: + self.pressed_keys["right"] = False + elif key.char == self.teleop_keys["rotate_left"]: + self.pressed_keys["rotate_left"] = False + elif key.char == self.teleop_keys["rotate_right"]: + self.pressed_keys["rotate_right"] = False + except AttributeError: + pass + + def connect(self): + if not self.leader_arms: + raise ValueError("MobileManipulator has no leader arm to connect.") + for name in self.leader_arms: + print(f"Connecting {name} leader arm.") + self.calibrate_leader() + + # Set up ZeroMQ sockets to communicate with the remote mobile robot. + self.context = zmq.Context() + self.cmd_socket = self.context.socket(zmq.PUSH) + connection_string = f"tcp://{self.remote_ip}:{self.remote_port}" + self.cmd_socket.connect(connection_string) + self.cmd_socket.setsockopt(zmq.CONFLATE, 1) + self.video_socket = self.context.socket(zmq.PULL) + video_connection = f"tcp://{self.remote_ip}:{self.remote_port_video}" + self.video_socket.connect(video_connection) + self.video_socket.setsockopt(zmq.CONFLATE, 1) + print( + f"[INFO] Connected to remote robot at {connection_string} and video stream at {video_connection}." + ) + self.is_connected = True + + def load_or_run_calibration_(self, name, arm, arm_type): + arm_id = get_arm_id(name, arm_type) + arm_calib_path = self.calibration_dir / f"{arm_id}.json" + + if arm_calib_path.exists(): + with open(arm_calib_path) as f: + calibration = json.load(f) + else: + print(f"Missing calibration file '{arm_calib_path}'") + calibration = run_arm_manual_calibration(arm, self.robot_type, name, arm_type) + print(f"Calibration is done! Saving calibration file '{arm_calib_path}'") + arm_calib_path.parent.mkdir(parents=True, exist_ok=True) + with open(arm_calib_path, "w") as f: + json.dump(calibration, f) + + return calibration + + def calibrate_leader(self): + for name, arm in self.leader_arms.items(): + # Connect the bus + arm.connect() + + # Disable torque on all motors + for motor_id in arm.motors: + arm.write("Torque_Enable", TorqueMode.DISABLED.value, motor_id) + + # Now run calibration + calibration = self.load_or_run_calibration_(name, arm, "leader") + arm.set_calibration(calibration) + + def calibrate_follower(self): + for name, bus in self.follower_arms.items(): + bus.connect() + + # Disable torque on all motors + for motor_id in bus.motors: + bus.write("Torque_Enable", 0, motor_id) + + # Then filter out wheels + arm_only_dict = {k: v for k, v in bus.motors.items() if not k.startswith("wheel_")} + if not arm_only_dict: + continue + + original_motors = bus.motors + bus.motors = arm_only_dict + + calibration = self.load_or_run_calibration_(name, bus, "follower") + bus.set_calibration(calibration) + + bus.motors = original_motors + + def _get_data(self): + """ + Polls the video socket for up to 15 ms. If data arrives, decode only + the *latest* message, returning frames, speed, and arm state. 
If + nothing arrives for any field, use the last known values. + """ + frames = {} + present_speed = {} + remote_arm_state_tensor = torch.zeros(6, dtype=torch.float32) + + # Poll up to 15 ms + poller = zmq.Poller() + poller.register(self.video_socket, zmq.POLLIN) + socks = dict(poller.poll(15)) + if self.video_socket not in socks or socks[self.video_socket] != zmq.POLLIN: + # No new data arrived → reuse ALL old data + return (self.last_frames, self.last_present_speed, self.last_remote_arm_state) + + # Drain all messages, keep only the last + last_msg = None + while True: + try: + obs_string = self.video_socket.recv_string(zmq.NOBLOCK) + last_msg = obs_string + except zmq.Again: + break + + if not last_msg: + # No new message → also reuse old + return (self.last_frames, self.last_present_speed, self.last_remote_arm_state) + + # Decode only the final message + try: + observation = json.loads(last_msg) + + images_dict = observation.get("images", {}) + new_speed = observation.get("present_speed", {}) + new_arm_state = observation.get("follower_arm_state", None) + + # Convert images + for cam_name, image_b64 in images_dict.items(): + if image_b64: + jpg_data = base64.b64decode(image_b64) + np_arr = np.frombuffer(jpg_data, dtype=np.uint8) + frame_candidate = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) + if frame_candidate is not None: + frames[cam_name] = frame_candidate + + # If remote_arm_state is None and frames is None there is no message then use the previous message + if new_arm_state is not None and frames is not None: + self.last_frames = frames + + remote_arm_state_tensor = torch.tensor(new_arm_state, dtype=torch.float32) + self.last_remote_arm_state = remote_arm_state_tensor + + present_speed = new_speed + self.last_present_speed = new_speed + else: + frames = self.last_frames + + remote_arm_state_tensor = self.last_remote_arm_state + + present_speed = self.last_present_speed + + except Exception as e: + print(f"[DEBUG] Error decoding video message: {e}") + # If decode fails, fall back to old data + return (self.last_frames, self.last_present_speed, self.last_remote_arm_state) + + return frames, present_speed, remote_arm_state_tensor + + def _process_present_speed(self, present_speed: dict) -> torch.Tensor: + state_tensor = torch.zeros(3, dtype=torch.int32) + if present_speed: + decoded = {key: MobileManipulator.raw_to_degps(value) for key, value in present_speed.items()} + if "1" in decoded: + state_tensor[0] = decoded["1"] + if "2" in decoded: + state_tensor[1] = decoded["2"] + if "3" in decoded: + state_tensor[2] = decoded["3"] + return state_tensor + + def teleop_step( + self, record_data: bool = False + ) -> None | tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: + if not self.is_connected: + raise RobotDeviceNotConnectedError("MobileManipulator is not connected. Run `connect()` first.") + + speed_setting = self.speed_levels[self.speed_index] + xy_speed = speed_setting["xy"] # e.g. 0.1, 0.25, or 0.4 + theta_speed = speed_setting["theta"] # e.g. 
30, 60, or 90 + + # Prepare to assign the position of the leader to the follower + arm_positions = [] + for name in self.leader_arms: + pos = self.leader_arms[name].read("Present_Position") + pos_tensor = torch.from_numpy(pos).float() + # Instead of pos_tensor.item(), use tolist() to convert the entire tensor to a list + arm_positions.extend(pos_tensor.tolist()) + + # (The rest of your code for generating wheel commands remains unchanged) + x_cmd = 0.0 # m/s forward/backward + y_cmd = 0.0 # m/s lateral + theta_cmd = 0.0 # deg/s rotation + if self.pressed_keys["forward"]: + x_cmd += xy_speed + if self.pressed_keys["backward"]: + x_cmd -= xy_speed + if self.pressed_keys["left"]: + y_cmd += xy_speed + if self.pressed_keys["right"]: + y_cmd -= xy_speed + if self.pressed_keys["rotate_left"]: + theta_cmd += theta_speed + if self.pressed_keys["rotate_right"]: + theta_cmd -= theta_speed + + wheel_commands = self.body_to_wheel_raw(x_cmd, y_cmd, theta_cmd) + + message = {"raw_velocity": wheel_commands, "arm_positions": arm_positions} + self.cmd_socket.send_string(json.dumps(message)) + + if not record_data: + return + + obs_dict = self.capture_observation() + + arm_state_tensor = torch.tensor(arm_positions, dtype=torch.float32) + + wheel_velocity_tuple = self.wheel_raw_to_body(wheel_commands) + wheel_velocity_mm = ( + wheel_velocity_tuple[0] * 1000.0, + wheel_velocity_tuple[1] * 1000.0, + wheel_velocity_tuple[2], + ) + wheel_tensor = torch.tensor(wheel_velocity_mm, dtype=torch.float32) + action_tensor = torch.cat([arm_state_tensor, wheel_tensor]) + action_dict = {"action": action_tensor} + + return obs_dict, action_dict + + def capture_observation(self) -> dict: + """ + Capture observations from the remote robot: current follower arm positions, + present wheel speeds (converted to body-frame velocities: x, y, theta), + and a camera frame. + """ + if not self.is_connected: + raise RobotDeviceNotConnectedError("Not connected. Run `connect()` first.") + + frames, present_speed, remote_arm_state_tensor = self._get_data() + + body_state = self.wheel_raw_to_body(present_speed) + + body_state_mm = (body_state[0] * 1000.0, body_state[1] * 1000.0, body_state[2]) # Convert x,y to mm/s + wheel_state_tensor = torch.tensor(body_state_mm, dtype=torch.float32) + combined_state_tensor = torch.cat((remote_arm_state_tensor, wheel_state_tensor), dim=0) + + obs_dict = {"observation.state": combined_state_tensor} + + # Loop over each configured camera + for cam_name, cam in self.cameras.items(): + frame = frames.get(cam_name, None) + if frame is None: + # Create a black image using the camera's configured width, height, and channels + frame = np.zeros((cam.height, cam.width, cam.channels), dtype=np.uint8) + obs_dict[f"observation.images.{cam_name}"] = torch.from_numpy(frame) + + return obs_dict + + def send_action(self, action: torch.Tensor) -> torch.Tensor: + if not self.is_connected: + raise RobotDeviceNotConnectedError("Not connected. Run `connect()` first.") + + # Ensure the action tensor has at least 9 elements: + # - First 6: arm positions. + # - Last 3: base commands. + if action.numel() < 9: + # Pad with zeros if there are not enough elements. + padded = torch.zeros(9, dtype=action.dtype) + padded[: action.numel()] = action + action = padded + + # Extract arm and base actions. 
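As context for the slicing that follows, an action tensor for `send_action` packs six arm joint targets followed by the base command `(x_mm, y_mm, theta)`, matching `motor_features` above. A sketch with illustrative values:

```python
import torch

arm_goal = torch.tensor([2048.0] * 6)         # illustrative raw Goal_Position targets (servo mid-range)
base_goal = torch.tensor([150.0, 0.0, 20.0])  # 150 mm/s forward, no lateral motion, 20 deg/s rotation
action = torch.cat([arm_goal, base_goal])     # shape (9,), as expected by send_action
# robot.send_action(action)  # assuming `robot` is a connected MobileManipulator
```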
+ arm_actions = action[:6].flatten() + base_actions = action[6:].flatten() + + x_cmd_mm = base_actions[0].item() # mm/s + y_cmd_mm = base_actions[1].item() # mm/s + theta_cmd = base_actions[2].item() # deg/s + + # Convert mm/s to m/s for the kinematics calculations. + x_cmd = x_cmd_mm / 1000.0 # m/s + y_cmd = y_cmd_mm / 1000.0 # m/s + + # Compute wheel commands from body commands. + wheel_commands = self.body_to_wheel_raw(x_cmd, y_cmd, theta_cmd) + + arm_positions_list = arm_actions.tolist() + + message = {"raw_velocity": wheel_commands, "arm_positions": arm_positions_list} + self.cmd_socket.send_string(json.dumps(message)) + + return action + + def print_logs(self): + pass + + def disconnect(self): + if not self.is_connected: + raise RobotDeviceNotConnectedError("Not connected.") + if self.cmd_socket: + stop_cmd = { + "raw_velocity": {"left_wheel": 0, "back_wheel": 0, "right_wheel": 0}, + "arm_positions": {}, + } + self.cmd_socket.send_string(json.dumps(stop_cmd)) + self.cmd_socket.close() + if self.video_socket: + self.video_socket.close() + if self.context: + self.context.term() + if PYNPUT_AVAILABLE: + self.listener.stop() + self.is_connected = False + print("[INFO] Disconnected from remote robot.") + + def __del__(self): + if getattr(self, "is_connected", False): + self.disconnect() + if PYNPUT_AVAILABLE: + self.listener.stop() + + @staticmethod + def degps_to_raw(degps: float) -> int: + steps_per_deg = 4096.0 / 360.0 + speed_in_steps = abs(degps) * steps_per_deg + speed_int = int(round(speed_in_steps)) + if speed_int > 0x7FFF: + speed_int = 0x7FFF + if degps < 0: + return speed_int | 0x8000 + else: + return speed_int & 0x7FFF + + @staticmethod + def raw_to_degps(raw_speed: int) -> float: + steps_per_deg = 4096.0 / 360.0 + magnitude = raw_speed & 0x7FFF + degps = magnitude / steps_per_deg + if raw_speed & 0x8000: + degps = -degps + return degps + + def body_to_wheel_raw( + self, + x_cmd: float, + y_cmd: float, + theta_cmd: float, + wheel_radius: float = 0.05, + base_radius: float = 0.125, + max_raw: int = 3000, + ) -> dict: + """ + Convert desired body-frame velocities into wheel raw commands. + + Parameters: + x_cmd : Linear velocity in x (m/s). + y_cmd : Linear velocity in y (m/s). + theta_cmd : Rotational velocity (deg/s). + wheel_radius: Radius of each wheel (meters). + base_radius : Distance from the center of rotation to each wheel (meters). + max_raw : Maximum allowed raw command (ticks) per wheel. + + Returns: + A dictionary with wheel raw commands: + {"left_wheel": value, "back_wheel": value, "right_wheel": value}. + + Notes: + - Internally, the method converts theta_cmd to rad/s for the kinematics. + - The raw command is computed from the wheels angular speed in deg/s + using degps_to_raw(). If any command exceeds max_raw, all commands + are scaled down proportionally. + """ + # Convert rotational velocity from deg/s to rad/s. + theta_rad = theta_cmd * (np.pi / 180.0) + # Create the body velocity vector [x, y, theta_rad]. + velocity_vector = np.array([x_cmd, y_cmd, theta_rad]) + + # Define the wheel mounting angles with a -90° offset. + angles = np.radians(np.array([240, 120, 0]) - 90) + # Build the kinematic matrix: each row maps body velocities to a wheel’s linear speed. + # The third column (base_radius) accounts for the effect of rotation. + m = np.array([[np.cos(a), np.sin(a), base_radius] for a in angles]) + + # Compute each wheel’s linear speed (m/s) and then its angular speed (rad/s). 
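To make the conventions concrete before the computation continues below, here is a worked check of the speed encoding and the kinematic matrix defined above, using the default `wheel_radius=0.05` m and `base_radius=0.125` m:

```python
import numpy as np

from lerobot.common.robot_devices.robots.mobile_manipulator import MobileManipulator

# Sign-magnitude encoding: 4096 ticks per revolution, sign carried in bit 15.
assert MobileManipulator.degps_to_raw(90.0) == 1024             # 90 * 4096 / 360
assert MobileManipulator.degps_to_raw(-90.0) == 0x8000 | 1024   # 33792
assert abs(MobileManipulator.raw_to_degps(33792) + 90.0) < 1e-9

# Pure rotation at 90 deg/s: every wheel runs at base_radius/wheel_radius = 2.5x the body
# rate (225 deg/s), which encodes to round(225 * 4096 / 360) = 2560 ticks, under max_raw=3000.
angles = np.radians(np.array([240, 120, 0]) - 90)
m = np.array([[np.cos(a), np.sin(a), 0.125] for a in angles])
wheel_degps = np.rad2deg(m.dot([0.0, 0.0, np.deg2rad(90.0)]) / 0.05)
print(np.round(wheel_degps * 4096 / 360).astype(int))  # -> [2560 2560 2560]
```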
+ wheel_linear_speeds = m.dot(velocity_vector) + wheel_angular_speeds = wheel_linear_speeds / wheel_radius + + # Convert wheel angular speeds from rad/s to deg/s. + wheel_degps = wheel_angular_speeds * (180.0 / np.pi) + + # Scaling + steps_per_deg = 4096.0 / 360.0 + raw_floats = [abs(degps) * steps_per_deg for degps in wheel_degps] + max_raw_computed = max(raw_floats) + if max_raw_computed > max_raw: + scale = max_raw / max_raw_computed + wheel_degps = wheel_degps * scale + + # Convert each wheel’s angular speed (deg/s) to a raw integer. + wheel_raw = [MobileManipulator.degps_to_raw(deg) for deg in wheel_degps] + + return {"left_wheel": wheel_raw[0], "back_wheel": wheel_raw[1], "right_wheel": wheel_raw[2]} + + def wheel_raw_to_body( + self, wheel_raw: dict, wheel_radius: float = 0.05, base_radius: float = 0.125 + ) -> tuple: + """ + Convert wheel raw command feedback back into body-frame velocities. + + Parameters: + wheel_raw : Dictionary with raw wheel commands (keys: "left_wheel", "back_wheel", "right_wheel"). + wheel_radius: Radius of each wheel (meters). + base_radius : Distance from the robot center to each wheel (meters). + + Returns: + A tuple (x_cmd, y_cmd, theta_cmd) where: + x_cmd : Linear velocity in x (m/s). + y_cmd : Linear velocity in y (m/s). + theta_cmd : Rotational velocity in deg/s. + """ + # Extract the raw values in order. + raw_list = [ + int(wheel_raw.get("left_wheel", 0)), + int(wheel_raw.get("back_wheel", 0)), + int(wheel_raw.get("right_wheel", 0)), + ] + + # Convert each raw command back to an angular speed in deg/s. + wheel_degps = np.array([MobileManipulator.raw_to_degps(r) for r in raw_list]) + # Convert from deg/s to rad/s. + wheel_radps = wheel_degps * (np.pi / 180.0) + # Compute each wheel’s linear speed (m/s) from its angular speed. + wheel_linear_speeds = wheel_radps * wheel_radius + + # Define the wheel mounting angles with a -90° offset. + angles = np.radians(np.array([240, 120, 0]) - 90) + m = np.array([[np.cos(a), np.sin(a), base_radius] for a in angles]) + + # Solve the inverse kinematics: body_velocity = M⁻¹ · wheel_linear_speeds. + m_inv = np.linalg.inv(m) + velocity_vector = m_inv.dot(wheel_linear_speeds) + x_cmd, y_cmd, theta_rad = velocity_vector + theta_cmd = theta_rad * (180.0 / np.pi) + return (x_cmd, y_cmd, theta_cmd) + + +class LeKiwi: + def __init__(self, motor_bus): + """ + Initializes the LeKiwi with Feetech motors bus. + """ + self.motor_bus = motor_bus + self.motor_ids = ["left_wheel", "back_wheel", "right_wheel"] + + # Initialize motors in velocity mode. + self.motor_bus.write("Lock", 0) + self.motor_bus.write("Mode", [1, 1, 1], self.motor_ids) + self.motor_bus.write("Lock", 1) + print("Motors set to velocity mode.") + + def read_velocity(self): + """ + Reads the raw speeds for all wheels. Returns a dictionary with motor names: + """ + raw_speeds = self.motor_bus.read("Present_Speed", self.motor_ids) + return { + "left_wheel": int(raw_speeds[0]), + "back_wheel": int(raw_speeds[1]), + "right_wheel": int(raw_speeds[2]), + } + + def set_velocity(self, command_speeds): + """ + Sends raw velocity commands (16-bit encoded values) directly to the motor bus. + The order of speeds must correspond to self.motor_ids. 
+ """ + self.motor_bus.write("Goal_Speed", command_speeds, self.motor_ids) + + def stop(self): + """Stops the robot by setting all motor speeds to zero.""" + self.motor_bus.write("Goal_Speed", [0, 0, 0], self.motor_ids) + print("Motors stopped.") diff --git a/lerobot/common/robot_devices/robots/stretch.py b/lerobot/common/robot_devices/robots/stretch.py index b63bf941..9cfe6e49 100644 --- a/lerobot/common/robot_devices/robots/stretch.py +++ b/lerobot/common/robot_devices/robots/stretch.py @@ -108,7 +108,7 @@ class StretchRobot(StretchAPI): self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"] self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t - # Populate output dictionnaries + # Populate output dictionaries obs_dict, action_dict = {}, {} obs_dict["observation.state"] = state action_dict["action"] = action @@ -153,7 +153,7 @@ class StretchRobot(StretchAPI): self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"] self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t - # Populate output dictionnaries + # Populate output dictionaries obs_dict = {} obs_dict["observation.state"] = state for name in self.cameras: diff --git a/lerobot/common/robot_devices/robots/utils.py b/lerobot/common/robot_devices/robots/utils.py index 86ff6473..47e2519b 100644 --- a/lerobot/common/robot_devices/robots/utils.py +++ b/lerobot/common/robot_devices/robots/utils.py @@ -4,6 +4,7 @@ from lerobot.common.robot_devices.robots.configs import ( AlohaRobotConfig, KochBimanualRobotConfig, KochRobotConfig, + LeKiwiRobotConfig, ManipulatorRobotConfig, MossRobotConfig, RobotConfig, @@ -45,6 +46,8 @@ def make_robot_config(robot_type: str, **kwargs) -> RobotConfig: return So100RobotConfig(**kwargs) elif robot_type == "stretch": return StretchRobotConfig(**kwargs) + elif robot_type == "lekiwi": + return LeKiwiRobotConfig(**kwargs) else: raise ValueError(f"Robot type '{robot_type}' is not available.") @@ -54,6 +57,10 @@ def make_robot_from_config(config: RobotConfig): from lerobot.common.robot_devices.robots.manipulator import ManipulatorRobot return ManipulatorRobot(config) + elif isinstance(config, LeKiwiRobotConfig): + from lerobot.common.robot_devices.robots.mobile_manipulator import MobileManipulator + + return MobileManipulator(config) else: from lerobot.common.robot_devices.robots.stretch import StretchRobot diff --git a/lerobot/common/utils/utils.py b/lerobot/common/utils/utils.py index 0d9631b0..3863443d 100644 --- a/lerobot/common/utils/utils.py +++ b/lerobot/common/utils/utils.py @@ -17,6 +17,7 @@ import logging import os import os.path as osp import platform +import subprocess from copy import copy from datetime import datetime, timezone from pathlib import Path @@ -165,23 +166,31 @@ def capture_timestamp_utc(): def say(text, blocking=False): - # Check if mac, linux, or windows. 
- if platform.system() == "Darwin": - cmd = f'say "{text}"' - if not blocking: - cmd += " &" - elif platform.system() == "Linux": - cmd = f'spd-say "{text}"' - if blocking: - cmd += " --wait" - elif platform.system() == "Windows": - # TODO(rcadene): Make blocking option work for Windows - cmd = ( - 'PowerShell -Command "Add-Type -AssemblyName System.Speech; ' - f"(New-Object System.Speech.Synthesis.SpeechSynthesizer).Speak('{text}')\"" - ) + system = platform.system() - os.system(cmd) + if system == "Darwin": + cmd = ["say", text] + + elif system == "Linux": + cmd = ["spd-say", text] + if blocking: + cmd.append("--wait") + + elif system == "Windows": + cmd = [ + "PowerShell", + "-Command", + "Add-Type -AssemblyName System.Speech; " + f"(New-Object System.Speech.Synthesis.SpeechSynthesizer).Speak('{text}')", + ] + + else: + raise RuntimeError("Unsupported operating system for text-to-speech.") + + if blocking: + subprocess.run(cmd, check=True) + else: + subprocess.Popen(cmd, creationflags=subprocess.CREATE_NO_WINDOW if system == "Windows" else 0) def log_say(text, play_sounds, blocking=False): diff --git a/lerobot/configs/default.py b/lerobot/configs/default.py index a5013431..1e7f5819 100644 --- a/lerobot/configs/default.py +++ b/lerobot/configs/default.py @@ -27,8 +27,10 @@ class DatasetConfig: # You may provide a list of datasets here. `train.py` creates them all and concatenates them. Note: only data # keys common between the datasets are kept. Each dataset gets and additional transform that inserts the # "dataset_index" into the returned item. The index mapping is made according to the order in which the - # datsets are provided. + # datasets are provided. repo_id: str + # Root directory where the dataset will be stored (e.g. 'dataset/path'). + root: str | None = None episodes: list[int] | None = None image_transforms: ImageTransformsConfig = field(default_factory=ImageTransformsConfig) revision: str | None = None diff --git a/lerobot/configs/train.py b/lerobot/configs/train.py index 93f6e2a4..464c11f9 100644 --- a/lerobot/configs/train.py +++ b/lerobot/configs/train.py @@ -102,7 +102,7 @@ class TrainPipelineConfig(HubMixin): if not self.resume and isinstance(self.output_dir, Path) and self.output_dir.is_dir(): raise FileExistsError( - f"Output directory {self.output_dir} alreay exists and resume is {self.resume}. " + f"Output directory {self.output_dir} already exists and resume is {self.resume}. " f"Please change your output directory so that {self.output_dir} is not overwritten." ) elif not self.output_dir: diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index e6103271..ab5d0e8a 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -77,6 +77,13 @@ python lerobot/scripts/control_robot.py record \ --control.reset_time_s=10 ``` +- For remote controlled robots like LeKiwi, run this script on the robot edge device (e.g. RaspBerryPi): +```bash +python lerobot/scripts/control_robot.py \ + --robot.type=lekiwi \ + --control.type=remote_robot +``` + **NOTE**: You can use your keyboard to control data recording flow. - Tap right arrow key '->' to early exit while recording an episode and go to resseting the environment. - Tap right arrow key '->' to early exit while resetting the environment and got to recording the next episode. 
@@ -126,6 +133,7 @@ from lerobot.common.robot_devices.control_configs import ( CalibrateControlConfig, ControlPipelineConfig, RecordControlConfig, + RemoteRobotConfig, ReplayControlConfig, TeleoperateControlConfig, ) @@ -187,6 +195,16 @@ def calibrate(robot: Robot, cfg: CalibrateControlConfig): if robot.is_connected: robot.disconnect() + if robot.robot_type.startswith("lekiwi") and "main_follower" in arms: + print("Calibrating only the lekiwi follower arm 'main_follower'...") + robot.calibrate_follower() + return + + if robot.robot_type.startswith("lekiwi") and "main_leader" in arms: + print("Calibrating only the lekiwi leader arm 'main_leader'...") + robot.calibrate_leader() + return + # Calling `connect` automatically runs calibration # when the calibration file is missing robot.connect() @@ -281,7 +299,7 @@ def record( (recorded_episodes < cfg.num_episodes - 1) or events["rerecord_episode"] ): log_say("Reset the environment", cfg.play_sounds) - reset_environment(robot, events, cfg.reset_time_s) + reset_environment(robot, events, cfg.reset_time_s, cfg.fps) if events["rerecord_episode"]: log_say("Re-record episode", cfg.play_sounds) @@ -349,6 +367,10 @@ def control_robot(cfg: ControlPipelineConfig): record(robot, cfg.control) elif isinstance(cfg.control, ReplayControlConfig): replay(robot, cfg.control) + elif isinstance(cfg.control, RemoteRobotConfig): + from lerobot.common.robot_devices.robots.lekiwi_remote import run_lekiwi + + run_lekiwi(cfg.robot) if robot.is_connected: # Disconnect manually to avoid a "Core dump" during process diff --git a/lerobot/scripts/control_sim_robot.py b/lerobot/scripts/control_sim_robot.py index 8d69bf31..49a88d14 100644 --- a/lerobot/scripts/control_sim_robot.py +++ b/lerobot/scripts/control_sim_robot.py @@ -59,8 +59,8 @@ python lerobot/scripts/control_sim_robot.py record \ ``` **NOTE**: You can use your keyboard to control data recording flow. -- Tap right arrow key '->' to early exit while recording an episode and go to reseting the environment. -- Tap right arrow key '->' to early exit while reseting the environment and got to recording the next episode. +- Tap right arrow key '->' to early exit while recording an episode and go to resetting the environment. +- Tap right arrow key '->' to early exit while resetting the environment and got to recording the next episode. - Tap left arrow key '<-' to early exit and re-record the current episode. - Tap escape key 'esc' to stop the data recording. This might require a sudo permission to allow your terminal to monitor keyboard events. @@ -131,7 +131,7 @@ def none_or_int(value): def init_sim_calibration(robot, cfg): # Constants necessary for transforming the joint pos of the real robot to the sim - # depending on the robot discription used in that sim. + # depending on the robot description used in that sim. start_pos = np.array(robot.leader_arms.main.calibration["start_pos"]) axis_directions = np.array(cfg.get("axis_directions", [1])) offsets = np.array(cfg.get("offsets", [0])) * np.pi @@ -445,7 +445,7 @@ if __name__ == "__main__": type=int, default=0, help=( - "Number of subprocesses handling the saving of frames as PNGs. Set to 0 to use threads only; " + "Number of subprocesses handling the saving of frames as PNG. Set to 0 to use threads only; " "set to ≥1 to use subprocesses, each using threads to write images. The best number of processes " "and threads depends on your system. We recommend 4 threads per camera with 0 processes. " "If fps is unstable, adjust the thread count. 
If still unstable, try using 1 or more subprocesses." diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py index a4f79afc..47225993 100644 --- a/lerobot/scripts/eval.py +++ b/lerobot/scripts/eval.py @@ -454,7 +454,7 @@ def _compile_episode_data( @parser.wrap() -def eval(cfg: EvalPipelineConfig): +def eval_main(cfg: EvalPipelineConfig): logging.info(pformat(asdict(cfg))) # Check device is available @@ -499,4 +499,4 @@ def eval(cfg: EvalPipelineConfig): if __name__ == "__main__": init_logging() - eval() + eval_main() diff --git a/lerobot/scripts/push_dataset_to_hub.py b/lerobot/scripts/push_dataset_to_hub.py index 0233ede6..3de2462b 100644 --- a/lerobot/scripts/push_dataset_to_hub.py +++ b/lerobot/scripts/push_dataset_to_hub.py @@ -175,7 +175,7 @@ def push_dataset_to_hub( # Robustify when `local_dir` is str instead of Path local_dir = Path(local_dir) - # Send warning if local_dir isn't well formated + # Send warning if local_dir isn't well formatted if local_dir.parts[-2] != user_id or local_dir.parts[-1] != dataset_id: warnings.warn( f"`local_dir` ({local_dir}) doesn't contain a community or user id `/` the name of the dataset that match the `repo_id` (e.g. 'data/lerobot/pusht'). Following this naming convention is advised, but not mandatory.", diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index f3c57fe2..e36c697a 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -72,7 +72,7 @@ def update_policy( # TODO(rcadene): policy.unnormalize_outputs(out_dict) grad_scaler.scale(loss).backward() - # Unscale the graident of the optimzer's assigned params in-place **prior to gradient clipping**. + # Unscale the gradient of the optimizer's assigned params in-place **prior to gradient clipping**. grad_scaler.unscale_(optimizer) grad_norm = torch.nn.utils.clip_grad_norm_( diff --git a/lerobot/scripts/visualize_dataset_html.py b/lerobot/scripts/visualize_dataset_html.py index a022c91e..a6899ce9 100644 --- a/lerobot/scripts/visualize_dataset_html.py +++ b/lerobot/scripts/visualize_dataset_html.py @@ -150,7 +150,7 @@ def run_server( 400, ) dataset_version = ( - dataset.meta._version if isinstance(dataset, LeRobotDataset) else dataset.codebase_version + str(dataset.meta._version) if isinstance(dataset, LeRobotDataset) else dataset.codebase_version ) match = re.search(r"v(\d+)\.", dataset_version) if match: @@ -194,7 +194,7 @@ def run_server( ] response = requests.get( - f"https://huggingface.co/datasets/{repo_id}/resolve/main/meta/episodes.jsonl" + f"https://huggingface.co/datasets/{repo_id}/resolve/main/meta/episodes.jsonl", timeout=5 ) response.raise_for_status() # Split into lines and parse each line as JSON @@ -245,16 +245,17 @@ def get_episode_data(dataset: LeRobotDataset | IterableNamespace, episode_index) if isinstance(dataset, LeRobotDataset) else dataset.features[column_name].shape[0] ) - header += [f"{column_name}_{i}" for i in range(dim_state)] if "names" in dataset.features[column_name] and dataset.features[column_name]["names"]: column_names = dataset.features[column_name]["names"] while not isinstance(column_names, list): column_names = list(column_names.values())[0] else: - column_names = [f"motor_{i}" for i in range(dim_state)] + column_names = [f"{column_name}_{i}" for i in range(dim_state)] columns.append({"key": column_name, "value": column_names}) + header += column_names + selected_columns.insert(0, "timestamp") if isinstance(dataset, LeRobotDataset): @@ -317,7 +318,9 @@ def get_episode_language_instruction(dataset: LeRobotDataset, 
ep_index: int) -> def get_dataset_info(repo_id: str) -> IterableNamespace: - response = requests.get(f"https://huggingface.co/datasets/{repo_id}/resolve/main/meta/info.json") + response = requests.get( + f"https://huggingface.co/datasets/{repo_id}/resolve/main/meta/info.json", timeout=5 + ) response.raise_for_status() # Raises an HTTPError for bad responses dataset_info = response.json() dataset_info["repo_id"] = repo_id @@ -364,7 +367,7 @@ def visualize_dataset_html( template_folder=template_dir, ) else: - # Create a simlink from the dataset video folder containg mp4 files to the output directory + # Create a simlink from the dataset video folder containing mp4 files to the output directory # so that the http server can get access to the mp4 files. if isinstance(dataset, LeRobotDataset): ln_videos_dir = static_dir / "videos" diff --git a/lerobot/templates/visualize_dataset_template.html b/lerobot/templates/visualize_dataset_template.html index 08de3e3d..96ef28d3 100644 --- a/lerobot/templates/visualize_dataset_template.html +++ b/lerobot/templates/visualize_dataset_template.html @@ -14,21 +14,7 @@ - +
[The body of this template diff was lost during extraction: the HTML markup is gone and only Jinja fragments remain. The first hunk (@@ -14,21 +14,7 @@) removes the `{% for episode in episodes %} ... {{ episode }} ... {% endfor %}` episode list in favor of new markup that could not be recovered; a second hunk (@@ -230,14 +246,16 @@) follows.]