Merge remote-tracking branch 'origin/user/rcadene/2024_10_12_refactor_record' into user/rcadene/2024_09_18_async_inference

This commit is contained in:
Remi Cadene
2024-10-16 12:20:19 +02:00
48 changed files with 7400 additions and 2380 deletions

View File

@@ -65,7 +65,6 @@ htmlcov/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
@@ -73,6 +72,11 @@ coverage.xml
.hypothesis/
.pytest_cache/
# Ignore .cache except calibration
.cache/*
!.cache/calibration/
!.cache/calibration/**
# Translations
*.mo
*.pot

View File

@@ -11,6 +11,7 @@ on:
- ".github/**"
- "poetry.lock"
- "Makefile"
- ".cache/**"
push:
branches:
- main
@@ -21,6 +22,7 @@ on:
- ".github/**"
- "poetry.lock"
- "Makefile"
- ".cache/**"
jobs:
pytest:
@@ -35,13 +37,17 @@ jobs:
lfs: true # Ensure LFS files are pulled
- name: Install apt dependencies
run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev ffmpeg
# portaudio19-dev is needed to install pyaudio
run: |
sudo apt-get update && \
sudo apt-get install -y libegl1-mesa-dev ffmpeg portaudio19-dev
- name: Install poetry
run: |
pipx install poetry && poetry config virtualenvs.in-project true
echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
# TODO(rcadene, aliberts): python 3.12 seems to be used in the tests, not python 3.10
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
@@ -60,7 +66,6 @@ jobs:
-W ignore::UserWarning:gymnasium.utils.env_checker:247 \
&& rm -rf tests/outputs outputs
pytest-minimal:
name: Pytest (minimal install)
runs-on: ubuntu-latest
@@ -80,6 +85,7 @@ jobs:
pipx install poetry && poetry config virtualenvs.in-project true
echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
# TODO(rcadene, aliberts): python 3.12 seems to be used in the tests, not python 3.10
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
@@ -110,7 +116,10 @@ jobs:
lfs: true # Ensure LFS files are pulled
- name: Install apt dependencies
run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev
# portaudio19-dev is needed to install pyaudio
run: |
sudo apt-get update && \
sudo apt-get install -y libegl1-mesa-dev portaudio19-dev
- name: Install poetry
run: |

View File

@@ -45,7 +45,7 @@ poetry install --sync --extras "dynamixel"
```bash
conda install -c conda-forge ffmpeg
pip uninstall opencv-python
conda install -c conda-forge opencv>=4.10.0
conda install -c conda-forge "opencv>=4.10.0"
```
You are now ready to plug the 5V power supply into the motor bus of the leader arm (the smaller one), since all its motors only require 5V.

examples/8_use_stretch.md
View File

@@ -0,0 +1,158 @@
This tutorial explains how to use [Stretch 3](https://hello-robot.com/stretch-3-product) with LeRobot.
## Setup
Familiarize yourself with Stretch by following its [tutorials](https://docs.hello-robot.com/0.3/getting_started/hello_robot/) (recommended).
To use LeRobot on Stretch, 3 options are available:
- [tethered setup](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#tethered-setup)
- [untethered setup](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#untethered-setup)
- ssh directly into Stretch (you will first need to install and configure openssh-server on Stretch using one of the two setups above)
## Install LeRobot
On Stretch's CLI, follow these steps:
1. [Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install):
```bash
mkdir -p ~/miniconda3
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
rm ~/miniconda3/miniconda.sh
~/miniconda3/bin/conda init bash
```
2. Comment out these lines in `~/.profile` (they can mess up the paths used by conda, and `~/.local/bin` should already be in your PATH):
```
# set PATH so it includes user's private bin if it exists
if [ -d "$HOME/.local/bin" ] ; then
PATH="$HOME/.local/bin:$PATH"
fi
```
3. Restart shell or `source ~/.bashrc`
4. Create and activate a fresh conda environment for lerobot
```bash
conda create -y -n lerobot python=3.10 && conda activate lerobot
```
5. Clone LeRobot:
```bash
git clone https://github.com/huggingface/lerobot.git ~/lerobot
```
6. Install LeRobot with stretch dependencies:
```bash
cd ~/lerobot && pip install -e ".[stretch]"
```
> **Note:** If you get this message, you can ignore it: `ERROR: pip's dependency resolver does not currently take into account all the packages that are installed.`
And install extra dependencies for recording datasets on Linux:
```bash
conda install -y -c conda-forge ffmpeg
pip uninstall -y opencv-python
conda install -y -c conda-forge "opencv>=4.10.0"
```
7. Run a [system check](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#system-check) to make sure your robot is ready:
```bash
stretch_system_check.py
```
> **Note:** You may need to free the "robot process" after booting Stretch by running `stretch_free_robot_process.py`. For more info, see Stretch's [doc](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#turning-off-gamepad-teleoperation).
You should get something like this:
```bash
For use with S T R E T C H (R) from Hello Robot Inc.
---------------------------------------------------------------------
Model = Stretch 3
Tool = DexWrist 3 w/ Gripper
Serial Number = stretch-se3-3054
---- Checking Hardware ----
[Pass] Comms are ready
[Pass] Actuators are ready
[Warn] Sensors not ready (IMU AZ = -10.19 out of range -10.1 to -9.5)
[Pass] Battery voltage is 13.6 V
---- Checking Software ----
[Pass] Ubuntu 22.04 is ready
[Pass] All APT pkgs are setup correctly
[Pass] Firmware is up-to-date
[Pass] Python pkgs are up-to-date
[Pass] ROS2 Humble is ready
```
## Teleoperate, record a dataset and run a policy
**Calibrate (Optional)**
Before operating Stretch, you need to [home](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#homing) it first. Give Stretch some space, as this procedure will move the robot's arm and gripper. Now run this command:
```bash
python lerobot/scripts/control_robot.py calibrate \
--robot-path lerobot/configs/robot/stretch.yaml
```
This is equivalent to running `stretch_robot_home.py`
> **Note:** If you run any of the LeRobot scripts below and Stretch is not properly homed, it will automatically home/calibrate first.
**Teleoperate**
Before trying teleoperation, you need to activate the gamepad controller by pressing the middle button. For more info, see Stretch's [doc](https://docs.hello-robot.com/0.3/getting_started/hello_robot/#gamepad-teleoperation).
Now try out teleoperation (see the documentation above to learn about the gamepad controls):
```bash
python lerobot/scripts/control_robot.py teleoperate \
--robot-path lerobot/configs/robot/stretch.yaml
```
This is essentially the same as running `stretch_gamepad_teleop.py`
**Record a dataset**
Once you're familiar with the gamepad controls and after a bit of practice, you can try to record your first dataset with Stretch.
If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
```bash
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Store your Hugging Face repository name in a variable to run these commands:
```bash
HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER
```
Record one episode:
```bash
python lerobot/scripts/control_robot.py record \
--robot-path lerobot/configs/robot/stretch.yaml \
--fps 20 \
--root data \
--repo-id ${HF_USER}/stretch_test \
--tags stretch tutorial \
--warmup-time-s 3 \
--episode-time-s 40 \
--reset-time-s 10 \
--num-episodes 1 \
--push-to-hub 0
```
> **Note:** If you're using ssh to connect to Stretch and run this script, you won't be able to visualize its camera feeds (though they will still be recorded). To see the camera streams, use a [tethered](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#tethered-setup) or [untethered setup](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#untethered-setup).
**Replay an episode**
Now try to replay this episode (make sure the robot's initial position is the same):
```bash
python lerobot/scripts/control_robot.py replay \
--robot-path lerobot/configs/robot/stretch.yaml \
--fps 20 \
--root data \
--repo-id ${HF_USER}/stretch_test \
--episode 0
```
Follow the [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) to train a policy on your data and run inference on your robot. You will need to adapt the code for Stretch.
> TODO(rcadene, aliberts): Add already setup environment and policy yaml configuration files
If you need help, please reach out on Discord in the channel `#stretch3-mobile-arm`.

examples/9_use_aloha.md
View File

@@ -0,0 +1,179 @@
This tutorial explains how to use [Aloha and Aloha 2 stationary](https://www.trossenrobotics.com/aloha-stationary) with LeRobot.
## Setup
Follow the [documentation from Trossen Robotics](https://docs.trossenrobotics.com/aloha_docs/getting_started/stationary/hardware_setup.html) for setting up the hardware and plugging the 4 arms and 4 cameras into your computer.
## Install LeRobot
On your computer:
1. [Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install):
```bash
mkdir -p ~/miniconda3
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
rm ~/miniconda3/miniconda.sh
~/miniconda3/bin/conda init bash
```
2. Restart shell or `source ~/.bashrc`
3. Create and activate a fresh conda environment for lerobot
```bash
conda create -y -n lerobot python=3.10 && conda activate lerobot
```
4. Clone LeRobot:
```bash
git clone https://github.com/huggingface/lerobot.git ~/lerobot
```
5. Install LeRobot with dependencies for the Aloha motors (dynamixel) and cameras (intelrealsense):
```bash
cd ~/lerobot && pip install -e ".[dynamixel, intelrealsense]"
```
And install extra dependencies for recording datasets on Linux:
```bash
conda install -y -c conda-forge ffmpeg
pip uninstall -y opencv-python
conda install -y -c conda-forge "opencv>=4.10.0"
```
## Teleoperate
**/!\ FOR SAFETY, READ THIS /!\**
Teleoperation consists of manually operating the leader arms to move the follower arms. Importantly:
1. Make sure your leader arms are in the same position as the follower arms, so that the follower arms don't move too fast to match the leader arms,
2. Our code assumes that your robot has been assembled following Trossen Robotics instructions. This allows us to skip calibration, as we use the pre-defined calibration files in `.cache/calibration/aloha_default`. If you replace a motor, make sure you follow the exact instructions from Trossen Robotics.
By running the following code, you can start your first **SAFE** teleoperation:
```bash
python lerobot/scripts/control_robot.py teleoperate \
--robot-path lerobot/configs/robot/aloha.yaml \
--robot-overrides max_relative_target=5
```
By adding `--robot-overrides max_relative_target=5`, we override the default value for `max_relative_target` defined in `lerobot/configs/robot/aloha.yaml`. It is set to `5` to limit the magnitude of each movement for more safety, but the teleoperation won't be smooth. When you feel confident, you can disable this limit by adding `--robot-overrides max_relative_target=null` to the command line:
```bash
python lerobot/scripts/control_robot.py teleoperate \
--robot-path lerobot/configs/robot/aloha.yaml \
--robot-overrides max_relative_target=null
```
## Record a dataset
Once you're familiar with teleoperation, you can record your first dataset with Aloha.
If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
```bash
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Store your Hugging Face repository name in a variable to run these commands:
```bash
HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER
```
Record 2 episodes and upload your dataset to the hub:
```bash
python lerobot/scripts/control_robot.py record \
--robot-path lerobot/configs/robot/aloha.yaml \
--robot-overrides max_relative_target=null \
--fps 30 \
--root data \
--repo-id ${HF_USER}/aloha_test \
--tags aloha tutorial \
--warmup-time-s 5 \
--episode-time-s 40 \
--reset-time-s 10 \
--num-episodes 2 \
--push-to-hub 1
```
## Visualize a dataset
If you uploaded your dataset to the hub with `--push-to-hub 1`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy-pasting your repo id, given by:
```bash
echo ${HF_USER}/aloha_test
```
If you didn't upload your dataset (i.e. you used `--push-to-hub 0`), you can also visualize it locally with:
```bash
python lerobot/scripts/visualize_dataset_html.py \
--root data \
--repo-id ${HF_USER}/aloha_test
```
## Replay an episode
**/!\ FOR SAFETY, READ THIS /!\**
Replay consists of automatically replaying the sequence of actions (i.e. goal positions for your motors) recorded in a given dataset episode. Make sure the current initial position of your robot is similar to the one in your episode, so that your follower arms don't move too fast to reach the first goal positions. For safety, you might want to add `--robot-overrides max_relative_target=5` to your command line, as explained above.
Now try to replay the first episode on your robot:
```bash
python lerobot/scripts/control_robot.py replay \
--robot-path lerobot/configs/robot/aloha.yaml \
--robot-overrides max_relative_target=null \
--fps 30 \
--root data \
--repo-id ${HF_USER}/aloha_test \
--episode 0
```
## Train a policy
To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
```bash
DATA_DIR=data python lerobot/scripts/train.py \
dataset_repo_id=${HF_USER}/aloha_test \
policy=act_aloha_real \
env=aloha_real \
hydra.run.dir=outputs/train/act_aloha_test \
hydra.job.name=act_aloha_test \
device=cuda \
wandb.enable=true
```
Let's explain it:
1. We provided the dataset as an argument with `dataset_repo_id=${HF_USER}/aloha_test`.
2. We provided the policy with `policy=act_aloha_real`. This loads configurations from [`lerobot/configs/policy/act_aloha_real.yaml`](../lerobot/configs/policy/act_aloha_real.yaml). Importantly, this policy uses 4 cameras as input: `cam_right_wrist`, `cam_left_wrist`, `cam_high`, and `cam_low`.
3. We provided an environment as an argument with `env=aloha_real`. This loads configurations from [`lerobot/configs/env/aloha_real.yaml`](../lerobot/configs/env/aloha_real.yaml). Note: this yaml defines 18 dimensions for `state_dim` and `action_dim`, corresponding to 18 motors, not the 14 motors used in previous Aloha work. This is because we include the `shoulder_shadow` and `elbow_shadow` motors for simplicity.
4. We provided `device=cuda` since we are training on an Nvidia GPU.
5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional, but if you use it, make sure you are logged in by running `wandb login`.
6. We added `DATA_DIR=data` to access your dataset stored in your local `data` directory. If you don't provide `DATA_DIR`, your dataset will be downloaded from the Hugging Face hub to your cache folder `$HOME/.cache/huggingface`. In future versions of `lerobot`, both directories will be in sync.
Training should take several hours. You will find checkpoints in `outputs/train/act_aloha_test/checkpoints`.
## Evaluate your policy
You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
```bash
python lerobot/scripts/control_robot.py record \
--robot-path lerobot/configs/robot/aloha.yaml \
--robot-overrides max_relative_target=null \
--fps 30 \
--root data \
--repo-id ${HF_USER}/eval_act_aloha_test \
--tags aloha tutorial eval \
--warmup-time-s 5 \
--episode-time-s 40 \
--reset-time-s 10 \
--num-episodes 10 \
--num-image-writer-processes 1 \
-p outputs/train/act_aloha_test/checkpoints/last/pretrained_model
```
As you can see, it's almost the same command as the one previously used to record your training dataset. Three things changed:
1. There is an additional `-p` argument which indicates the path to your policy checkpoint (e.g. `-p outputs/train/act_aloha_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `-p ${HF_USER}/act_aloha_test`).
2. The name of the dataset begins with `eval` to reflect that you are running inference (e.g. `--repo-id ${HF_USER}/eval_act_aloha_test`).
3. We use `--num-image-writer-processes 1` instead of the default value (`0`). On our computer, using a dedicated process to write images from the 4 cameras to disk allows us to reach a constant 30 fps during inference. Feel free to explore different values for `--num-image-writer-processes`.
## More
Follow this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth explanation.
If you have any questions or need help, please reach out on Discord in the channel `#aloha-arm`.

View File

@@ -28,6 +28,8 @@ Example:
print(lerobot.available_policies)
print(lerobot.available_policies_per_env)
print(lerobot.available_robots)
print(lerobot.available_cameras)
print(lerobot.available_motors)
```
When implementing a new dataset loadable with LeRobotDataset, follow these steps:
@@ -198,12 +200,25 @@ available_robots = [
"aloha",
]
# lists all available cameras from `lerobot/common/robot_devices/cameras`
available_cameras = [
"opencv",
"intelrealsense",
]
# lists all available motors from `lerobot/common/robot_devices/motors`
available_motors = [
"dynamixel",
]
# keys and values refer to yaml files
available_policies_per_env = {
"aloha": ["act"],
"pusht": ["diffusion", "vqbet"],
"xarm": ["tdmpc"],
"dora_aloha_real": ["act_real"],
"koch_real": ["act_koch_real"],
"aloha_real": ["act_aloha_real"],
"dora_aloha_real": ["act_aloha_real"],
}
env_task_pairs = [(env, task) for env, tasks in available_tasks_per_env.items() for task in tasks]

View File

@@ -68,7 +68,7 @@ def get_stats_einops_patterns(dataset, num_workers=0):
return stats_patterns
def compute_stats(dataset, batch_size=32, num_workers=16, max_num_samples=None):
def compute_stats(dataset, batch_size=8, num_workers=8, max_num_samples=None):
"""Compute mean/std and min/max statistics of all data keys in a LeRobotDataset."""
if max_num_samples is None:
max_num_samples = len(dataset)

View File

@@ -0,0 +1,468 @@
"""Functions to create an empty dataset, and populate it with frames."""
# TODO(rcadene, aliberts): to adapt as class methods of next version of LeRobotDataset
import concurrent
import json
import logging
import multiprocessing
import shutil
from pathlib import Path
import torch
import tqdm
from PIL import Image
from lerobot.common.datasets.compute_stats import compute_stats
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import to_hf_dataset
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, get_default_encoding
from lerobot.common.datasets.utils import calculate_episode_data_index, create_branch
from lerobot.common.datasets.video_utils import encode_video_frames
from lerobot.common.utils.utils import log_say
from lerobot.scripts.push_dataset_to_hub import (
push_dataset_card_to_hub,
push_meta_data_to_hub,
push_videos_to_hub,
save_meta_data,
)
########################################################################################
# Asynchronous saving of images on disk
########################################################################################
def safe_stop_image_writer(func):
# TODO(aliberts): Allow to pass custom exceptions
# (e.g. ThreadServiceExit, KeyboardInterrupt, SystemExit, UnpluggedError, DynamixelCommError)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
image_writer = kwargs.get("dataset", {}).get("image_writer")
if image_writer is not None:
print("Waiting for image writer to terminate...")
stop_image_writer(image_writer, timeout=20)
raise e
return wrapper
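# Example usage (an illustrative sketch, not part of this module): the decorated
# function must receive the recording `dataset` dict as a keyword argument, since
# the wrapper looks it up via `kwargs.get("dataset", {}).get("image_writer")`.
#
#     @safe_stop_image_writer
#     def record_episode(robot, dataset=None):  # hypothetical function
#         ...  # any exception raised here first stops the image writer
#
#     record_episode(robot, dataset=dataset)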
def save_image(img_tensor, key, frame_index, episode_index, videos_dir: str):
img = Image.fromarray(img_tensor.numpy())
path = Path(videos_dir) / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png"
path.parent.mkdir(parents=True, exist_ok=True)
img.save(str(path), quality=100)
def loop_to_save_images_in_threads(image_queue, num_threads):
if num_threads < 1:
raise NotImplementedError(f"Only `num_threads>=1` is supported for now, but {num_threads=} given.")
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
futures = []
while True:
# Blocks until a frame is available
frame_data = image_queue.get()
# As usually done, exit loop when receiving None to stop the worker
if frame_data is None:
break
image, key, frame_index, episode_index, videos_dir = frame_data
futures.append(executor.submit(save_image, image, key, frame_index, episode_index, videos_dir))
# Before exiting function, wait for all threads to complete
with tqdm.tqdm(total=len(futures), desc="Writing images") as progress_bar:
concurrent.futures.wait(futures)
progress_bar.update(len(futures))
def start_image_writer_processes(image_queue, num_processes, num_threads_per_process):
if num_processes < 1:
raise ValueError(f"Only `num_processes>=1` is supported, but {num_processes=} given.")
if num_threads_per_process < 1:
raise NotImplementedError(
"Only `num_threads_per_process>=1` is supported for now, but {num_threads_per_process=} given."
)
processes = []
for _ in range(num_processes):
process = multiprocessing.Process(
target=loop_to_save_images_in_threads,
args=(image_queue, num_threads_per_process),
)
process.start()
processes.append(process)
return processes
def stop_processes(processes, queue, timeout):
# Send None to each process to signal them to stop
for _ in processes:
queue.put(None)
# Wait up to `timeout` seconds for all processes to terminate
for process in processes:
process.join(timeout=timeout)
# If a process is still alive after the timeout, force its termination
if process.is_alive():
process.terminate()
# Close the queue, no more items can be put in the queue
queue.close()
# Ensure all background queue threads have finished
queue.join_thread()
def start_image_writer(num_processes, num_threads):
"""This function abstract away the initialisation of processes or/and threads to
save images on disk asynchrounously, which is critical to control a robot and record data
at a high frame rate.
When `num_processes=0`, it returns a dictionary containing a threads pool of size `num_threads`.
When `num_processes>0`, it returns a dictionary containing a processes pool of size `num_processes`,
where each subprocess starts their own threads pool of size `num_threads`.
The optimal number of processes and threads depends on your computer capabilities.
We advise to use 4 threads per camera with 0 processes. If the fps is not stable, try to increase or lower
the number of threads. If it is still not stable, try to use 1 subprocess, or more.
"""
image_writer = {}
if num_processes == 0:
futures = []
threads_pool = concurrent.futures.ThreadPoolExecutor(max_workers=num_threads)
image_writer["threads_pool"], image_writer["futures"] = threads_pool, futures
else:
# TODO(rcadene): When using num_processes>1, `multiprocessing.Manager().Queue()`
# might be better than `multiprocessing.Queue()`. Source: https://www.geeksforgeeks.org/python-multiprocessing-queue-vs-multiprocessing-manager-queue
image_queue = multiprocessing.Queue()
processes_pool = start_image_writer_processes(
image_queue, num_processes=num_processes, num_threads_per_process=num_threads
)
image_writer["processes_pool"], image_writer["image_queue"] = processes_pool, image_queue
return image_writer
def async_save_image(image_writer, image, key, frame_index, episode_index, videos_dir):
"""This function abstract away the saving of an image on disk asynchrounously. It uses a dictionary
called image writer which contains either a pool of processes or a pool of threads.
"""
if "threads_pool" in image_writer:
threads_pool, futures = image_writer["threads_pool"], image_writer["futures"]
futures.append(threads_pool.submit(save_image, image, key, frame_index, episode_index, videos_dir))
else:
image_queue = image_writer["image_queue"]
image_queue.put((image, key, frame_index, episode_index, videos_dir))
def stop_image_writer(image_writer, timeout):
if "threads_pool" in image_writer:
futures = image_writer["futures"]
# Before exiting function, wait for all threads to complete
with tqdm.tqdm(total=len(futures), desc="Writing images") as progress_bar:
concurrent.futures.wait(futures, timeout=timeout)
progress_bar.update(len(futures))
else:
processes_pool, image_queue = image_writer["processes_pool"], image_writer["image_queue"]
stop_processes(processes_pool, image_queue, timeout=timeout)
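# Typical lifecycle of the image writer (an illustrative sketch, not part of this module;
# the tensor shape below is an assumption, any HWC uint8 tensor accepted by
# `Image.fromarray` works):
#
#     image_writer = start_image_writer(num_processes=0, num_threads=4)
#     img = torch.zeros((480, 640, 3), dtype=torch.uint8)  # placeholder frame
#     async_save_image(image_writer, image=img, key="observation.images.cam_high",
#                      frame_index=0, episode_index=0, videos_dir="data/videos")
#     stop_image_writer(image_writer, timeout=20)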
########################################################################################
# Functions to initialize, resume and populate a dataset
########################################################################################
def init_dataset(
repo_id,
root,
force_override,
fps,
video,
write_images,
num_image_writer_processes,
num_image_writer_threads,
):
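"""Initialize the dictionary that tracks an in-progress recording session.
Creates the `root/repo_id` directory (wiping it first when `force_override=True`) with
`episodes` and `videos` subdirectories, resumes the episode count from
`data_recording_info.json` when it exists, and starts the asynchronous image writer
when `write_images=True`.
"""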
local_dir = Path(root) / repo_id
if local_dir.exists() and force_override:
shutil.rmtree(local_dir)
episodes_dir = local_dir / "episodes"
episodes_dir.mkdir(parents=True, exist_ok=True)
videos_dir = local_dir / "videos"
videos_dir.mkdir(parents=True, exist_ok=True)
# Logic to resume data recording
rec_info_path = episodes_dir / "data_recording_info.json"
if rec_info_path.exists():
with open(rec_info_path) as f:
rec_info = json.load(f)
num_episodes = rec_info["last_episode_index"] + 1
else:
num_episodes = 0
dataset = {
"repo_id": repo_id,
"local_dir": local_dir,
"videos_dir": videos_dir,
"episodes_dir": episodes_dir,
"fps": fps,
"video": video,
"rec_info_path": rec_info_path,
"num_episodes": num_episodes,
}
if write_images:
# Initialize processes and/or threads dedicated to saving images on disk asynchronously,
# which is critical for controlling a robot and recording data at a high frame rate.
image_writer = start_image_writer(
num_processes=num_image_writer_processes,
num_threads=num_image_writer_threads,
)
dataset["image_writer"] = image_writer
return dataset
def add_frame(dataset, observation, action):
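"""Append one frame (observation and action) to the episode currently being recorded.
Lazily initializes the episode buffer on the first call, stores non-image modalities and
actions in memory, and dispatches images to the asynchronous image writer when one is
configured. Image entries reference either the future mp4 video or the individual png
frames, depending on `dataset["video"]`.
"""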
if "current_episode" not in dataset:
# initialize episode dictionary
ep_dict = {}
for key in observation:
if key not in ep_dict:
ep_dict[key] = []
for key in action:
if key not in ep_dict:
ep_dict[key] = []
ep_dict["episode_index"] = []
ep_dict["frame_index"] = []
ep_dict["timestamp"] = []
ep_dict["next.done"] = []
dataset["current_episode"] = ep_dict
dataset["current_frame_index"] = 0
ep_dict = dataset["current_episode"]
episode_index = dataset["num_episodes"]
frame_index = dataset["current_frame_index"]
videos_dir = dataset["videos_dir"]
video = dataset["video"]
fps = dataset["fps"]
ep_dict["episode_index"].append(episode_index)
ep_dict["frame_index"].append(frame_index)
ep_dict["timestamp"].append(frame_index / fps)
ep_dict["next.done"].append(False)
img_keys = [key for key in observation if "image" in key]
non_img_keys = [key for key in observation if "image" not in key]
# Save all observed modalities except images
for key in non_img_keys:
ep_dict[key].append(observation[key])
# Save actions
for key in action:
ep_dict[key].append(action[key])
if "image_writer" not in dataset:
dataset["current_frame_index"] += 1
return
# Save images
image_writer = dataset["image_writer"]
for key in img_keys:
imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
async_save_image(
image_writer,
image=observation[key],
key=key,
frame_index=frame_index,
episode_index=episode_index,
videos_dir=str(videos_dir),
)
if video:
fname = f"{key}_episode_{episode_index:06d}.mp4"
frame_info = {"path": f"videos/{fname}", "timestamp": frame_index / fps}
else:
frame_info = str(imgs_dir / f"frame_{frame_index:06d}.png")
ep_dict[key].append(frame_info)
dataset["current_frame_index"] += 1
def delete_current_episode(dataset):
del dataset["current_episode"]
del dataset["current_frame_index"]
# delete temporary images
episode_index = dataset["num_episodes"]
videos_dir = dataset["videos_dir"]
for tmp_imgs_dir in videos_dir.glob(f"*_episode_{episode_index:06d}"):
shutil.rmtree(tmp_imgs_dir)
def save_current_episode(dataset):
episode_index = dataset["num_episodes"]
ep_dict = dataset["current_episode"]
episodes_dir = dataset["episodes_dir"]
rec_info_path = dataset["rec_info_path"]
ep_dict["next.done"][-1] = True
for key in ep_dict:
if "observation" in key and "image" not in key:
ep_dict[key] = torch.stack(ep_dict[key])
ep_dict["action"] = torch.stack(ep_dict["action"])
ep_dict["episode_index"] = torch.tensor(ep_dict["episode_index"])
ep_dict["frame_index"] = torch.tensor(ep_dict["frame_index"])
ep_dict["timestamp"] = torch.tensor(ep_dict["timestamp"])
ep_dict["next.done"] = torch.tensor(ep_dict["next.done"])
ep_path = episodes_dir / f"episode_{episode_index}.pth"
torch.save(ep_dict, ep_path)
rec_info = {
"last_episode_index": episode_index,
}
with open(rec_info_path, "w") as f:
json.dump(rec_info, f)
# force re-initialization of the episode dictionary during add_frame
del dataset["current_episode"]
dataset["num_episodes"] += 1
def encode_videos(dataset, image_keys, play_sounds):
log_say("Encoding videos", play_sounds)
num_episodes = dataset["num_episodes"]
videos_dir = dataset["videos_dir"]
local_dir = dataset["local_dir"]
fps = dataset["fps"]
# Use ffmpeg to convert frames stored as png into mp4 videos
for episode_index in tqdm.tqdm(range(num_episodes)):
for key in image_keys:
# key = f"observation.images.{name}"
tmp_imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
fname = f"{key}_episode_{episode_index:06d}.mp4"
video_path = local_dir / "videos" / fname
if video_path.exists():
# Skip if video is already encoded. Could be the case when resuming data recording.
continue
# note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speed up encoding,
# since video encoding with ffmpeg is already using multithreading.
encode_video_frames(tmp_imgs_dir, video_path, fps, overwrite=True)
shutil.rmtree(tmp_imgs_dir)
def from_dataset_to_lerobot_dataset(dataset, play_sounds):
log_say("Consolidate episodes", play_sounds)
num_episodes = dataset["num_episodes"]
episodes_dir = dataset["episodes_dir"]
videos_dir = dataset["videos_dir"]
video = dataset["video"]
fps = dataset["fps"]
repo_id = dataset["repo_id"]
ep_dicts = []
for episode_index in tqdm.tqdm(range(num_episodes)):
ep_path = episodes_dir / f"episode_{episode_index}.pth"
ep_dict = torch.load(ep_path)
ep_dicts.append(ep_dict)
data_dict = concatenate_episodes(ep_dicts)
if video:
image_keys = [key for key in data_dict if "image" in key]
encode_videos(dataset, image_keys, play_sounds)
hf_dataset = to_hf_dataset(data_dict, video)
episode_data_index = calculate_episode_data_index(hf_dataset)
info = {
"codebase_version": CODEBASE_VERSION,
"fps": fps,
"video": video,
}
if video:
info["encoding"] = get_default_encoding()
lerobot_dataset = LeRobotDataset.from_preloaded(
repo_id=repo_id,
hf_dataset=hf_dataset,
episode_data_index=episode_data_index,
info=info,
videos_dir=videos_dir,
)
return lerobot_dataset
def save_lerobot_dataset_on_disk(lerobot_dataset):
hf_dataset = lerobot_dataset.hf_dataset
info = lerobot_dataset.info
stats = lerobot_dataset.stats
episode_data_index = lerobot_dataset.episode_data_index
local_dir = lerobot_dataset.videos_dir.parent
meta_data_dir = local_dir / "meta_data"
hf_dataset = hf_dataset.with_format(None)  # to remove transforms that can't be saved
hf_dataset.save_to_disk(str(local_dir / "train"))
save_meta_data(info, stats, episode_data_index, meta_data_dir)
def push_lerobot_dataset_to_hub(lerobot_dataset, tags):
hf_dataset = lerobot_dataset.hf_dataset
local_dir = lerobot_dataset.videos_dir.parent
videos_dir = lerobot_dataset.videos_dir
repo_id = lerobot_dataset.repo_id
video = lerobot_dataset.video
meta_data_dir = local_dir / "meta_data"
if not (local_dir / "train").exists():
raise ValueError(
"You need to run `save_lerobot_dataset_on_disk(lerobot_dataset)` before pushing to the hub."
)
hf_dataset.push_to_hub(repo_id, revision="main")
push_meta_data_to_hub(repo_id, meta_data_dir, revision="main")
push_dataset_card_to_hub(repo_id, revision="main", tags=tags)
if video:
push_videos_to_hub(repo_id, videos_dir, revision="main")
create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)
def create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_sounds):
if "image_writer" in dataset:
logging.info("Waiting for image writer to terminate...")
image_writer = dataset["image_writer"]
stop_image_writer(image_writer, timeout=20)
lerobot_dataset = from_dataset_to_lerobot_dataset(dataset, play_sounds)
if run_compute_stats:
log_say("Computing dataset statistics", play_sounds)
lerobot_dataset.stats = compute_stats(lerobot_dataset)
else:
logging.info("Skipping computation of the dataset statistics")
lerobot_dataset.stats = {}
save_lerobot_dataset_on_disk(lerobot_dataset)
if push_to_hub:
push_lerobot_dataset_to_hub(lerobot_dataset, tags)
return lerobot_dataset
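# End-to-end recording flow (an illustrative sketch; `get_observation_and_action` and
# `num_frames` are hypothetical stand-ins for your robot control loop):
#
#     dataset = init_dataset(
#         repo_id="user/my_dataset", root="data", force_override=False,
#         fps=30, video=True, write_images=True,
#         num_image_writer_processes=0, num_image_writer_threads=4,
#     )
#     for _ in range(num_frames):
#         observation, action = get_observation_and_action()
#         add_frame(dataset, observation, action)
#     save_current_episode(dataset)
#     lerobot_dataset = create_lerobot_dataset(
#         dataset, run_compute_stats=True, push_to_hub=False, tags=None, play_sounds=False
#     )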

View File

@@ -189,7 +189,7 @@ class Logger:
training_state["scheduler"] = scheduler.state_dict()
torch.save(training_state, save_dir / self.training_state_file_name)
def save_checkpont(
def save_checkpoint(
self,
train_step: int,
policy: Policy,

View File

@@ -5,17 +5,17 @@ This file contains utilities for recording frames from Intel Realsense cameras.
import argparse
import concurrent.futures
import logging
import math
import shutil
import threading
import time
import traceback
from collections import Counter
from dataclasses import dataclass, replace
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import pyrealsense2 as rs
from PIL import Image
from lerobot.common.robot_devices.utils import (
@@ -28,57 +28,76 @@ from lerobot.scripts.control_robot import busy_wait
SERIAL_NUMBER_INDEX = 1
def find_camera_indices(raise_when_empty=True) -> list[int]:
def find_cameras(raise_when_empty=True, mock=False) -> list[dict]:
"""
Find the serial numbers of the Intel RealSense cameras
Find the names and the serial numbers of the Intel RealSense cameras
connected to the computer.
"""
camera_ids = []
if mock:
import tests.mock_pyrealsense2 as rs
else:
import pyrealsense2 as rs
cameras = []
for device in rs.context().query_devices():
serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX)))
camera_ids.append(serial_number)
name = device.get_info(rs.camera_info.name)
cameras.append(
{
"serial_number": serial_number,
"name": name,
}
)
if raise_when_empty and len(camera_ids) == 0:
if raise_when_empty and len(cameras) == 0:
raise OSError(
"Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware."
)
return camera_ids
return cameras
def save_image(img_array, camera_idx, frame_index, images_dir):
def save_image(img_array, serial_number, frame_index, images_dir):
try:
img = Image.fromarray(img_array)
path = images_dir / f"camera_{camera_idx}_frame_{frame_index:06d}.png"
path = images_dir / f"camera_{serial_number}_frame_{frame_index:06d}.png"
path.parent.mkdir(parents=True, exist_ok=True)
img.save(str(path), quality=100)
logging.info(f"Saved image: {path}")
except Exception as e:
logging.error(f"Failed to save image for camera {camera_idx} frame {frame_index}: {e}")
logging.error(f"Failed to save image for camera {serial_number} frame {frame_index}: {e}")
def save_images_from_cameras(
images_dir: Path,
camera_ids: list[int] | None = None,
serial_numbers: list[int] | None = None,
fps=None,
width=None,
height=None,
record_time_s=2,
mock=False,
):
"""
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
associated to a given camera index.
associated with a given serial number.
"""
if camera_ids is None:
camera_ids = find_camera_indices()
if serial_numbers is None or len(serial_numbers) == 0:
camera_infos = find_cameras(mock=mock)
serial_numbers = [cam["serial_number"] for cam in camera_infos]
if mock:
import tests.mock_cv2 as cv2
else:
import cv2
print("Connecting cameras")
cameras = []
for cam_idx in camera_ids:
camera = IntelRealSenseCamera(cam_idx, fps=fps, width=width, height=height)
for cam_sn in serial_numbers:
print(f"{cam_sn=}")
camera = IntelRealSenseCamera(cam_sn, fps=fps, width=width, height=height, mock=mock)
camera.connect()
print(
f"IntelRealSenseCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
f"IntelRealSenseCamera({camera.serial_number}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
)
cameras.append(camera)
@@ -93,7 +112,7 @@ def save_images_from_cameras(
frame_index = 0
start_time = time.perf_counter()
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
while True:
now = time.perf_counter()
@@ -103,12 +122,13 @@ def save_images_from_cameras(
image = camera.read() if fps is None else camera.async_read()
if image is None:
print("No Frame")
bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
executor.submit(
save_image,
bgr_converted_image,
camera.camera_index,
camera.serial_number,
frame_index,
images_dir,
)
@@ -140,6 +160,7 @@ class IntelRealSenseCameraConfig:
IntelRealSenseCameraConfig(90, 640, 480)
IntelRealSenseCameraConfig(30, 1280, 720)
IntelRealSenseCameraConfig(30, 640, 480, use_depth=True)
IntelRealSenseCameraConfig(30, 640, 480, rotation=90)
```
"""
@@ -149,6 +170,8 @@ class IntelRealSenseCameraConfig:
color_mode: str = "rgb"
use_depth: bool = False
force_hardware_reset: bool = True
rotation: int | None = None
mock: bool = False
def __post_init__(self):
if self.color_mode not in ["rgb", "bgr"]:
@@ -156,19 +179,23 @@ class IntelRealSenseCameraConfig:
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)
if (self.fps or self.width or self.height) and not (self.fps and self.width and self.height):
at_least_one_is_not_none = self.fps is not None or self.width is not None or self.height is not None
at_least_one_is_none = self.fps is None or self.width is None or self.height is None
if at_least_one_is_not_none and at_least_one_is_none:
raise ValueError(
"For `fps`, `width` and `height`, either all of them need to be set, or none of them, "
f"but {self.fps=}, {self.width=}, {self.height=} were provided."
)
if self.rotation not in [-90, None, 90, 180]:
raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
class IntelRealSenseCamera:
"""
The IntelRealSenseCamera class is similar to the OpenCVCamera class but adds additional features for Intel RealSense cameras:
- camera_index corresponds to the serial number of the camera,
- camera_index won't randomly change as it can be the case of OpenCVCamera for Linux,
- read is more reliable than OpenCVCamera,
- is instantiated with the camera's serial number, which won't randomly change as can be the case with OpenCVCamera indices on Linux,
- can also be instantiated with the camera's name (if it's unique) using IntelRealSenseCamera.init_from_name(),
- depth map can be returned.
To find the serial numbers of your cameras, you can run our utility script that will save a few frames for each camera:
@@ -181,8 +208,10 @@ class IntelRealSenseCamera:
Example of usage:
```python
camera_index = 128422271347
camera = IntelRealSenseCamera(camera_index)
# Instantiate with its serial number
camera = IntelRealSenseCamera(128422271347)
# Or by its name if it's unique
camera = IntelRealSenseCamera.init_from_name("Intel RealSense D405")
camera.connect()
color_image = camera.read()
# when done using the camera, consider disconnecting
@@ -191,19 +220,19 @@ class IntelRealSenseCamera:
Example of changing default fps, width, height and color_mode:
```python
camera = IntelRealSenseCamera(camera_index, fps=30, width=1280, height=720)
camera = IntelRealSenseCamera(serial_number, fps=30, width=1280, height=720)
camera.connect() # applies the settings, might error out if these settings are not compatible with the camera
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480)
camera = IntelRealSenseCamera(serial_number, fps=90, width=640, height=480)
camera.connect()
camera = IntelRealSenseCamera(camera_index, fps=90, width=640, height=480, color_mode="bgr")
camera = IntelRealSenseCamera(serial_number, fps=90, width=640, height=480, color_mode="bgr")
camera.connect()
```
Example of returning depth:
```python
camera = IntelRealSenseCamera(camera_index, use_depth=True)
camera = IntelRealSenseCamera(serial_number, use_depth=True)
camera.connect()
color_image, depth_map = camera.read()
```
@@ -211,7 +240,7 @@ class IntelRealSenseCamera:
def __init__(
self,
camera_index: int,
serial_number: int,
config: IntelRealSenseCameraConfig | None = None,
**kwargs,
):
@@ -221,13 +250,14 @@ class IntelRealSenseCamera:
# Overwrite the config arguments using kwargs
config = replace(config, **kwargs)
self.camera_index = camera_index
self.serial_number = serial_number
self.fps = config.fps
self.width = config.width
self.height = config.height
self.color_mode = config.color_mode
self.use_depth = config.use_depth
self.force_hardware_reset = config.force_hardware_reset
self.mock = config.mock
self.camera = None
self.is_connected = False
@@ -237,14 +267,55 @@ class IntelRealSenseCamera:
self.depth_map = None
self.logs = {}
if self.mock:
import tests.mock_cv2 as cv2
else:
import cv2
# TODO(aliberts): Do we keep original width/height or do we define them after rotation?
self.rotation = None
if config.rotation == -90:
self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
elif config.rotation == 90:
self.rotation = cv2.ROTATE_90_CLOCKWISE
elif config.rotation == 180:
self.rotation = cv2.ROTATE_180
@classmethod
def init_from_name(cls, name: str, config: IntelRealSenseCameraConfig | None = None, **kwargs):
camera_infos = find_cameras()
camera_names = [cam["name"] for cam in camera_infos]
this_name_count = Counter(camera_names)[name]
if this_name_count > 1:
# TODO(aliberts): Test this with multiple identical cameras (Aloha)
raise ValueError(
f"Multiple {name} cameras have been detected. Please use their serial number to instantiate them."
)
name_to_serial_dict = {cam["name"]: cam["serial_number"] for cam in camera_infos}
cam_sn = name_to_serial_dict[name]
if config is None:
config = IntelRealSenseCameraConfig()
# Overwrite the config arguments using kwargs
config = replace(config, **kwargs)
return cls(serial_number=cam_sn, config=config, **kwargs)
def connect(self):
if self.is_connected:
raise RobotDeviceAlreadyConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is already connected."
f"IntelRealSenseCamera({self.serial_number}) is already connected."
)
if self.mock:
import tests.mock_pyrealsense2 as rs
else:
import pyrealsense2 as rs
config = rs.config()
config.enable_device(str(self.camera_index))
config.enable_device(str(self.serial_number))
if self.fps and self.width and self.height:
# TODO(rcadene): can we set rgb8 directly?
@@ -260,7 +331,7 @@ class IntelRealSenseCamera:
self.camera = rs.pipeline()
try:
self.camera.start(config)
profile = self.camera.start(config)
is_camera_open = True
except RuntimeError:
is_camera_open = False
@@ -269,15 +340,41 @@ class IntelRealSenseCamera:
# If the camera doesn't work, display the camera indices corresponding to
# valid cameras.
if not is_camera_open:
# Verify that the provided `camera_index` is valid before printing the traceback
available_cam_ids = find_camera_indices()
if self.camera_index not in available_cam_ids:
# Verify that the provided `serial_number` is valid before printing the traceback
camera_infos = find_cameras()
serial_numbers = [cam["serial_number"] for cam in camera_infos]
if self.serial_number not in serial_numbers:
raise ValueError(
f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
"To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`."
f"`serial_number` is expected to be one of these available cameras {serial_numbers}, but {self.serial_number} is provided instead. "
"To find the serial number you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`."
)
raise OSError(f"Can't access IntelRealSenseCamera({self.camera_index}).")
raise OSError(f"Can't access IntelRealSenseCamera({self.serial_number}).")
color_stream = profile.get_stream(rs.stream.color)
color_profile = color_stream.as_video_stream_profile()
actual_fps = color_profile.fps()
actual_width = color_profile.width()
actual_height = color_profile.height()
# Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
# Using `OSError` since it's a broad error that encompasses issues related to device communication
raise OSError(
f"Can't set {self.fps=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_fps}."
)
if self.width is not None and self.width != actual_width:
raise OSError(
f"Can't set {self.width=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_width}."
)
if self.height is not None and self.height != actual_height:
raise OSError(
f"Can't set {self.height=} for IntelRealSenseCamera({self.serial_number}). Actual value is {actual_height}."
)
self.fps = round(actual_fps)
self.width = round(actual_width)
self.height = round(actual_height)
self.is_connected = True
@@ -293,9 +390,14 @@ class IntelRealSenseCamera:
"""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
)
if self.mock:
import tests.mock_cv2 as cv2
else:
import cv2
start_time = time.perf_counter()
frame = self.camera.wait_for_frames(timeout_ms=5000)
@@ -303,7 +405,7 @@ class IntelRealSenseCamera:
color_frame = frame.get_color_frame()
if not color_frame:
raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.camera_index}).")
raise OSError(f"Can't capture color image from IntelRealSenseCamera({self.serial_number}).")
color_image = np.asanyarray(color_frame.get_data())
@@ -323,6 +425,9 @@ class IntelRealSenseCamera:
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
)
if self.rotation is not None:
color_image = cv2.rotate(color_image, self.rotation)
# log the number of seconds it took to read the image
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
@@ -332,7 +437,7 @@ class IntelRealSenseCamera:
if self.use_depth:
depth_frame = frame.get_depth_frame()
if not depth_frame:
raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.camera_index}).")
raise OSError(f"Can't capture depth image from IntelRealSenseCamera({self.serial_number}).")
depth_map = np.asanyarray(depth_frame.get_data())
@@ -342,12 +447,15 @@ class IntelRealSenseCamera:
f"Can't capture depth map with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
)
if self.rotation is not None:
depth_map = cv2.rotate(depth_map, self.rotation)
return color_image, depth_map
else:
return color_image
def read_loop(self):
while self.stop_event is None or not self.stop_event.is_set():
while not self.stop_event.is_set():
if self.use_depth:
self.color_image, self.depth_map = self.read()
else:
@@ -357,7 +465,7 @@ class IntelRealSenseCamera:
"""Access the latest color image"""
if not self.is_connected:
raise RobotDeviceNotConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
)
if self.thread is None:
@@ -368,6 +476,7 @@ class IntelRealSenseCamera:
num_tries = 0
while self.color_image is None:
# TODO(rcadene, aliberts): intelrealsense has diverged compared to opencv over here
num_tries += 1
time.sleep(1 / self.fps)
if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()):
@@ -383,7 +492,7 @@ class IntelRealSenseCamera:
def disconnect(self):
if not self.is_connected:
raise RobotDeviceNotConnectedError(
f"IntelRealSenseCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
f"IntelRealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first."
)
if self.thread is not None and self.thread.is_alive():
@@ -408,11 +517,11 @@ if __name__ == "__main__":
description="Save a few frames using `IntelRealSenseCamera` for all cameras connected to the computer, or a selected subset."
)
parser.add_argument(
"--camera-ids",
"--serial-numbers",
type=int,
nargs="*",
default=None,
help="List of camera indices used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
help="List of serial numbers used to instantiate the `IntelRealSenseCamera`. If not provided, find and use all available camera indices.",
)
parser.add_argument(
"--fps",

View File

@@ -13,7 +13,6 @@ from dataclasses import dataclass, replace
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
from PIL import Image
@@ -24,10 +23,6 @@ from lerobot.common.robot_devices.utils import (
)
from lerobot.common.utils.utils import capture_timestamp_utc
# Use 1 thread to avoid blocking the main thread. Especially useful during data collection
# when other threads are used to save the images.
cv2.setNumThreads(1)
# The maximum opencv device index depends on your operating system. For instance,
# if you have 3 cameras, they should be associated with indices 0, 1, and 2. This is the case
# on macOS. However, on Ubuntu, the indices can be different, e.g. 6, 16, and 23.
@@ -36,20 +31,44 @@ cv2.setNumThreads(1)
MAX_OPENCV_INDEX = 60
def find_camera_indices(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX):
def find_cameras(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False) -> list[dict]:
cameras = []
if platform.system() == "Linux":
# Linux uses camera ports
print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
possible_camera_ids = []
for port in Path("/dev").glob("video*"):
camera_idx = int(str(port).replace("/dev/video", ""))
possible_camera_ids.append(camera_idx)
possible_ports = [str(port) for port in Path("/dev").glob("video*")]
ports = _find_cameras(possible_ports, mock=mock)
for port in ports:
cameras.append(
{
"port": port,
"index": int(port.removeprefix("/dev/video")),
}
)
else:
print(
"Mac or Windows detected. Finding available camera indices through "
f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
)
possible_camera_ids = range(max_index_search_range)
possible_indices = range(max_index_search_range)
indices = _find_cameras(possible_indices, mock=mock)
for index in indices:
cameras.append(
{
"port": None,
"index": index,
}
)
return cameras
def _find_cameras(
possible_camera_ids: list[int | str], raise_when_empty=False, mock=False
) -> list[int | str]:
if mock:
import tests.mock_cv2 as cv2
else:
import cv2
camera_ids = []
for camera_idx in possible_camera_ids:
@@ -70,6 +89,16 @@ def find_camera_indices(raise_when_empty=False, max_index_search_range=MAX_OPENC
return camera_ids
def is_valid_unix_path(path: str) -> bool:
"""Note: if 'path' points to a symlink, this will return True only if the target exists"""
p = Path(path)
return p.is_absolute() and p.exists()
def get_camera_index_from_unix_port(port: Path) -> int:
return int(str(port.resolve()).removeprefix("/dev/video"))
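# Example: get_camera_index_from_unix_port(Path("/dev/video6")) returns 6
# (if the port is a symlink, the index of the resolved target device is returned)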
def save_image(img_array, camera_index, frame_index, images_dir):
img = Image.fromarray(img_array)
path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png"
@@ -78,19 +107,26 @@ def save_image(img_array, camera_index, frame_index, images_dir):
def save_images_from_cameras(
images_dir: Path, camera_ids: list[int] | None = None, fps=None, width=None, height=None, record_time_s=2
images_dir: Path,
camera_ids: list | None = None,
fps=None,
width=None,
height=None,
record_time_s=2,
mock=False,
):
"""
Initializes all the cameras and saves images to the directory. Useful to visually identify the camera
associated with a given camera index.
"""
if camera_ids is None:
camera_ids = find_camera_indices()
if camera_ids is None or len(camera_ids) == 0:
camera_infos = find_cameras(mock=mock)
camera_ids = [cam["index"] for cam in camera_infos]
print("Connecting cameras")
cameras = []
for cam_idx in camera_ids:
camera = OpenCVCamera(cam_idx, fps=fps, width=width, height=height)
camera = OpenCVCamera(cam_idx, fps=fps, width=width, height=height, mock=mock)
camera.connect()
print(
f"OpenCVCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, "
@@ -108,7 +144,7 @@ def save_images_from_cameras(
print(f"Saving images to {images_dir}")
frame_index = 0
start_time = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
while True:
now = time.perf_counter()
@@ -129,11 +165,11 @@ def save_images_from_cameras(
dt_s = time.perf_counter() - now
busy_wait(1 / fps - dt_s)
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
if time.perf_counter() - start_time > record_time_s:
break
print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}")
frame_index += 1
print(f"Images have been saved to {images_dir}")
@@ -156,6 +192,8 @@ class OpenCVCameraConfig:
width: int | None = None
height: int | None = None
color_mode: str = "rgb"
rotation: int | None = None
mock: bool = False
def __post_init__(self):
if self.color_mode not in ["rgb", "bgr"]:
@@ -163,6 +201,9 @@ class OpenCVCameraConfig:
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)
if self.rotation not in [-90, None, 90, 180]:
raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
class OpenCVCamera:
"""
@@ -203,7 +244,7 @@ class OpenCVCamera:
```
"""
def __init__(self, camera_index: int, config: OpenCVCameraConfig | None = None, **kwargs):
def __init__(self, camera_index: int | str, config: OpenCVCameraConfig | None = None, **kwargs):
if config is None:
config = OpenCVCameraConfig()
@@ -211,10 +252,24 @@ class OpenCVCamera:
config = replace(config, **kwargs)
self.camera_index = camera_index
self.port = None
# Linux uses ports for connecting to cameras
if platform.system() == "Linux":
if isinstance(self.camera_index, int):
self.port = Path(f"/dev/video{self.camera_index}")
elif isinstance(self.camera_index, str) and is_valid_unix_path(self.camera_index):
self.port = Path(self.camera_index)
# Retrieve the camera index from a potentially symlinked path
self.camera_index = get_camera_index_from_unix_port(self.port)
else:
raise ValueError(f"Please check the provided camera_index: {camera_index}")
self.fps = config.fps
self.width = config.width
self.height = config.height
self.color_mode = config.color_mode
self.mock = config.mock
self.camera = None
self.is_connected = False
@@ -223,43 +278,60 @@ class OpenCVCamera:
self.color_image = None
self.logs = {}
if self.mock:
import tests.mock_cv2 as cv2
else:
import cv2
# TODO(aliberts): Do we keep original width/height or do we define them after rotation?
self.rotation = None
if config.rotation == -90:
self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
elif config.rotation == 90:
self.rotation = cv2.ROTATE_90_CLOCKWISE
elif config.rotation == 180:
self.rotation = cv2.ROTATE_180
def connect(self):
if self.is_connected:
raise RobotDeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.")
if self.mock:
import tests.mock_cv2 as cv2
else:
import cv2
# Use 1 thread to avoid blocking the main thread. Especially useful during data collection
# when other threads are used to save the images.
cv2.setNumThreads(1)
camera_idx = f"/dev/video{self.camera_index}" if platform.system() == "Linux" else self.camera_index
# First create a temporary camera trying to access `camera_index`,
# and verify it is a valid camera by calling `isOpened`.
if platform.system() == "Linux":
# Linux uses ports for connecting to cameras
tmp_camera = cv2.VideoCapture(f"/dev/video{self.camera_index}")
else:
tmp_camera = cv2.VideoCapture(self.camera_index)
tmp_camera = cv2.VideoCapture(camera_idx)
is_camera_open = tmp_camera.isOpened()
# Release camera to make it accessible for `find_camera_indices`
tmp_camera.release()
del tmp_camera
# If the camera doesn't work, display the camera indices corresponding to
# valid cameras.
if not is_camera_open:
# Verify that the provided `camera_index` is valid before printing the traceback
available_cam_ids = find_camera_indices()
cameras_info = find_cameras()
available_cam_ids = [cam["index"] for cam in cameras_info]
if self.camera_index not in available_cam_ids:
raise ValueError(
f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
"To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/opencv.py`."
)
raise OSError(f"Can't access OpenCVCamera({self.camera_index}).")
raise OSError(f"Can't access OpenCVCamera({camera_idx}).")
# Secondly, create the camera that will be used downstream.
# Note: For some unknown reason, calling `isOpened` blocks the camera which then
# needs to be re-created.
if platform.system() == "Linux":
self.camera = cv2.VideoCapture(f"/dev/video{self.camera_index}")
else:
self.camera = cv2.VideoCapture(self.camera_index)
self.camera = cv2.VideoCapture(camera_idx)
if self.fps is not None:
self.camera.set(cv2.CAP_PROP_FPS, self.fps)
@@ -272,22 +344,24 @@ class OpenCVCamera:
actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)
actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
# Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30)
if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3):
# Using `OSError` since it's a broad error that encompasses issues related to device communication
raise OSError(
f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}."
)
if self.width is not None and self.width != actual_width:
if self.width is not None and not math.isclose(self.width, actual_width, rel_tol=1e-3):
raise OSError(
f"Can't set {self.width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}."
)
if self.height is not None and self.height != actual_height:
if self.height is not None and not math.isclose(self.height, actual_height, rel_tol=1e-3):
raise OSError(
f"Can't set {self.height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}."
)
self.fps = actual_fps
self.width = actual_width
self.height = actual_height
self.fps = round(actual_fps)
self.width = round(actual_width)
self.height = round(actual_height)
self.is_connected = True
@@ -306,6 +380,7 @@ class OpenCVCamera:
start_time = time.perf_counter()
ret, color_image = self.camera.read()
if not ret:
raise OSError(f"Can't capture color image from camera {self.camera_index}.")
@@ -320,6 +395,11 @@ class OpenCVCamera:
# However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks,
# so we convert the image color from BGR to RGB.
if requested_color_mode == "rgb":
if self.mock:
import tests.mock_cv2 as cv2
else:
import cv2
color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
h, w, _ = color_image.shape
@@ -328,17 +408,25 @@ class OpenCVCamera:
f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead."
)
if self.rotation is not None:
color_image = cv2.rotate(color_image, self.rotation)
# log the number of seconds it took to read the image
self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
# log the utc time at which the image was received
self.logs["timestamp_utc"] = capture_timestamp_utc()
self.color_image = color_image
return color_image
def read_loop(self):
while self.stop_event is None or not self.stop_event.is_set():
self.color_image = self.read()
while not self.stop_event.is_set():
try:
self.color_image = self.read()
except Exception as e:
print(f"Error reading in thread: {e}")
def async_read(self):
if not self.is_connected:
@@ -353,15 +441,14 @@ class OpenCVCamera:
self.thread.start()
num_tries = 0
while self.color_image is None:
num_tries += 1
time.sleep(1 / self.fps)
if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()):
raise Exception(
"The thread responsible for `self.async_read()` took too much time to start. There might be an issue. Verify that `self.thread.start()` has been called."
)
while True:
if self.color_image is not None:
return self.color_image
return self.color_image
time.sleep(1 / self.fps)
num_tries += 1
if num_tries > self.fps * 2:
raise TimeoutError("Timed out waiting for async_read() to start.")
def disconnect(self):
if not self.is_connected:
@@ -369,16 +456,14 @@ class OpenCVCamera:
f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first."
)
if self.thread is not None and self.thread.is_alive():
# wait for the thread to finish
if self.thread is not None:
self.stop_event.set()
self.thread.join()
self.thread.join() # wait for the thread to finish
self.thread = None
self.stop_event = None
self.camera.release()
self.camera = None
self.is_connected = False
def __del__(self):
@@ -424,7 +509,7 @@ if __name__ == "__main__":
parser.add_argument(
"--record-time-s",
type=float,
default=2.0,
default=4.0,
help="Set the number of seconds used to record the frames. By default, 2 seconds.",
)
args = parser.parse_args()

View File

@@ -1,55 +1,8 @@
from pathlib import Path
from typing import Protocol
import cv2
import einops
import numpy as np
def write_shape_on_image_inplace(image):
height, width = image.shape[:2]
text = f"Width: {width} Height: {height}"
# Define the font, scale, color, and thickness
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1
color = (255, 0, 0) # Blue in BGR
thickness = 2
position = (10, height - 10) # 10 pixels from the bottom-left corner
cv2.putText(image, text, position, font, font_scale, color, thickness)
def save_color_image(image, path, write_shape=False):
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
if write_shape:
write_shape_on_image_inplace(image)
cv2.imwrite(str(path), image)
def save_depth_image(depth, path, write_shape=False):
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
# Apply colormap on depth image (image must be converted to 8-bit per pixel first)
depth_image = cv2.applyColorMap(cv2.convertScaleAbs(depth, alpha=0.03), cv2.COLORMAP_JET)
if write_shape:
write_shape_on_image_inplace(depth_image)
cv2.imwrite(str(path), depth_image)
def convert_torch_image_to_cv2(tensor, rgb_to_bgr=True):
assert tensor.ndim == 3
c, h, w = tensor.shape
assert c < h and c < w
color_image = einops.rearrange(tensor, "c h w -> h w c").numpy()
if rgb_to_bgr:
color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)
return color_image
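These helpers are removed here, but for readers of the diff, the torch-to-OpenCV conversion performed by `convert_torch_image_to_cv2` amounts to the following (a sketch using dummy data):

```python
import cv2
import einops
import torch

chw = torch.zeros(3, 480, 640, dtype=torch.uint8)               # channel-first RGB tensor
hwc = einops.rearrange(chw, "c h w -> h w c").contiguous().numpy()  # (480, 640, 3) for OpenCV
bgr = cv2.cvtColor(hwc, cv2.COLOR_RGB2BGR)                       # OpenCV displays BGR
```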
# Defines a camera type
class Camera(Protocol):
def connect(self): ...

View File

@@ -0,0 +1,327 @@
########################################################################################
# Utilities
########################################################################################
import logging
import time
import traceback
from contextlib import nullcontext
from copy import copy
from functools import cache
import cv2
import torch
import tqdm
from termcolor import colored
from lerobot.common.datasets.populate_dataset import add_frame, safe_stop_image_writer
from lerobot.common.policies.factory import make_policy
from lerobot.common.robot_devices.robots.utils import Robot
from lerobot.common.robot_devices.utils import busy_wait
from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, set_global_seed
from lerobot.scripts.eval import get_pretrained_policy_path
def log_control_info(robot: Robot, dt_s, episode_index=None, frame_index=None, fps=None):
log_items = []
if episode_index is not None:
log_items.append(f"ep:{episode_index}")
if frame_index is not None:
log_items.append(f"frame:{frame_index}")
def log_dt(shortname, dt_val_s):
nonlocal log_items, fps
info_str = f"{shortname}:{dt_val_s * 1000:5.2f} ({1/ dt_val_s:3.1f}hz)"
if fps is not None:
actual_fps = 1 / dt_val_s
if actual_fps < fps - 1:
info_str = colored(info_str, "yellow")
log_items.append(info_str)
# total step time displayed in milliseconds and its frequency
log_dt("dt", dt_s)
# TODO(aliberts): move robot-specific logs logic in robot.print_logs()
if not robot.robot_type.startswith("stretch"):
for name in robot.leader_arms:
key = f"read_leader_{name}_pos_dt_s"
if key in robot.logs:
log_dt("dtRlead", robot.logs[key])
for name in robot.follower_arms:
key = f"write_follower_{name}_goal_pos_dt_s"
if key in robot.logs:
log_dt("dtWfoll", robot.logs[key])
key = f"read_follower_{name}_pos_dt_s"
if key in robot.logs:
log_dt("dtRfoll", robot.logs[key])
for name in robot.cameras:
key = f"read_camera_{name}_dt_s"
if key in robot.logs:
log_dt(f"dtR{name}", robot.logs[key])
info_str = " ".join(log_items)
logging.info(info_str)
@cache
def is_headless():
"""Detects if python is running without a monitor."""
try:
import pynput # noqa
return False
except Exception:
print(
"Error trying to import pynput. Switching to headless mode. "
"As a result, the video stream from the cameras won't be shown, "
"and you won't be able to change the control flow with keyboards. "
"For more info, see traceback below.\n"
)
traceback.print_exc()
print()
return True
def has_method(_object: object, method_name: str):
return hasattr(_object, method_name) and callable(getattr(_object, method_name))
def predict_action(observation, policy, device, use_amp):
observation = copy(observation)
with (
torch.inference_mode(),
torch.autocast(device_type=device.type) if device.type == "cuda" and use_amp else nullcontext(),
):
# Convert to pytorch format: channel first and float32 in [0,1] with batch dimension
for name in observation:
if "image" in name:
observation[name] = observation[name].type(torch.float32) / 255
observation[name] = observation[name].permute(2, 0, 1).contiguous()
observation[name] = observation[name].unsqueeze(0)
observation[name] = observation[name].to(device)
# Compute the next action with the policy
# based on the current observation
action = policy.select_action(observation)
# Remove batch dimension
action = action.squeeze(0)
# Move to cpu, if not already the case
action = action.to("cpu")
return action
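The image preprocessing inside `predict_action` is the standard HWC-uint8 to BCHW-float32 conversion. The same transform in isolation (function name is illustrative):

```python
import torch

def image_to_policy_input(img_hwc_uint8: torch.Tensor, device: torch.device) -> torch.Tensor:
    # (H, W, C) uint8 in [0, 255] -> (1, C, H, W) float32 in [0, 1]
    x = img_hwc_uint8.type(torch.float32) / 255
    x = x.permute(2, 0, 1).contiguous()
    return x.unsqueeze(0).to(device)

x = image_to_policy_input(torch.zeros(480, 640, 3, dtype=torch.uint8), torch.device("cpu"))
assert x.shape == (1, 3, 480, 640)
```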
def init_keyboard_listener():
# Allow exiting early while recording an episode or resetting the environment,
# by tapping the right arrow key '->'. This might require sudo permission
# to allow your terminal to monitor keyboard events.
events = {}
events["exit_early"] = False
events["rerecord_episode"] = False
events["stop_recording"] = False
if is_headless():
logging.warning(
"Headless environment detected. On-screen cameras display and keyboard inputs will not be available."
)
listener = None
return listener, events
# Only import pynput if not in a headless environment
from pynput import keyboard
def on_press(key):
try:
if key == keyboard.Key.right:
print("Right arrow key pressed. Exiting loop...")
events["exit_early"] = True
elif key == keyboard.Key.left:
print("Left arrow key pressed. Exiting loop and rerecord the last episode...")
events["rerecord_episode"] = True
events["exit_early"] = True
elif key == keyboard.Key.esc:
print("Escape key pressed. Stopping data recording...")
events["stop_recording"] = True
events["exit_early"] = True
except Exception as e:
print(f"Error handling key press: {e}")
listener = keyboard.Listener(on_press=on_press)
listener.start()
return listener, events
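Callers poll the shared `events` dict from their own loop and stop the listener when done. A usage sketch:

```python
import time
from lerobot.common.robot_devices.control_utils import init_keyboard_listener

listener, events = init_keyboard_listener()
while not events["exit_early"]:  # flipped to True by the right-arrow handler
    time.sleep(0.1)              # stand-in for one control-loop iteration
if listener is not None:         # listener is None in headless mode
    listener.stop()
```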
def init_policy(pretrained_policy_name_or_path, policy_overrides):
"""Instantiate the policy and load fps, device and use_amp from config yaml"""
pretrained_policy_path = get_pretrained_policy_path(pretrained_policy_name_or_path)
hydra_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", policy_overrides)
policy = make_policy(hydra_cfg=hydra_cfg, pretrained_policy_name_or_path=pretrained_policy_path)
# Check device is available
device = get_safe_torch_device(hydra_cfg.device, log=True)
use_amp = hydra_cfg.use_amp
policy_fps = hydra_cfg.env.fps
policy.eval()
policy.to(device)
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
set_global_seed(hydra_cfg.seed)
return policy, policy_fps, device, use_amp
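Downstream code unpacks all four return values, as `record` does in `control_robot.py`. A usage sketch (the checkpoint path and override are placeholders):

```python
from lerobot.common.robot_devices.control_utils import init_policy

policy, policy_fps, device, use_amp = init_policy(
    "outputs/train/my_run/checkpoints/last/pretrained_model",  # placeholder path
    policy_overrides=["device=cuda"],                          # hydra-style override, illustrative
)
```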
def warmup_record(
robot,
events,
enable_teleoperation,
warmup_time_s,
display_cameras,
fps,
):
control_loop(
robot=robot,
control_time_s=warmup_time_s,
display_cameras=display_cameras,
events=events,
fps=fps,
teleoperate=enable_teleoperation,
)
def record_episode(
robot,
dataset,
events,
episode_time_s,
display_cameras,
policy,
device,
use_amp,
fps,
):
control_loop(
robot=robot,
control_time_s=episode_time_s,
display_cameras=display_cameras,
dataset=dataset,
events=events,
policy=policy,
device=device,
use_amp=use_amp,
fps=fps,
teleoperate=policy is None,
)
@safe_stop_image_writer
def control_loop(
robot,
control_time_s,
teleoperate=False,
display_cameras=False,
dataset=None,
events=None,
policy=None,
device=None,
use_amp=None,
fps=None,
):
# TODO(rcadene): Add option to record logs
if not robot.is_connected:
robot.connect()
if events is None:
events = {"exit_early": False}
if teleoperate and policy is not None:
raise ValueError("When `teleoperate` is True, `policy` should be None.")
if dataset is not None and fps is not None and dataset["fps"] != fps:
raise ValueError(f"The dataset fps should be equal to requested fps ({dataset['fps']} != {fps}).")
timestamp = 0
start_episode_t = time.perf_counter()
while timestamp < control_time_s:
start_loop_t = time.perf_counter()
if teleoperate:
observation, action = robot.teleop_step(record_data=True)
else:
observation = robot.capture_observation()
if policy is not None:
pred_action = predict_action(observation, policy, device, use_amp)
# Action can eventually be clipped using `max_relative_target`,
# so action actually sent is saved in the dataset.
action = robot.send_action(pred_action)
action = {"action": action}
if dataset is not None:
add_frame(dataset, observation, action)
if display_cameras and not is_headless():
image_keys = [key for key in observation if "image" in key]
for key in image_keys:
cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR))
cv2.waitKey(1)
if fps is not None:
dt_s = time.perf_counter() - start_loop_t
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - start_loop_t
log_control_info(robot, dt_s, fps=fps)
timestamp = time.perf_counter() - start_episode_t
if events["exit_early"]:
events["exit_early"] = False
break
def reset_environment(robot, events, reset_time_s):
# TODO(rcadene): refactor warmup_record and reset_environment
# TODO(aliberts): allow for teleop during reset
if has_method(robot, "teleop_safety_stop"):
robot.teleop_safety_stop()
timestamp = 0
start_vencod_t = time.perf_counter()
# Wait if necessary
with tqdm.tqdm(total=reset_time_s, desc="Waiting") as pbar:
while timestamp < reset_time_s:
time.sleep(1)
timestamp = time.perf_counter() - start_vencod_t
pbar.update(1)
if events["exit_early"]:
events["exit_early"] = False
break
def stop_recording(robot, listener, display_cameras):
robot.disconnect()
if not is_headless():
if listener is not None:
listener.stop()
if display_cameras:
cv2.destroyAllWindows()
def sanity_check_dataset_name(repo_id, policy):
_, dataset_name = repo_id.split("/")
# Either repo_id doesn't start with "eval_" and there is no policy,
# or repo_id starts with "eval_" and there is a policy; any other combination raises.
if dataset_name.startswith("eval_") == (policy is None):
raise ValueError(
f"Your dataset name begins by 'eval_' ({dataset_name}) but no policy is provided ({policy})."
)
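Concretely, the check fires when exactly one of the two sides holds and passes otherwise (a sketch; the repo ids and `my_policy` are placeholders):

```python
from lerobot.common.robot_devices.control_utils import sanity_check_dataset_name

sanity_check_dataset_name("lerobot/koch_pick_cube", policy=None)          # ok: recording with teleop
# sanity_check_dataset_name("lerobot/eval_koch_pick_cube", policy=None)  # raises: eval_ name, no policy
# sanity_check_dataset_name("lerobot/koch_pick_cube", policy=my_policy)  # raises: policy, non-eval_ name
```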

View File

@@ -8,17 +8,6 @@ from pathlib import Path
import numpy as np
import tqdm
from dynamixel_sdk import (
COMM_SUCCESS,
DXL_HIBYTE,
DXL_HIWORD,
DXL_LOBYTE,
DXL_LOWORD,
GroupSyncRead,
GroupSyncWrite,
PacketHandler,
PortHandler,
)
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
from lerobot.common.utils.utils import capture_timestamp_utc
@@ -166,24 +155,29 @@ def convert_degrees_to_steps(degrees: float | np.ndarray, models: str | list[str
return steps
def convert_to_bytes(value, bytes):
def convert_to_bytes(value, bytes, mock=False):
if mock:
return value
import dynamixel_sdk as dxl
# Note: No need to convert back into unsigned int, since this byte preprocessing
# already handles it for us.
if bytes == 1:
data = [
DXL_LOBYTE(DXL_LOWORD(value)),
dxl.DXL_LOBYTE(dxl.DXL_LOWORD(value)),
]
elif bytes == 2:
data = [
DXL_LOBYTE(DXL_LOWORD(value)),
DXL_HIBYTE(DXL_LOWORD(value)),
dxl.DXL_LOBYTE(dxl.DXL_LOWORD(value)),
dxl.DXL_HIBYTE(dxl.DXL_LOWORD(value)),
]
elif bytes == 4:
data = [
DXL_LOBYTE(DXL_LOWORD(value)),
DXL_HIBYTE(DXL_LOWORD(value)),
DXL_LOBYTE(DXL_HIWORD(value)),
DXL_HIBYTE(DXL_HIWORD(value)),
dxl.DXL_LOBYTE(dxl.DXL_LOWORD(value)),
dxl.DXL_HIBYTE(dxl.DXL_LOWORD(value)),
dxl.DXL_LOBYTE(dxl.DXL_HIWORD(value)),
dxl.DXL_HIBYTE(dxl.DXL_HIWORD(value)),
]
else:
raise NotImplementedError(
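As a worked example of the little-endian packing above, `value=1337` (`0x0539`) with `bytes=2` yields `[0x39, 0x05]`; the SDK macros reduce to plain bit arithmetic (an illustrative check, independent of the SDK):

```python
value = 1337                  # 0x0539
lobyte = value & 0xFF         # DXL_LOBYTE(DXL_LOWORD(value)) -> 0x39
hibyte = (value >> 8) & 0xFF  # DXL_HIBYTE(DXL_LOWORD(value)) -> 0x05
assert [lobyte, hibyte] == [0x39, 0x05]
```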
@@ -333,9 +327,11 @@ class DynamixelMotorsBus:
motors: dict[str, tuple[int, str]],
extra_model_control_table: dict[str, list[tuple]] | None = None,
extra_model_resolution: dict[str, int] | None = None,
mock=False,
):
self.port = port
self.motors = motors
self.mock = mock
self.model_ctrl_table = deepcopy(MODEL_CONTROL_TABLE)
if extra_model_control_table:
@@ -359,8 +355,13 @@ class DynamixelMotorsBus:
f"DynamixelMotorsBus({self.port}) is already connected. Do not call `motors_bus.connect()` twice."
)
self.port_handler = PortHandler(self.port)
self.packet_handler = PacketHandler(PROTOCOL_VERSION)
if self.mock:
import tests.mock_dynamixel_sdk as dxl
else:
import dynamixel_sdk as dxl
self.port_handler = dxl.PortHandler(self.port)
self.packet_handler = dxl.PacketHandler(PROTOCOL_VERSION)
try:
if not self.port_handler.openPort():
@@ -392,10 +393,17 @@ class DynamixelMotorsBus:
self.configure_motors()
def reconnect(self):
self.port_handler = PortHandler(self.port)
self.packet_handler = PacketHandler(PROTOCOL_VERSION)
if self.mock:
import tests.mock_dynamixel_sdk as dxl
else:
import dynamixel_sdk as dxl
self.port_handler = dxl.PortHandler(self.port)
self.packet_handler = dxl.PacketHandler(PROTOCOL_VERSION)
if not self.port_handler.openPort():
raise OSError(f"Failed to open port '{self.port}'.")
self.is_connected = True
def are_motors_configured(self):
@@ -781,6 +789,11 @@ class DynamixelMotorsBus:
return values
def _read_with_motor_ids(self, motor_models, motor_ids, data_name):
if self.mock:
import tests.mock_dynamixel_sdk as dxl
else:
import dynamixel_sdk as dxl
return_list = True
if not isinstance(motor_ids, list):
return_list = False
@@ -788,12 +801,12 @@ class DynamixelMotorsBus:
assert_same_address(self.model_ctrl_table, self.motor_models, data_name)
addr, bytes = self.model_ctrl_table[motor_models[0]][data_name]
group = GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes)
group = dxl.GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes)
for idx in motor_ids:
group.addParam(idx)
comm = group.txRxPacket()
if comm != COMM_SUCCESS:
if comm != dxl.COMM_SUCCESS:
raise ConnectionError(
f"Read failed due to communication error on port {self.port_handler.port_name} for indices {motor_ids}: "
f"{self.packet_handler.getTxRxResult(comm)}"
@@ -817,6 +830,11 @@ class DynamixelMotorsBus:
start_time = time.perf_counter()
if self.mock:
import tests.mock_dynamixel_sdk as dxl
else:
import dynamixel_sdk as dxl
if motor_names is None:
motor_names = self.motor_names
@@ -836,16 +854,18 @@ class DynamixelMotorsBus:
if data_name not in self.group_readers:
# create new group reader
self.group_readers[group_key] = GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes)
self.group_readers[group_key] = dxl.GroupSyncRead(
self.port_handler, self.packet_handler, addr, bytes
)
for idx in motor_ids:
self.group_readers[group_key].addParam(idx)
for _ in range(NUM_READ_RETRY):
comm = self.group_readers[group_key].txRxPacket()
if comm == COMM_SUCCESS:
if comm == dxl.COMM_SUCCESS:
break
if comm != COMM_SUCCESS:
if comm != dxl.COMM_SUCCESS:
raise ConnectionError(
f"Read failed due to communication error on port {self.port} for group_key {group_key}: "
f"{self.packet_handler.getTxRxResult(comm)}"
@@ -876,6 +896,11 @@ class DynamixelMotorsBus:
return values
def _write_with_motor_ids(self, motor_models, motor_ids, data_name, values):
if self.mock:
import tests.mock_dynamixel_sdk as dxl
else:
import dynamixel_sdk as dxl
if not isinstance(motor_ids, list):
motor_ids = [motor_ids]
if not isinstance(values, list):
@@ -883,13 +908,13 @@ class DynamixelMotorsBus:
assert_same_address(self.model_ctrl_table, motor_models, data_name)
addr, bytes = self.model_ctrl_table[motor_models[0]][data_name]
group = GroupSyncWrite(self.port_handler, self.packet_handler, addr, bytes)
group = dxl.GroupSyncWrite(self.port_handler, self.packet_handler, addr, bytes)
for idx, value in zip(motor_ids, values, strict=True):
data = convert_to_bytes(value, bytes)
data = convert_to_bytes(value, bytes, self.mock)
group.addParam(idx, data)
comm = group.txPacket()
if comm != COMM_SUCCESS:
if comm != dxl.COMM_SUCCESS:
raise ConnectionError(
f"Write failed due to communication error on port {self.port_handler.port_name} for indices {motor_ids}: "
f"{self.packet_handler.getTxRxResult(comm)}"
@@ -903,6 +928,11 @@ class DynamixelMotorsBus:
start_time = time.perf_counter()
if self.mock:
import tests.mock_dynamixel_sdk as dxl
else:
import dynamixel_sdk as dxl
if motor_names is None:
motor_names = self.motor_names
@@ -932,19 +962,19 @@ class DynamixelMotorsBus:
init_group = data_name not in self.group_readers
if init_group:
self.group_writers[group_key] = GroupSyncWrite(
self.group_writers[group_key] = dxl.GroupSyncWrite(
self.port_handler, self.packet_handler, addr, bytes
)
for idx, value in zip(motor_ids, values, strict=True):
data = convert_to_bytes(value, bytes)
data = convert_to_bytes(value, bytes, self.mock)
if init_group:
self.group_writers[group_key].addParam(idx, data)
else:
self.group_writers[group_key].changeParam(idx, data)
comm = self.group_writers[group_key].txPacket()
if comm != COMM_SUCCESS:
if comm != dxl.COMM_SUCCESS:
raise ConnectionError(
f"Write failed due to communication error on port {self.port} for group_key {group_key}: "
f"{self.packet_handler.getTxRxResult(comm)}"

View File

@@ -1,7 +1,9 @@
import hydra
from omegaconf import DictConfig
from lerobot.common.robot_devices.robots.utils import Robot
def make_robot(cfg: DictConfig):
def make_robot(cfg: DictConfig) -> Robot:
robot = hydra.utils.instantiate(cfg)
return robot
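Usage mirrors the scripts: load a robot yaml with hydra and instantiate it (a sketch; the config path assumes the stretch config added later in this commit):

```python
from lerobot.common.robot_devices.robots.factory import make_robot
from lerobot.common.utils.utils import init_hydra_config

robot_cfg = init_hydra_config("lerobot/configs/robot/stretch.yaml")  # assumed path
robot = make_robot(robot_cfg)
```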

View File

@@ -349,6 +349,25 @@ class ManipulatorRobot:
self.is_connected = False
self.logs = {}
@property
def has_camera(self):
return len(self.cameras) > 0
@property
def num_cameras(self):
return len(self.cameras)
@property
def available_arms(self):
available_arms = []
for name in self.follower_arms:
arm_id = get_arm_id(name, "follower")
available_arms.append(arm_id)
for name in self.leader_arms:
arm_id = get_arm_id(name, "leader")
available_arms.append(arm_id)
return available_arms
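For example, a manipulator configured with one leader and one follower arm both named `main` would report the following, assuming `get_arm_id` joins name and type as `{name}_{type}` (`robot` is a placeholder for an instantiated `ManipulatorRobot`):

```python
print(robot.available_arms)  # e.g. ["main_follower", "main_leader"]
```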
def connect(self):
if self.is_connected:
raise RobotDeviceAlreadyConnectedError(
@@ -364,6 +383,7 @@ class ManipulatorRobot:
for name in self.follower_arms:
print(f"Connecting {name} follower arm.")
self.follower_arms[name].connect()
for name in self.leader_arms:
print(f"Connecting {name} leader arm.")
self.leader_arms[name].connect()
@@ -681,6 +701,10 @@ class ManipulatorRobot:
return torch.cat(action_sent)
def print_logs(self):
pass
# TODO(aliberts): move robot-specific logs logic here
def disconnect(self):
if not self.is_connected:
raise RobotDeviceNotConnectedError(

View File

@@ -0,0 +1,216 @@
#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
from dataclasses import dataclass, field, replace
import torch
from stretch_body.gamepad_teleop import GamePadTeleop
from stretch_body.robot import Robot as StretchAPI
from stretch_body.robot_params import RobotParams
from lerobot.common.robot_devices.cameras.utils import Camera
@dataclass
class StretchRobotConfig:
robot_type: str | None = "stretch"
cameras: dict[str, Camera] = field(default_factory=lambda: {})
# TODO(aliberts): add feature with max_relative target
# TODO(aliberts): add comment on max_relative target
max_relative_target: list[float] | float | None = None
class StretchRobot(StretchAPI):
"""Wrapper of stretch_body.robot.Robot"""
def __init__(self, config: StretchRobotConfig | None = None, **kwargs):
super().__init__()
if config is None:
config = StretchRobotConfig()
# Overwrite config arguments using kwargs
self.config = replace(config, **kwargs)
self.robot_type = self.config.robot_type
self.cameras = self.config.cameras
self.is_connected = False
self.teleop = None
self.logs = {}
# TODO(aliberts): test this
RobotParams.set_logging_level("WARNING")
RobotParams.set_logging_formatter("brief_console_formatter")
self.state_keys = None
self.action_keys = None
def connect(self) -> None:
self.is_connected = self.startup()
if not self.is_connected:
print("Another process is already using Stretch. Try running 'stretch_free_robot_process.py'")
raise ConnectionError()
for name in self.cameras:
self.cameras[name].connect()
self.is_connected = self.is_connected and self.cameras[name].is_connected
if not self.is_connected:
print("Could not connect to the cameras, check that all cameras are plugged-in.")
raise ConnectionError()
self.run_calibration()
def run_calibration(self) -> None:
if not self.is_homed():
self.home()
def teleop_step(
self, record_data=False
) -> None | tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
# TODO(aliberts): return ndarrays instead of torch.Tensors
if not self.is_connected:
raise ConnectionError()
if self.teleop is None:
self.teleop = GamePadTeleop(robot_instance=False)
self.teleop.startup(robot=self)
before_read_t = time.perf_counter()
state = self.get_state()
action = self.teleop.gamepad_controller.get_state()
self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t
before_write_t = time.perf_counter()
self.teleop.do_motion(robot=self)
self.push_command()
self.logs["write_pos_dt_s"] = time.perf_counter() - before_write_t
if self.state_keys is None:
self.state_keys = list(state)
if not record_data:
return
state = torch.as_tensor(list(state.values()))
action = torch.as_tensor(list(action.values()))
# Capture images from cameras
images = {}
for name in self.cameras:
before_camread_t = time.perf_counter()
images[name] = self.cameras[name].async_read()
images[name] = torch.from_numpy(images[name])
self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
# Populate output dictionaries
obs_dict, action_dict = {}, {}
obs_dict["observation.state"] = state
action_dict["action"] = action
for name in self.cameras:
obs_dict[f"observation.images.{name}"] = images[name]
return obs_dict, action_dict
def get_state(self) -> dict:
status = self.get_status()
return {
"head_pan.pos": status["head"]["head_pan"]["pos"],
"head_tilt.pos": status["head"]["head_tilt"]["pos"],
"lift.pos": status["lift"]["pos"],
"arm.pos": status["arm"]["pos"],
"wrist_pitch.pos": status["end_of_arm"]["wrist_pitch"]["pos"],
"wrist_roll.pos": status["end_of_arm"]["wrist_roll"]["pos"],
"wrist_yaw.pos": status["end_of_arm"]["wrist_yaw"]["pos"],
"gripper.pos": status["end_of_arm"]["stretch_gripper"]["pos"],
"base_x.vel": status["base"]["x_vel"],
"base_y.vel": status["base"]["y_vel"],
"base_theta.vel": status["base"]["theta_vel"],
}
def capture_observation(self) -> dict:
# TODO(aliberts): return ndarrays instead of torch.Tensors
before_read_t = time.perf_counter()
state = self.get_state()
self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t
if self.state_keys is None:
self.state_keys = list(state)
state = torch.as_tensor(list(state.values()))
# Capture images from cameras
images = {}
for name in self.cameras:
before_camread_t = time.perf_counter()
images[name] = self.cameras[name].async_read()
images[name] = torch.from_numpy(images[name])
self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
# Populate output dictionaries
obs_dict = {}
obs_dict["observation.state"] = state
for name in self.cameras:
obs_dict[f"observation.images.{name}"] = images[name]
return obs_dict
def send_action(self, action: torch.Tensor) -> torch.Tensor:
# TODO(aliberts): return ndarrays instead of torch.Tensors
if not self.is_connected:
raise ConnectionError()
if self.teleop is None:
self.teleop = GamePadTeleop(robot_instance=False)
self.teleop.startup(robot=self)
if self.action_keys is None:
dummy_action = self.teleop.gamepad_controller.get_state()
self.action_keys = list(dummy_action.keys())
action_dict = dict(zip(self.action_keys, action.tolist(), strict=True))
before_write_t = time.perf_counter()
self.teleop.do_motion(state=action_dict, robot=self)
self.push_command()
self.logs["write_pos_dt_s"] = time.perf_counter() - before_write_t
# TODO(aliberts): return action_sent when motion is limited
return action
def print_logs(self) -> None:
pass
# TODO(aliberts): move robot-specific logs logic here
def teleop_safety_stop(self) -> None:
if self.teleop is not None:
self.teleop._safety_stop(robot=self)
def disconnect(self) -> None:
self.stop()
if self.teleop is not None:
self.teleop.gamepad_controller.stop()
self.teleop.stop()
if len(self.cameras) > 0:
for cam in self.cameras.values():
cam.disconnect()
self.is_connected = False
def __del__(self):
self.disconnect()
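End to end, driving Stretch through this wrapper looks like the sketch below (requires a physical robot with the `stretch_body` stack and a paired gamepad):

```python
from lerobot.common.robot_devices.robots.stretch import StretchRobot

robot = StretchRobot()
robot.connect()                                    # startup, camera connection, homing if needed
obs, action = robot.teleop_step(record_data=True)  # one gamepad teleop tick
print(obs["observation.state"])                    # head/lift/arm/wrist/gripper positions + base velocities
robot.disconnect()
```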

View File

@@ -9,8 +9,12 @@ def get_arm_id(name, arm_type):
class Robot(Protocol):
def init_teleop(self): ...
# TODO(rcadene, aliberts): Add unit test checking the protocol is implemented in the corresponding classes
robot_type: str
def connect(self): ...
def run_calibration(self): ...
def teleop_step(self, record_data=False): ...
def capture_observation(self): ...
def send_action(self, action): ...
def disconnect(self): ...

View File

@@ -23,6 +23,20 @@ def busy_wait(seconds):
time.sleep(seconds)
def safe_disconnect(func):
# TODO(aliberts): Allow to pass custom exceptions
# (e.g. ThreadServiceExit, KeyboardInterrupt, SystemExit, UnpluggedError, DynamixelCommError)
def wrapper(robot, *args, **kwargs):
try:
return func(robot, *args, **kwargs)
except Exception as e:
if robot.is_connected:
robot.disconnect()
raise e
return wrapper
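The decorator guarantees a connected robot is disconnected before the exception propagates; the control modes in `control_robot.py` use it like this (a sketch):

```python
from lerobot.common.robot_devices.utils import safe_disconnect

@safe_disconnect
def my_control_mode(robot, fps: int | None = None):
    ...  # any exception raised here triggers robot.disconnect() before re-raising
```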
class RobotDeviceNotConnectedError(Exception):
"""Exception raised when the robot device is not connected."""

View File

@@ -16,6 +16,7 @@
import logging
import os
import os.path as osp
import platform
import random
from contextlib import contextmanager
from datetime import datetime, timezone
@@ -28,6 +29,12 @@ import torch
from omegaconf import DictConfig
def none_or_int(value):
if value == "None":
return None
return int(value)
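`none_or_int` is meant for argparse flags that accept either an integer or the literal string "None" (a usage sketch; the `--fps` flag is illustrative):

```python
import argparse
from lerobot.common.utils.utils import none_or_int

parser = argparse.ArgumentParser()
parser.add_argument("--fps", type=none_or_int, default=None)
assert parser.parse_args(["--fps", "None"]).fps is None
assert parser.parse_args(["--fps", "30"]).fps == 30
```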
def inside_slurm():
"""Check whether the python process was launched through slurm"""
# TODO(rcadene): return False for interactive mode `--pty bash`
@@ -183,3 +190,30 @@ def print_cuda_memory_usage():
def capture_timestamp_utc():
return datetime.now(timezone.utc)
def say(text, blocking=False):
# Check if mac, linux, or windows.
if platform.system() == "Darwin":
cmd = f'say "{text}"'
elif platform.system() == "Linux":
cmd = f'spd-say "{text}"'
elif platform.system() == "Windows":
cmd = (
'PowerShell -Command "Add-Type -AssemblyName System.Speech; '
f"(New-Object System.Speech.Synthesis.SpeechSynthesizer).Speak('{text}')\""
)
if not blocking and platform.system() in ["Darwin", "Linux"]:
# TODO(rcadene): Make it work for Windows
# Use the ampersand to run command in the background
cmd += " &"
os.system(cmd)
def log_say(text, play_sounds, blocking=False):
logging.info(text)
if play_sounds:
say(text, blocking)
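`log_say` pairs each spoken prompt with a log entry; `blocking=True` waits for the speech command to return (a usage sketch; blocking is only effective on macOS/Linux per the TODO above):

```python
from lerobot.common.utils.utils import log_say

log_say("Warmup record", play_sounds=True)                  # logs, then speaks in the background
log_say("Done recording", play_sounds=True, blocking=True)  # logs, then waits for speech to finish
```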

10
lerobot/configs/env/aloha_real.yaml vendored Normal file
View File

@@ -0,0 +1,10 @@
# @package _global_
fps: 30
env:
name: real_world
task: null
state_dim: 18
action_dim: 18
fps: ${fps}

View File

@@ -1,16 +1,22 @@
# @package _global_
# Use `act_real.yaml` to train on real-world Aloha/Aloha2 datasets.
# Compared to `act.yaml`, it contains 4 cameras (i.e. cam_right_wrist, cam_left_wrist, images,
# cam_low) instead of 1 camera (i.e. top). Also, `training.eval_freq` is set to -1. This config is used
# to evaluate checkpoints at a certain frequency of training steps. When it is set to -1, it deactivates evaluation.
# This is because real-world evaluation is done through [dora-lerobot](https://github.com/dora-rs/dora-lerobot).
# Look at its README for more information on how to evaluate a checkpoint in the real-world.
# Use `act_aloha_real.yaml` to train on real-world datasets collected on Aloha or Aloha-2 robots.
# Compared to `act.yaml`, it contains 4 cameras (i.e. cam_right_wrist, cam_left_wrist, cam_high, cam_low) instead of 1 camera (i.e. top).
# Also, `training.eval_freq` is set to -1. This parameter is used to evaluate checkpoints at a certain frequency of training steps.
# When it is set to -1, it deactivates evaluation. This is because real-world evaluation is done through our `control_robot.py` script.
# Look at the documentation in the header of `control_robot.py` for more information on how to collect data, train and evaluate a policy.
#
# Example of usage for training:
# Example of usage for training and inference with `control_robot.py`:
# ```bash
# python lerobot/scripts/train.py \
# policy=act_real \
# policy=act_aloha_real \
# env=aloha_real
# ```
#
# Example of usage for training and inference with [Dora-rs](https://github.com/dora-rs/dora-lerobot):
# ```bash
# python lerobot/scripts/train.py \
# policy=act_aloha_real \
# env=dora_aloha_real
# ```
@@ -36,10 +42,11 @@ override_dataset_stats:
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
training:
offline_steps: 100000
offline_steps: 80000
online_steps: 0
eval_freq: -1
save_freq: 20000
save_freq: 10000
log_freq: 100
save_checkpoint: true
batch_size: 8
@@ -62,7 +69,7 @@ policy:
# Input / output structure.
n_obs_steps: 1
chunk_size: 100 # chunk_size
chunk_size: 100
n_action_steps: 100
input_shapes:
@@ -107,7 +114,7 @@ policy:
n_vae_encoder_layers: 4
# Inference.
temporal_ensemble_coeff: null
temporal_ensemble_momentum: null
# Training and loss computation.
dropout: 0.1

View File

@@ -1,110 +0,0 @@
# @package _global_
# Use `act_real_no_state.yaml` to train on real-world Aloha/Aloha2 datasets when cameras are moving (e.g. wrist cameras)
# Compared to `act_real.yaml`, it is camera-only and does not use the state (a vector of robot joint positions) as input.
# We validated experimentally that not using the state reaches a better success rate. Our hypothesis is that `act_real.yaml` might
# overfit to the state, because the images are more complex to learn from since they are moving.
#
# Example of usage for training:
# ```bash
# python lerobot/scripts/train.py \
# policy=act_real_no_state \
# env=dora_aloha_real
# ```
seed: 1000
dataset_repo_id: lerobot/aloha_static_vinh_cup
override_dataset_stats:
observation.images.cam_right_wrist:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
observation.images.cam_left_wrist:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
observation.images.cam_high:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
observation.images.cam_low:
# stats from imagenet, since we use a pretrained vision model
mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
training:
offline_steps: 100000
online_steps: 0
eval_freq: -1
save_freq: 20000
save_checkpoint: true
batch_size: 8
lr: 1e-5
lr_backbone: 1e-5
weight_decay: 1e-4
grad_clip_norm: 10
online_steps_between_rollouts: 1
delta_timestamps:
action: "[i / ${fps} for i in range(${policy.chunk_size})]"
eval:
n_episodes: 50
batch_size: 50
# See `configuration_act.py` for more details.
policy:
name: act
# Input / output structure.
n_obs_steps: 1
chunk_size: 100 # chunk_size
n_action_steps: 100
input_shapes:
# TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
observation.images.cam_right_wrist: [3, 480, 640]
observation.images.cam_left_wrist: [3, 480, 640]
observation.images.cam_high: [3, 480, 640]
observation.images.cam_low: [3, 480, 640]
output_shapes:
action: ["${env.action_dim}"]
# Normalization / Unnormalization
input_normalization_modes:
observation.images.cam_right_wrist: mean_std
observation.images.cam_left_wrist: mean_std
observation.images.cam_high: mean_std
observation.images.cam_low: mean_std
output_normalization_modes:
action: mean_std
# Architecture.
# Vision backbone.
vision_backbone: resnet18
pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
replace_final_stride_with_dilation: false
# Transformer layers.
pre_norm: false
dim_model: 512
n_heads: 8
dim_feedforward: 3200
feedforward_activation: relu
n_encoder_layers: 4
# Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code
# that means only the first layer is used. Here we match the original implementation by setting this to 1.
# See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521.
n_decoder_layers: 1
# VAE.
use_vae: true
latent_dim: 32
n_vae_encoder_layers: 4
# Inference.
temporal_ensemble_coeff: null
# Training and loss computation.
dropout: 0.1
kl_weight: 10.0

View File

@@ -0,0 +1,24 @@
_target_: lerobot.common.robot_devices.robots.stretch.StretchRobot
robot_type: stretch3
cameras:
navigation:
_target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
camera_index: /dev/hello-nav-head-camera
fps: 10
width: 1280
height: 720
rotation: -90
head:
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera.init_from_name
name: Intel RealSense D435I
fps: 30
width: 640
height: 480
rotation: 90
wrist:
_target_: lerobot.common.robot_devices.cameras.intelrealsense.IntelRealSenseCamera.init_from_name
name: Intel RealSense D405
fps: 30
width: 640
height: 480

View File

@@ -99,161 +99,53 @@ python lerobot/scripts/control_robot.py record \
"""
import argparse
import concurrent.futures
import json
import logging
import os
import platform
import shutil
import time
import traceback
from contextlib import nullcontext
from functools import cache
from pathlib import Path
import cv2
import torch
import tqdm
from omegaconf import DictConfig
from PIL import Image
from termcolor import colored
from typing import List
# from safetensors.torch import load_file, save_file
from lerobot.common.datasets.compute_stats import compute_stats
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import to_hf_dataset
from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, get_default_encoding
from lerobot.common.datasets.utils import calculate_episode_data_index, create_branch
from lerobot.common.datasets.video_utils import encode_video_frames
from lerobot.common.policies.factory import make_policy
from lerobot.common.robot_devices.robots.factory import make_robot
from lerobot.common.robot_devices.robots.utils import Robot, get_arm_id
from lerobot.common.robot_devices.utils import busy_wait
from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed
from lerobot.scripts.eval import get_pretrained_policy_path
from lerobot.scripts.push_dataset_to_hub import (
push_dataset_card_to_hub,
push_meta_data_to_hub,
push_videos_to_hub,
save_meta_data,
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.populate_dataset import (
create_lerobot_dataset,
delete_current_episode,
init_dataset,
save_current_episode,
)
########################################################################################
# Utilities
########################################################################################
def say(text, blocking=False):
# Check if mac, linux, or windows.
if platform.system() == "Darwin":
cmd = f'say "{text}"'
elif platform.system() == "Linux":
cmd = f'spd-say "{text}"'
elif platform.system() == "Windows":
cmd = (
'PowerShell -Command "Add-Type -AssemblyName System.Speech; '
f"(New-Object System.Speech.Synthesis.SpeechSynthesizer).Speak('{text}')\""
)
if not blocking and platform.system() in ["Darwin", "Linux"]:
# TODO(rcadene): Make it work for Windows
# Use the ampersand to run command in the background
cmd += " &"
os.system(cmd)
def save_image(img_tensor, key, frame_index, episode_index, videos_dir):
img = Image.fromarray(img_tensor.numpy())
path = videos_dir / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png"
path.parent.mkdir(parents=True, exist_ok=True)
img.save(str(path), quality=100)
def none_or_int(value):
if value == "None":
return None
return int(value)
def log_control_info(robot, dt_s, episode_index=None, frame_index=None, fps=None):
log_items = []
if episode_index is not None:
log_items.append(f"ep:{episode_index}")
if frame_index is not None:
log_items.append(f"frame:{frame_index}")
def log_dt(shortname, dt_val_s):
nonlocal log_items, fps
info_str = f"{shortname}:{dt_val_s * 1000:5.2f} ({1/ dt_val_s:3.1f}hz)"
if fps is not None:
actual_fps = 1 / dt_val_s
if actual_fps < fps - 1:
info_str = colored(info_str, "yellow")
log_items.append(info_str)
# total step time displayed in milliseconds and its frequency
log_dt("dt", dt_s)
for name in robot.leader_arms:
key = f"read_leader_{name}_pos_dt_s"
if key in robot.logs:
log_dt("dtRlead", robot.logs[key])
for name in robot.follower_arms:
key = f"write_follower_{name}_goal_pos_dt_s"
if key in robot.logs:
log_dt("dtWfoll", robot.logs[key])
key = f"read_follower_{name}_pos_dt_s"
if key in robot.logs:
log_dt("dtRfoll", robot.logs[key])
for name in robot.cameras:
key = f"read_camera_{name}_dt_s"
if key in robot.logs:
log_dt(f"dtR{name}", robot.logs[key])
info_str = " ".join(log_items)
logging.info(info_str)
@cache
def is_headless():
"""Detects if python is running without a monitor."""
try:
import pynput # noqa
return False
except Exception:
print(
"Error trying to import pynput. Switching to headless mode. "
"As a result, the video stream from the cameras won't be shown, "
"and you won't be able to change the control flow with keyboards. "
"For more info, see traceback below.\n"
)
traceback.print_exc()
print()
return True
from lerobot.common.robot_devices.control_utils import (
control_loop,
has_method,
init_keyboard_listener,
init_policy,
log_control_info,
record_episode,
reset_environment,
sanity_check_dataset_name,
stop_recording,
warmup_record,
)
from lerobot.common.robot_devices.robots.factory import make_robot
from lerobot.common.robot_devices.robots.utils import Robot
from lerobot.common.robot_devices.utils import busy_wait, safe_disconnect
from lerobot.common.utils.utils import init_hydra_config, init_logging, log_say, none_or_int
########################################################################################
# Control modes
########################################################################################
@safe_disconnect
def calibrate(robot: Robot, arms: list[str] | None):
available_arms = []
for name in robot.follower_arms:
arm_id = get_arm_id(name, "follower")
available_arms.append(arm_id)
for name in robot.leader_arms:
arm_id = get_arm_id(name, "leader")
available_arms.append(arm_id)
# TODO(aliberts): move this code in robots' classes
if robot.robot_type.startswith("stretch"):
if not robot.is_connected:
robot.connect()
if not robot.is_homed():
robot.home()
return
unknown_arms = [arm_id for arm_id in arms if arm_id not in available_arms]
available_arms_str = " ".join(available_arms)
unknown_arms = [arm_id for arm_id in arms if arm_id not in robot.available_arms]
available_arms_str = " ".join(robot.available_arms)
unknown_arms_str = " ".join(unknown_arms)
if arms is None or len(arms) == 0:
@@ -285,34 +177,27 @@ def calibrate(robot: Robot, arms: list[str] | None):
print("Calibration is done! You can now teleoperate and record datasets!")
def teleoperate(robot: Robot, fps: int | None = None, teleop_time_s: float | None = None):
# TODO(rcadene): Add option to record logs
if not robot.is_connected:
robot.connect()
start_teleop_t = time.perf_counter()
while True:
start_loop_t = time.perf_counter()
robot.teleop_step()
if fps is not None:
dt_s = time.perf_counter() - start_loop_t
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - start_loop_t
log_control_info(robot, dt_s, fps=fps)
if teleop_time_s is not None and time.perf_counter() - start_teleop_t > teleop_time_s:
break
@safe_disconnect
def teleoperate(
robot: Robot, fps: int | None = None, teleop_time_s: float | None = None, display_cameras: bool = False
):
control_loop(
robot,
control_time_s=teleop_time_s,
fps=fps,
teleoperate=True,
display_cameras=display_cameras,
)
@safe_disconnect
def record(
robot: Robot,
policy: torch.nn.Module | None = None,
hydra_cfg: DictConfig | None = None,
root: str,
repo_id: str,
pretrained_policy_name_or_path: str | None = None,
policy_overrides: List[str] | None = None,
fps: int | None = None,
root="data",
repo_id="lerobot/debug",
warmup_time_s=2,
episode_time_s=10,
reset_time_s=5,
@@ -321,375 +206,115 @@ def record(
run_compute_stats=True,
push_to_hub=True,
tags=None,
num_image_writers_per_camera=4,
num_image_writer_processes=0,
num_image_writer_threads_per_camera=4,
force_override=False,
display_cameras=True,
play_sounds=True,
):
# TODO(rcadene): Add option to record logs
# TODO(rcadene): Clean this function via decomposition in higher level functions
listener = None
events = None
policy = None
device = None
use_amp = None
_, dataset_name = repo_id.split("/")
if dataset_name.startswith("eval_") and policy is None:
raise ValueError(
f"Your dataset name begins by 'eval_' ({dataset_name}) but no policy is provided ({policy})."
)
# Load pretrained policy
if pretrained_policy_name_or_path is not None:
policy, policy_fps, device, use_amp = init_policy(pretrained_policy_name_or_path, policy_overrides)
if not video:
raise NotImplementedError()
if fps is None:
fps = policy_fps
logging.warning(f"No fps provided, so using the fps from policy config ({policy_fps}).")
elif fps != policy_fps:
logging.warning(
f"There is a mismatch between the provided fps ({fps}) and the one from policy config ({policy_fps})."
)
# Create empty dataset or load existing saved episodes
sanity_check_dataset_name(repo_id, policy)
dataset = init_dataset(
repo_id,
root,
force_override,
fps,
video,
write_images=robot.has_camera,
num_image_writer_processes=num_image_writer_processes,
num_image_writer_threads=num_image_writer_threads_per_camera * robot.num_cameras,
)
if not robot.is_connected:
robot.connect()
local_dir = Path(root) / repo_id
if local_dir.exists() and force_override:
shutil.rmtree(local_dir)
listener, events = init_keyboard_listener()
episodes_dir = local_dir / "episodes"
episodes_dir.mkdir(parents=True, exist_ok=True)
# Execute a few seconds without recording to:
# 1. teleoperate the robot to move it into a starting position if no policy is provided,
# 2. give time to the robot devices to connect and start synchronizing,
# 3. place the cameras windows on screen
enable_teleoperation = policy is None
log_say("Warmup record", play_sounds)
warmup_record(robot, events, enable_teleoperation, warmup_time_s, display_cameras, fps)
videos_dir = local_dir / "videos"
videos_dir.mkdir(parents=True, exist_ok=True)
if has_method(robot, "teleop_safety_stop"):
robot.teleop_safety_stop()
# Logic to resume data recording
rec_info_path = episodes_dir / "data_recording_info.json"
if rec_info_path.exists():
with open(rec_info_path) as f:
rec_info = json.load(f)
episode_index = rec_info["last_episode_index"] + 1
else:
episode_index = 0
while True:
if dataset["num_episodes"] >= num_episodes:
break
if is_headless():
logging.info(
"Headless environment detected. On-screen cameras display and keyboard inputs will not be available."
episode_index = dataset["num_episodes"]
log_say(f"Recording episode {episode_index}", play_sounds)
record_episode(
dataset=dataset,
robot=robot,
events=events,
episode_time_s=episode_time_s,
display_cameras=display_cameras,
policy=policy,
device=device,
use_amp=use_amp,
fps=fps,
)
# Allow exiting early while recording an episode or resetting the environment,
# by tapping the right arrow key '->'. This might require sudo permission
# to allow your terminal to monitor keyboard events.
exit_early = False
rerecord_episode = False
stop_recording = False
# Execute a few seconds without recording to give time to manually reset the environment
# Current code logic doesn't allow teleoperating during this time.
# TODO(rcadene): add an option to enable teleoperation during reset
# Skip reset for the last episode to be recorded
if not events["stop_recording"] and (
(episode_index < num_episodes - 1) or events["rerecord_episode"]
):
log_say("Reset the environment", play_sounds)
reset_environment(robot, events, reset_time_s)
# Only import pynput if not in a headless environment
if not is_headless():
from pynput import keyboard
if events["rerecord_episode"]:
log_say("Re-record episode", play_sounds)
events["rerecord_episode"] = False
events["exit_early"] = False
delete_current_episode(dataset)
continue
def on_press(key):
nonlocal exit_early, rerecord_episode, stop_recording
try:
if key == keyboard.Key.right:
print("Right arrow key pressed. Exiting loop...")
exit_early = True
elif key == keyboard.Key.left:
print("Left arrow key pressed. Exiting loop and rerecord the last episode...")
rerecord_episode = True
exit_early = True
elif key == keyboard.Key.esc:
print("Escape key pressed. Stopping data recording...")
stop_recording = True
exit_early = True
except Exception as e:
print(f"Error handling key press: {e}")
# Increment dataset["current_episode_index"] by one
save_current_episode(dataset)
listener = keyboard.Listener(on_press=on_press)
listener.start()
if events["stop_recording"]:
break
# Load policy if any
if policy is not None:
# Check device is available
device = get_safe_torch_device(hydra_cfg.device, log=True)
log_say("Stop recording", play_sounds, blocking=True)
stop_recording(robot, listener, display_cameras)
policy.eval()
policy.to(device)
lerobot_dataset = create_lerobot_dataset(dataset, run_compute_stats, push_to_hub, tags, play_sounds)
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
set_global_seed(hydra_cfg.seed)
# override fps using policy fps
fps = hydra_cfg.env.fps
# Execute a few seconds without recording data, to give time
# to the robot devices to connect and start synchronizing.
timestamp = 0
start_warmup_t = time.perf_counter()
is_warmup_print = False
while timestamp < warmup_time_s:
if not is_warmup_print:
logging.info("Warming up (no data recording)")
say("Warming up")
is_warmup_print = True
start_loop_t = time.perf_counter()
if policy is None:
observation, action = robot.teleop_step(record_data=True)
else:
observation = robot.capture_observation()
if not is_headless():
image_keys = [key for key in observation if "image" in key]
for key in image_keys:
cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR))
cv2.waitKey(1)
dt_s = time.perf_counter() - start_loop_t
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - start_loop_t
log_control_info(robot, dt_s, fps=fps)
timestamp = time.perf_counter() - start_warmup_t
# Save images using threads to reach a high fps (30 and above)
# Using `with` to exit smoothly if an exception is raised.
futures = []
num_image_writers = num_image_writers_per_camera * len(robot.cameras)
with concurrent.futures.ThreadPoolExecutor(max_workers=num_image_writers) as executor:
# Start recording all episodes
while episode_index < num_episodes:
logging.info(f"Recording episode {episode_index}")
say(f"Recording episode {episode_index}")
ep_dict = {}
frame_index = 0
timestamp = 0
start_episode_t = time.perf_counter()
while timestamp < episode_time_s:
start_loop_t = time.perf_counter()
if policy is None:
observation, action = robot.teleop_step(record_data=True)
else:
observation = robot.capture_observation()
image_keys = [key for key in observation if "image" in key]
not_image_keys = [key for key in observation if "image" not in key]
for key in image_keys:
futures += [
executor.submit(
save_image, observation[key], key, frame_index, episode_index, videos_dir
)
]
if not is_headless():
image_keys = [key for key in observation if "image" in key]
for key in image_keys:
cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR))
cv2.waitKey(1)
for key in not_image_keys:
if key not in ep_dict:
ep_dict[key] = []
ep_dict[key].append(observation[key])
if policy is not None:
with (
torch.inference_mode(),
torch.autocast(device_type=device.type)
if device.type == "cuda" and hydra_cfg.use_amp
else nullcontext(),
):
# Convert to pytorch format: channel first and float32 in [0,1] with batch dimension
for name in observation:
if "image" in name:
observation[name] = observation[name].type(torch.float32) / 255
observation[name] = observation[name].permute(2, 0, 1).contiguous()
observation[name] = observation[name].unsqueeze(0)
observation[name] = observation[name].to(device)
# Compute the next action with the policy
# based on the current observation
action = policy.select_action(observation)
# Remove batch dimension
action = action.squeeze(0)
# Move to cpu, if not already the case
action = action.to("cpu")
# Order the robot to move
action_sent = robot.send_action(action)
# Action can eventually be clipped using `max_relative_target`,
# so action actually sent is saved in the dataset.
action = {"action": action_sent}
for key in action:
if key not in ep_dict:
ep_dict[key] = []
ep_dict[key].append(action[key])
frame_index += 1
dt_s = time.perf_counter() - start_loop_t
busy_wait(1 / fps - dt_s)
dt_s = time.perf_counter() - start_loop_t
log_control_info(robot, dt_s, fps=fps)
timestamp = time.perf_counter() - start_episode_t
if exit_early:
exit_early = False
break
if not stop_recording:
# Start resetting the env while the executor is finishing
logging.info("Reset the environment")
say("Reset the environment")
timestamp = 0
start_vencod_t = time.perf_counter()
# During env reset we save the data and encode the videos
num_frames = frame_index
for key in image_keys:
tmp_imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
fname = f"{key}_episode_{episode_index:06d}.mp4"
video_path = local_dir / "videos" / fname
if video_path.exists():
video_path.unlink()
# Store the reference to the video frame, even though the videos are not yet encoded
ep_dict[key] = []
for i in range(num_frames):
ep_dict[key].append({"path": f"videos/{fname}", "timestamp": i / fps})
for key in not_image_keys:
ep_dict[key] = torch.stack(ep_dict[key])
for key in action:
ep_dict[key] = torch.stack(ep_dict[key])
ep_dict["episode_index"] = torch.tensor([episode_index] * num_frames)
ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
done = torch.zeros(num_frames, dtype=torch.bool)
done[-1] = True
ep_dict["next.done"] = done
ep_path = episodes_dir / f"episode_{episode_index}.pth"
print("Saving episode dictionary...")
torch.save(ep_dict, ep_path)
rec_info = {
"last_episode_index": episode_index,
}
with open(rec_info_path, "w") as f:
json.dump(rec_info, f)
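# This file is presumably read back on resume so recording restarts at the next episode index
# (see `test_resume_record` further below)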
is_last_episode = stop_recording or (episode_index == (num_episodes - 1))
# Wait if necessary
with tqdm.tqdm(total=reset_time_s, desc="Waiting") as pbar:
while timestamp < reset_time_s and not is_last_episode:
time.sleep(1)
timestamp = time.perf_counter() - start_vencode_t
pbar.update(1)
if exit_early:
exit_early = False
break
# Skip updating the episode index, which forces the episode to be re-recorded
if rerecord_episode:
rerecord_episode = False
continue
episode_index += 1
if is_last_episode:
logging.info("Done recording")
say("Done recording", blocking=True)
if not is_headless():
listener.stop()
logging.info("Waiting for threads writing the images on disk to terminate...")
for _ in tqdm.tqdm(
concurrent.futures.as_completed(futures), total=len(futures), desc="Writing images"
):
pass
break
robot.disconnect()
if not is_headless():
cv2.destroyAllWindows()
num_episodes = episode_index
logging.info("Encoding videos")
say("Encoding videos")
# Use ffmpeg to convert frames stored as png into mp4 videos
for episode_index in tqdm.tqdm(range(num_episodes)):
for key in image_keys:
tmp_imgs_dir = videos_dir / f"{key}_episode_{episode_index:06d}"
fname = f"{key}_episode_{episode_index:06d}.mp4"
video_path = local_dir / "videos" / fname
if video_path.exists():
# Skip if video is already encoded. Could be the case when resuming data recording.
continue
# note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speed up encoding,
# since video encoding with ffmpeg already uses multithreading.
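# An illustrative equivalent (exact flags and frame naming live in `encode_video_frames`):
#   ffmpeg -r {fps} -i {tmp_imgs_dir}/frame_%06d.png -vcodec libx264 {video_path}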
encode_video_frames(tmp_imgs_dir, video_path, fps, overwrite=True)
shutil.rmtree(tmp_imgs_dir)
logging.info("Concatenating episodes")
ep_dicts = []
for episode_index in tqdm.tqdm(range(num_episodes)):
ep_path = episodes_dir / f"episode_{episode_index}.pth"
ep_dict = torch.load(ep_path)
ep_dicts.append(ep_dict)
data_dict = concatenate_episodes(ep_dicts)
total_frames = data_dict["frame_index"].shape[0]
data_dict["index"] = torch.arange(0, total_frames, 1)
hf_dataset = to_hf_dataset(data_dict, video)
episode_data_index = calculate_episode_data_index(hf_dataset)
info = {
"codebase_version": CODEBASE_VERSION,
"fps": fps,
"video": video,
}
if video:
info["encoding"] = get_default_encoding()
lerobot_dataset = LeRobotDataset.from_preloaded(
repo_id=repo_id,
hf_dataset=hf_dataset,
episode_data_index=episode_data_index,
info=info,
videos_dir=videos_dir,
)
if run_compute_stats:
logging.info("Computing dataset statistics")
say("Computing dataset statistics")
stats = compute_stats(lerobot_dataset)
lerobot_dataset.stats = stats
else:
stats = {}
logging.info("Skipping computation of the dataset statistics")
hf_dataset = hf_dataset.with_format(None)  # to remove transforms that can't be saved
hf_dataset.save_to_disk(str(local_dir / "train"))
meta_data_dir = local_dir / "meta_data"
save_meta_data(info, stats, episode_data_index, meta_data_dir)
if push_to_hub:
hf_dataset.push_to_hub(repo_id, revision="main")
push_meta_data_to_hub(repo_id, meta_data_dir, revision="main")
push_dataset_card_to_hub(repo_id, revision="main", tags=tags)
if video:
push_videos_to_hub(repo_id, videos_dir, revision="main")
create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)
logging.info("Exiting")
say("Exiting")
log_say("Exiting", play_sounds)
return lerobot_dataset
def replay(robot: Robot, episode: int, fps: int | None = None, root="data", repo_id="lerobot/debug"):
@safe_disconnect
def replay(
robot: Robot, episode: int, fps: int | None = None, root="data", repo_id="lerobot/debug", play_sounds=True
):
# TODO(rcadene, aliberts): refactor with control_loop, once `dataset` is an instance of LeRobotDataset
# TODO(rcadene): Add option to record logs
local_dir = Path(root) / repo_id
if not local_dir.exists():
@@ -703,8 +328,7 @@ def replay(robot: Robot, episode: int, fps: int | None = None, root="data", repo
if not robot.is_connected:
robot.connect()
logging.info("Replaying episode")
say("Replaying episode", blocking=True)
log_say("Replaying episode", play_sounds, blocking=True)
for idx in range(from_idx, to_idx):
start_episode_t = time.perf_counter()
@@ -749,6 +373,12 @@ if __name__ == "__main__":
parser_teleop.add_argument(
"--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
)
parser_teleop.add_argument(
"--display-cameras",
type=int,
default=1,
help="Display all cameras on screen (set to 1 to display or 0).",
)
parser_record = subparsers.add_parser("record", parents=[base_parser])
parser_record.add_argument(
@@ -804,12 +434,23 @@ if __name__ == "__main__":
help="Add tags to your dataset on the hub.",
)
parser_record.add_argument(
"--num-image-writers-per-camera",
"--num-image-writer-processes",
type=int,
default=0,
help=(
"Number of subprocesses handling the saving of frames as PNGs. Set to 0 to use threads only; "
"set to ≥1 to use subprocesses, each using threads to write images. The best number of processes "
"and threads depends on your system. We recommend 4 threads per camera with 0 processes. "
"If fps is unstable, adjust the thread count. If still unstable, try using 1 or more subprocesses."
),
)
parser_record.add_argument(
"--num-image-writer-threads-per-camera",
type=int,
default=4,
help=(
"Number of threads writing the frames as png images on disk, per camera. "
"Too much threads might cause unstable teleoperation fps due to main thread being blocked. "
"Too many threads might cause unstable teleoperation fps due to main thread being blocked. "
"Not enough threads might cause low camera fps."
),
)
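For instance, a recording run might combine these two flags as follows (paths and repo id are illustrative, and this assumes the `record` parser also exposes `--fps`, `--root` and `--repo-id`):

```bash
python lerobot/scripts/control_robot.py record \
    --fps 30 \
    --root data \
    --repo-id lerobot/debug \
    --num-image-writer-processes 1 \
    --num-image-writer-threads-per-camera 4
```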
@@ -875,19 +516,7 @@ if __name__ == "__main__":
teleoperate(robot, **kwargs)
elif control_mode == "record":
pretrained_policy_name_or_path = args.pretrained_policy_name_or_path
policy_overrides = args.policy_overrides
del kwargs["pretrained_policy_name_or_path"]
del kwargs["policy_overrides"]
policy_cfg = None
if pretrained_policy_name_or_path is not None:
pretrained_policy_path = get_pretrained_policy_path(pretrained_policy_name_or_path)
policy_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", policy_overrides)
policy = make_policy(hydra_cfg=policy_cfg, pretrained_policy_name_or_path=pretrained_policy_path)
record(robot, policy, policy_cfg, **kwargs)
else:
record(robot, **kwargs)
record(robot, **kwargs)
elif control_mode == "replay":
replay(robot, **kwargs)

View File

@@ -383,7 +383,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
logging.info(f"Checkpoint policy after step {step}")
# Note: Save with step as the identifier, and format it to have at least 6 digits but more if
# needed (choose 6 as a minimum for consistency without being overkill).
logger.save_checkpont(
logger.save_checkpoint(
step,
policy,
optimizer,

View File

@@ -250,7 +250,7 @@
if(!canPlayVideos){
this.videoCodecError = true;
}
// process CSV data
this.videos = document.querySelectorAll('video');
this.video = this.videos[0];

5625
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -64,9 +64,11 @@ pandas = {version = ">=2.2.2", optional = true}
scikit-image = {version = ">=0.23.2", optional = true}
dynamixel-sdk = {version = ">=3.7.31", optional = true}
pynput = {version = ">=1.7.7", optional = true}
# TODO(rcadene, salibert): 71.0.1 has a bug
setuptools = {version = "!=71.0.1", optional = true}
pyrealsense2 = {version = ">=2.55.1.6486", markers = "sys_platform != 'darwin'", optional = true}
setuptools = {version = "!=71.0.1", optional = true} # TODO(rcadene, aliberts): 71.0.1 has a bug
pyrealsense2 = {version = ">=2.55.1.6486", markers = "sys_platform != 'darwin'", optional = true} # TODO(rcadene, aliberts): Fix on Mac
pyrender = {git = "https://github.com/mmatl/pyrender.git", markers = "sys_platform == 'linux'", optional = true}
hello-robot-stretch-body = {version = ">=0.7.27", markers = "sys_platform == 'linux'", optional = true}
pyserial = {version = ">=3.5", optional = true}
[tool.poetry.extras]
@@ -75,11 +77,12 @@ pusht = ["gym-pusht"]
xarm = ["gym-xarm"]
aloha = ["gym-aloha"]
dev = ["pre-commit", "debugpy"]
test = ["pytest", "pytest-cov"]
test = ["pytest", "pytest-cov", "pyserial"]
umi = ["imagecodecs"]
video_benchmark = ["scikit-image", "pandas"]
dynamixel = ["dynamixel-sdk", "pynput"]
intelrealsense = ["pyrealsense2"]
stretch = ["hello-robot-stretch-body", "pyrender", "pyrealsense2", "pynput"]
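The new `stretch` extra installs like the others, e.g.:

```bash
poetry install --sync --extras "stretch"
```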
[tool.ruff]
line-length = 110

View File

@@ -13,13 +13,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import traceback
import pytest
from serial import SerialException
from lerobot import available_cameras, available_motors, available_robots
from lerobot.common.utils.utils import init_hydra_config
from .utils import DEVICE, ROBOT_CONFIG_PATH_TEMPLATE
from tests.utils import DEVICE, ROBOT_CONFIG_PATH_TEMPLATE, make_camera, make_motors_bus
def pytest_collection_finish():
@@ -28,6 +30,11 @@ def pytest_collection_finish():
@pytest.fixture
def is_robot_available(robot_type):
if robot_type not in available_robots:
raise ValueError(
f"The robot type '{robot_type}' is not valid. Expected one of these '{available_robots}"
)
try:
from lerobot.common.robot_devices.robots.factory import make_robot
@@ -37,7 +44,76 @@ def is_robot_available(robot_type):
robot.connect()
del robot
return True
except Exception:
traceback.print_exc()
except Exception as e:
print(f"\nA {robot_type} robot is not available.")
if isinstance(e, ModuleNotFoundError):
print(f"\nInstall module '{e.name}'")
elif isinstance(e, SerialException):
print("\nNo physical motors bus detected.")
else:
traceback.print_exc()
return False
@pytest.fixture
def is_camera_available(camera_type):
if camera_type not in available_cameras:
raise ValueError(
f"The camera type '{camera_type}' is not valid. Expected one of these '{available_cameras}"
)
try:
camera = make_camera(camera_type)
camera.connect()
del camera
return True
except Exception as e:
print(f"\nA {camera_type} camera is not available.")
if isinstance(e, ModuleNotFoundError):
print(f"\nInstall module '{e.name}'")
elif isinstance(e, ValueError) and "camera_index" in e.args[0]:
print("\nNo physical camera detected.")
else:
traceback.print_exc()
return False
@pytest.fixture
def is_motor_available(motor_type):
if motor_type not in available_motors:
raise ValueError(
f"The motor type '{motor_type}' is not valid. Expected one of these '{available_motors}"
)
try:
motors_bus = make_motors_bus(motor_type)
motors_bus.connect()
del motors_bus
return True
except Exception as e:
print(f"\nA {motor_type} motor is not available.")
if isinstance(e, ModuleNotFoundError):
print(f"\nInstall module '{e.name}'")
elif isinstance(e, SerialException):
print("\nNo physical motors bus detected.")
else:
traceback.print_exc()
return False
@pytest.fixture
def patch_builtins_input(monkeypatch):
def print_text(text=None):
if text is not None:
print(text)
monkeypatch.setattr("builtins.input", print_text)

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b5a9f73a2356aff9c717cdfd0d37a6da08b0cf2cc09c98edbc9492501b7f64a5
size 5104

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:28738b3cfad17af0ac5181effdd796acdf7953cd5bcca3f421a11ddfd6b0076f
size 30800

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4bb8a197a40456fdbc16029126268e6bcef3eca1837d88235165dc7e14618bea
size 68

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bea60cce42d324f539dd3bca1e66b5ba6391838fdcadb00efc25f3240edb529a
size 33600

83
tests/mock_cv2.py Normal file
View File

@@ -0,0 +1,83 @@
from functools import cache
import numpy as np
CAP_PROP_FPS = 5
CAP_PROP_FRAME_WIDTH = 3
CAP_PROP_FRAME_HEIGHT = 4
COLOR_RGB2BGR = 4
COLOR_BGR2RGB = 4
ROTATE_90_COUNTERCLOCKWISE = 2
ROTATE_90_CLOCKWISE = 0
ROTATE_180 = 1
@cache
def _generate_image(width: int, height: int):
return np.random.randint(0, 256, size=(height, width, 3), dtype=np.uint8)
def cvtColor(color_image, color_conversion): # noqa: N802
if color_conversion in [COLOR_RGB2BGR, COLOR_BGR2RGB]:
return color_image[:, :, [2, 1, 0]]
else:
raise NotImplementedError(color_conversion)
def rotate(color_image, rotation):
if rotation is None:
return color_image
elif rotation == ROTATE_90_CLOCKWISE:
return np.rot90(color_image, k=1)
elif rotation == ROTATE_180:
return np.rot90(color_image, k=2)
elif rotation == ROTATE_90_COUNTERCLOCKWISE:
return np.rot90(color_image, k=3)
else:
raise NotImplementedError(rotation)
class VideoCapture:
def __init__(self, *args, **kwargs):
self._mock_dict = {
CAP_PROP_FPS: 30,
CAP_PROP_FRAME_WIDTH: 640,
CAP_PROP_FRAME_HEIGHT: 480,
}
self._is_opened = True
def isOpened(self): # noqa: N802
return self._is_opened
def set(self, propId: int, value: float) -> bool: # noqa: N803
if not self._is_opened:
raise RuntimeError("Camera is not opened")
self._mock_dict[propId] = value
return True
def get(self, propId: int) -> float: # noqa: N803
if not self._is_opened:
raise RuntimeError("Camera is not opened")
value = self._mock_dict[propId]
if value == 0:
if propId == CAP_PROP_FRAME_HEIGHT:
value = 480
elif propId == CAP_PROP_FRAME_WIDTH:
value = 640
return value
def read(self):
if not self._is_opened:
raise RuntimeError("Camera is not opened")
h = self.get(CAP_PROP_FRAME_HEIGHT)
w = self.get(CAP_PROP_FRAME_WIDTH)
ret = True
return ret, _generate_image(width=w, height=h)
def release(self):
self._is_opened = False
def __del__(self):
if self._is_opened:
self.release()
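A minimal sketch of how this mock can stand in for `cv2` in tests (the import path mirrors this file; values are illustrative):

```python
import tests.mock_cv2 as cv2

cap = cv2.VideoCapture(0)
ok, frame = cap.read()  # deterministic random 480x640x3 uint8 image, cached per size
bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # channel swap only
cap.release()
```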

View File

@@ -0,0 +1,87 @@
"""Mocked classes and functions from dynamixel_sdk to allow for continuous integration
and testing code logic that requires hardware and devices (e.g. robot arms, cameras)
Warning: These mocked versions are minimalist. They do not exactly mock every behavior
from the original classes and functions (e.g. return types might be None instead of boolean).
"""
# from dynamixel_sdk import COMM_SUCCESS
DEFAULT_BAUDRATE = 9_600
COMM_SUCCESS = 0 # tx or rx packet communication success
def convert_to_bytes(value, bytes):
# TODO(rcadene): remove need to mock `convert_to_bytes` by implementing the inverse transform
# `convert_bytes_to_value`
del bytes # unused
return value
class PortHandler:
def __init__(self, port):
self.port = port
# factory default baudrate
self.baudrate = DEFAULT_BAUDRATE
def openPort(self): # noqa: N802
return True
def closePort(self): # noqa: N802
pass
def setPacketTimeoutMillis(self, timeout_ms): # noqa: N802
del timeout_ms # unused
def getBaudRate(self): # noqa: N802
return self.baudrate
def setBaudRate(self, baudrate): # noqa: N802
self.baudrate = baudrate
class PacketHandler:
def __init__(self, protocol_version):
del protocol_version # unused
# Use packet_handler.data to communicate across Read and Write
self.data = {}
class GroupSyncRead:
def __init__(self, port_handler, packet_handler, address, bytes):
self.packet_handler = packet_handler
def addParam(self, motor_index): # noqa: N802
if motor_index not in self.packet_handler.data:
# Initialize motor default values
self.packet_handler.data[motor_index] = {
# Key (int) are from X_SERIES_CONTROL_TABLE
7: motor_index, # ID
8: DEFAULT_BAUDRATE, # Baud_rate
10: 0, # Drive_Mode
64: 0, # Torque_Enable
# Set 2560 since calibration values for the Aloha gripper are between start_pos=2499 and end_pos=3144
# For other joints, 2560 will be autocorrected to be in calibration range
132: 2560, # Present_Position
}
def txRxPacket(self): # noqa: N802
return COMM_SUCCESS
def getData(self, index, address, bytes): # noqa: N802
return self.packet_handler.data[index][address]
class GroupSyncWrite:
def __init__(self, port_handler, packet_handler, address, bytes):
self.packet_handler = packet_handler
self.address = address
def addParam(self, index, data): # noqa: N802
self.changeParam(index, data)
def txPacket(self): # noqa: N802
return COMM_SUCCESS
def changeParam(self, index, data): # noqa: N802
self.packet_handler.data[index][self.address] = data
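A minimal sketch of how these mocks interact, with reads and writes round-tripping through `packet_handler.data` (port, address and values are illustrative):

```python
port = PortHandler("/dev/ttyUSB0")
packet = PacketHandler(protocol_version=2.0)

reader = GroupSyncRead(port, packet, address=132, bytes=4)
reader.addParam(1)  # initializes default values for motor id 1

writer = GroupSyncWrite(port, packet, address=132, bytes=4)
writer.addParam(1, 2048)  # Present_Position := 2048

assert reader.txRxPacket() == COMM_SUCCESS
assert reader.getData(1, 132, 4) == 2048
```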

135
tests/mock_pyrealsense2.py Normal file
View File

@@ -0,0 +1,135 @@
import enum
import numpy as np
class stream(enum.Enum): # noqa: N801
color = 0
depth = 1
class format(enum.Enum): # noqa: N801
rgb8 = 0
z16 = 1
class config: # noqa: N801
def enable_device(self, device_id: str):
self.device_enabled = device_id
def enable_stream(self, stream_type: stream, width=None, height=None, color_format=None, fps=None):
self.stream_type = stream_type
# Overwrite default values when possible
self.width = 848 if width is None else width
self.height = 480 if height is None else height
self.color_format = format.rgb8 if color_format is None else color_format
self.fps = 30 if fps is None else fps
class RSColorProfile:
def __init__(self, config):
self.config = config
def fps(self):
return self.config.fps
def width(self):
return self.config.width
def height(self):
return self.config.height
class RSColorStream:
def __init__(self, config):
self.config = config
def as_video_stream_profile(self):
return RSColorProfile(self.config)
class RSProfile:
def __init__(self, config):
self.config = config
def get_stream(self, color_format):
del color_format # unused
return RSColorStream(self.config)
class pipeline: # noqa: N801
def __init__(self):
self.started = False
self.config = None
def start(self, config):
self.started = True
self.config = config
return RSProfile(self.config)
def stop(self):
if not self.started:
raise RuntimeError("You need to start the camera before stop.")
self.started = False
self.config = None
def wait_for_frames(self, timeout_ms=50000):
del timeout_ms # unused
return RSFrames(self.config)
class RSFrames:
def __init__(self, config):
self.config = config
def get_color_frame(self):
return RSColorFrame(self.config)
def get_depth_frame(self):
return RSDepthFrame(self.config)
class RSColorFrame:
def __init__(self, config):
self.config = config
def get_data(self):
data = np.ones((self.config.height, self.config.width, 3), dtype=np.uint8)
# Create a difference between rgb and bgr
data[:, :, 0] = 2
return data
class RSDepthFrame:
def __init__(self, config):
self.config = config
def get_data(self):
return np.ones((self.config.height, self.config.width), dtype=np.uint16)
class RSDevice:
def __init__(self):
pass
def get_info(self, camera_info) -> str:
del camera_info # unused
# return fake serial number
return "123456789"
class context: # noqa: N801
def __init__(self):
pass
def query_devices(self):
return [RSDevice()]
class camera_info: # noqa: N801
# fake name
name = "Intel RealSense D435I"
def __init__(self, serial_number):
del serial_number
pass
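A minimal sketch of a capture loop against this mock (import path assumed to mirror the test layout):

```python
import tests.mock_pyrealsense2 as rs

cfg = rs.config()
cfg.enable_stream(rs.stream.color, width=640, height=480, fps=30)
pipe = rs.pipeline()
pipe.start(cfg)
frame = pipe.wait_for_frames().get_color_frame().get_data()  # (480, 640, 3) uint8
pipe.stop()
```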

View File

@@ -1,21 +1,32 @@
"""
Tests meant to be used locally and launched manually.
Tests for physical cameras and their mocked versions.
If the physical camera is not connected to the computer, or not working,
the test will be skipped.
Example usage:
Example of running a specific test:
```bash
pytest -sx tests/test_cameras.py::test_camera
```
Example of running test on a real camera connected to the computer:
```bash
pytest -sx 'tests/test_cameras.py::test_camera[opencv-False]'
pytest -sx 'tests/test_cameras.py::test_camera[intelrealsense-False]'
```
Example of running test on a mocked version of the camera:
```bash
pytest -sx 'tests/test_cameras.py::test_camera[opencv-True]'
pytest -sx 'tests/test_cameras.py::test_camera[intelrealsense-True]'
```
"""
import numpy as np
import pytest
from lerobot import available_robots
from lerobot.common.robot_devices.cameras.opencv import OpenCVCamera, save_images_from_cameras
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
from tests.utils import require_robot
from tests.utils import TEST_CAMERA_TYPES, make_camera, require_camera
CAMERA_INDEX = 2
# Maximum absolute difference between two consecutive images recorded by a camera.
# This value varies from camera to camera.
MAX_PIXEL_DIFFERENCE = 25
@@ -25,9 +36,9 @@ def compute_max_pixel_difference(first_image, second_image):
return np.abs(first_image.astype(float) - second_image.astype(float)).max()
@pytest.mark.parametrize("robot_type", available_robots)
@require_robot
def test_camera(request, robot_type):
@pytest.mark.parametrize("camera_type, mock", TEST_CAMERA_TYPES)
@require_camera
def test_camera(request, camera_type, mock):
"""Test assumes that `camera.read()` returns the same image when called multiple times in a row.
So the environment should not change (you shouldn't be in front of the camera) and the camera should not be moving.
@@ -36,10 +47,12 @@ def test_camera(request, robot_type):
"""
# TODO(rcadene): measure fps in nightly?
# TODO(rcadene): test logs
# TODO(rcadene): add compatibility with other camera APIs
if camera_type == "opencv" and not mock:
pytest.skip("TODO(rcadene): fix test for opencv physical camera")
# Test instantiating
camera = OpenCVCamera(CAMERA_INDEX)
camera = make_camera(camera_type, mock=mock)
# Test reading, async reading, disconnecting before connecting raises an error
with pytest.raises(RobotDeviceNotConnectedError):
@@ -53,7 +66,7 @@ def test_camera(request, robot_type):
del camera
# Test connecting
camera = OpenCVCamera(CAMERA_INDEX)
camera = make_camera(camera_type, mock=mock)
camera.connect()
assert camera.is_connected
assert camera.fps is not None
@@ -78,11 +91,14 @@ def test_camera(request, robot_type):
camera.read()
color_image = camera.read()
async_color_image = camera.async_read()
print(
error_msg = (
"max_pixel_difference between read() and async_read()",
compute_max_pixel_difference(color_image, async_color_image),
)
assert np.allclose(color_image, async_color_image, rtol=1e-5, atol=MAX_PIXEL_DIFFERENCE)
# TODO(rcadene): properly set `rtol`
np.testing.assert_allclose(
color_image, async_color_image, rtol=1e-5, atol=MAX_PIXEL_DIFFERENCE, err_msg=error_msg
)
# Test disconnecting
camera.disconnect()
@@ -90,29 +106,60 @@ def test_camera(request, robot_type):
assert camera.thread is None
# Test disconnecting with `__del__`
camera = OpenCVCamera(CAMERA_INDEX)
camera = make_camera(camera_type, mock=mock)
camera.connect()
del camera
# Test acquiring a bgr image
camera = OpenCVCamera(CAMERA_INDEX, color_mode="bgr")
camera = make_camera(camera_type, color_mode="bgr", mock=mock)
camera.connect()
assert camera.color_mode == "bgr"
bgr_color_image = camera.read()
assert np.allclose(color_image, bgr_color_image[:, :, [2, 1, 0]], rtol=1e-5, atol=MAX_PIXEL_DIFFERENCE)
np.testing.assert_allclose(
color_image, bgr_color_image[:, :, [2, 1, 0]], rtol=1e-5, atol=MAX_PIXEL_DIFFERENCE, err_msg=error_msg
)
del camera
# Test acquiring a rotated image
camera = make_camera(camera_type, mock=mock)
camera.connect()
ori_color_image = camera.read()
del camera
for rotation in [None, 90, 180, -90]:
camera = make_camera(camera_type, rotation=rotation, mock=mock)
camera.connect()
if mock:
import tests.mock_cv2 as cv2
else:
import cv2
if rotation is None:
manual_rot_img = ori_color_image
assert camera.rotation is None
elif rotation == 90:
manual_rot_img = np.rot90(color_image, k=1)
assert camera.rotation == cv2.ROTATE_90_CLOCKWISE
elif rotation == 180:
manual_rot_img = np.rot90(color_image, k=2)
assert camera.rotation == cv2.ROTATE_180
elif rotation == -90:
manual_rot_img = np.rot90(color_image, k=3)
assert camera.rotation == cv2.ROTATE_90_COUNTERCLOCKWISE
rot_color_image = camera.read()
np.testing.assert_allclose(
rot_color_image, manual_rot_img, rtol=1e-5, atol=MAX_PIXEL_DIFFERENCE, err_msg=error_msg
)
del camera
# TODO(rcadene): Add a test for a camera that doesn't support fps=60 and raises an OSError
# TODO(rcadene): Add a test for a camera that supports fps=60
# Test fps=10 raises an OSError
camera = OpenCVCamera(CAMERA_INDEX, fps=10)
with pytest.raises(OSError):
camera.connect()
del camera
# Test width and height can be set
camera = OpenCVCamera(CAMERA_INDEX, fps=30, width=1280, height=720)
camera = make_camera(camera_type, fps=30, width=1280, height=720, mock=mock)
camera.connect()
assert camera.fps == 30
assert camera.width == 1280
@@ -125,13 +172,20 @@ def test_camera(request, robot_type):
del camera
# Test not supported width and height raise an error
camera = OpenCVCamera(CAMERA_INDEX, fps=30, width=0, height=0)
camera = make_camera(camera_type, fps=30, width=0, height=0, mock=mock)
with pytest.raises(OSError):
camera.connect()
del camera
@pytest.mark.parametrize("robot_type", available_robots)
@require_robot
def test_save_images_from_cameras(tmpdir, request, robot_type):
save_images_from_cameras(tmpdir, record_time_s=1)
@pytest.mark.parametrize("camera_type, mock", TEST_CAMERA_TYPES)
@require_camera
def test_save_images_from_cameras(tmpdir, request, camera_type, mock):
# TODO(rcadene): refactor
if camera_type == "opencv":
from lerobot.common.robot_devices.cameras.opencv import save_images_from_cameras
elif camera_type == "intelrealsense":
from lerobot.common.robot_devices.cameras.intelrealsense import save_images_from_cameras
# Small `record_time_s` to speedup unit tests
save_images_from_cameras(tmpdir, record_time_s=0.02, mock=mock)

View File

@@ -1,70 +1,450 @@
"""
Tests for physical robots and their mocked versions.
If the physical robots are not connected to the computer, or not working,
the test will be skipped.
Example of running a specific test:
```bash
pytest -sx tests/test_control_robot.py::test_teleoperate
```
Example of running test on real robots connected to the computer:
```bash
pytest -sx 'tests/test_control_robot.py::test_teleoperate[koch-False]'
pytest -sx 'tests/test_control_robot.py::test_teleoperate[koch_bimanual-False]'
pytest -sx 'tests/test_control_robot.py::test_teleoperate[aloha-False]'
```
Example of running test on a mocked version of robots:
```bash
pytest -sx 'tests/test_control_robot.py::test_teleoperate[koch-True]'
pytest -sx 'tests/test_control_robot.py::test_teleoperate[koch_bimanual-True]'
pytest -sx 'tests/test_control_robot.py::test_teleoperate[aloha-True]'
```
"""
import multiprocessing
from pathlib import Path
from unittest.mock import patch
import pytest
from lerobot import available_robots
from lerobot.common.datasets.populate_dataset import add_frame, init_dataset
from lerobot.common.logger import Logger
from lerobot.common.policies.factory import make_policy
from lerobot.common.utils.utils import init_hydra_config
from lerobot.scripts.control_robot import calibrate, record, replay, teleoperate
from lerobot.scripts.train import make_optimizer_and_scheduler
from tests.test_robots import make_robot
from tests.utils import DEFAULT_CONFIG_PATH, DEVICE, require_robot
from tests.utils import DEFAULT_CONFIG_PATH, DEVICE, TEST_ROBOT_TYPES, require_robot
@pytest.mark.parametrize("robot_type", available_robots)
@pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
@require_robot
def test_teleoperate(request, robot_type):
robot = make_robot(robot_type)
def test_teleoperate(tmpdir, request, robot_type, mock):
if mock and robot_type != "aloha":
request.getfixturevalue("patch_builtins_input")
# Create an empty calibration directory to trigger manual calibration
# and avoid writing calibration files in user .cache/calibration folder
tmpdir = Path(tmpdir)
calibration_dir = tmpdir / robot_type
overrides = [f"calibration_dir={calibration_dir}"]
else:
# Use the default .cache/calibration folder when mock=False
overrides = None
robot = make_robot(robot_type, overrides=overrides, mock=mock)
teleoperate(robot, teleop_time_s=1)
teleoperate(robot, fps=30, teleop_time_s=1)
teleoperate(robot, fps=60, teleop_time_s=1)
del robot
@pytest.mark.parametrize("robot_type", available_robots)
@pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
@require_robot
def test_calibrate(request, robot_type):
robot = make_robot(robot_type)
calibrate(robot)
def test_calibrate(tmpdir, request, robot_type, mock):
if mock:
request.getfixturevalue("patch_builtins_input")
# Create an empty calibration directory to trigger manual calibration
tmpdir = Path(tmpdir)
calibration_dir = tmpdir / robot_type
overrides_calibration_dir = [f"calibration_dir={calibration_dir}"]
robot = make_robot(robot_type, overrides=overrides_calibration_dir, mock=mock)
calibrate(robot, arms=robot.available_arms)
del robot
@pytest.mark.parametrize("robot_type", available_robots)
@pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
@require_robot
def test_record_without_cameras(tmpdir, request, robot_type):
root = Path(tmpdir)
def test_record_without_cameras(tmpdir, request, robot_type, mock):
# Avoid using cameras
overrides = ["~cameras"]
if mock and robot_type != "aloha":
request.getfixturevalue("patch_builtins_input")
# Create an empty calibration directory to trigger manual calibration
# and avoid writing calibration files in user .cache/calibration folder
calibration_dir = Path(tmpdir) / robot_type
overrides.append(f"calibration_dir={calibration_dir}")
root = Path(tmpdir) / "data"
repo_id = "lerobot/debug"
robot = make_robot(robot_type, overrides=["~cameras"])
record(robot, fps=30, root=root, repo_id=repo_id, warmup_time_s=1, episode_time_s=1, num_episodes=2)
robot = make_robot(robot_type, overrides=overrides, mock=mock)
record(
robot,
fps=30,
root=root,
repo_id=repo_id,
warmup_time_s=1,
episode_time_s=1,
num_episodes=2,
run_compute_stats=False,
push_to_hub=False,
video=False,
play_sounds=False,
)
@pytest.mark.parametrize("robot_type", available_robots)
@pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
@require_robot
def test_record_and_replay_and_policy(tmpdir, request, robot_type):
def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
tmpdir = Path(tmpdir)
if mock and robot_type != "aloha":
request.getfixturevalue("patch_builtins_input")
# Create an empty calibration directory to trigger manual calibration
# and avoid writing calibration files in user .cache/calibration folder
calibration_dir = tmpdir / robot_type
overrides = [f"calibration_dir={calibration_dir}"]
else:
# Use the default .cache/calibration folder when mock=False or for aloha
overrides = None
env_name = "koch_real"
policy_name = "act_koch_real"
root = Path(tmpdir)
root = tmpdir / "data"
repo_id = "lerobot/debug"
eval_repo_id = "lerobot/eval_debug"
robot = make_robot(robot_type)
robot = make_robot(robot_type, overrides=overrides, mock=mock)
dataset = record(
robot, fps=30, root=root, repo_id=repo_id, warmup_time_s=1, episode_time_s=1, num_episodes=2
robot,
root,
repo_id,
fps=1,
warmup_time_s=1,
episode_time_s=1,
reset_time_s=1,
num_episodes=2,
push_to_hub=False,
# TODO(rcadene, aliberts): test video=True
video=False,
# TODO(rcadene): display cameras through cv2 sometimes crashes on mac
display_cameras=False,
play_sounds=False,
)
assert dataset.num_episodes == 2
assert len(dataset) == 2
replay(robot, episode=0, fps=30, root=root, repo_id=repo_id)
replay(robot, episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False)
# TODO(rcadene, aliberts): rethink this design
if robot_type == "aloha":
env_name = "aloha_real"
policy_name = "act_aloha_real"
elif robot_type in ["koch", "koch_bimanual"]:
env_name = "koch_real"
policy_name = "act_koch_real"
else:
raise NotImplementedError(robot_type)
overrides = [
f"env={env_name}",
f"policy={policy_name}",
f"device={DEVICE}",
]
if robot_type == "koch_bimanual":
overrides += ["env.state_dim=12", "env.action_dim=12"]
overrides += ["wandb.enable=false"]
overrides += ["env.fps=1"]
cfg = init_hydra_config(
DEFAULT_CONFIG_PATH,
overrides=[
f"env={env_name}",
f"policy={policy_name}",
f"device={DEVICE}",
],
overrides=overrides,
)
policy = make_policy(hydra_cfg=cfg, dataset_stats=dataset.stats)
optimizer, lr_scheduler = make_optimizer_and_scheduler(cfg, policy)
out_dir = tmpdir / "logger"
logger = Logger(cfg, out_dir, wandb_job_name="debug")
logger.save_checkpoint(
0,
policy,
optimizer,
lr_scheduler,
identifier=0,
)
pretrained_policy_name_or_path = out_dir / "checkpoints/last/pretrained_model"
record(robot, policy, cfg, run_time_s=1)
# In `examples/9_use_aloha.md`, we advise using `num_image_writer_processes=1`
# during inference, to reach consistent fps, so we test this here.
if robot_type == "aloha":
num_image_writer_processes = 1
# `multiprocessing.set_start_method("spawn", force=True)` avoids a hanging issue
# before exiting pytest. However, it outputs the following error in the log:
# Traceback (most recent call last):
# File "<string>", line 1, in <module>
# File "/Users/rcadene/miniconda3/envs/lerobot/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
# exitcode = _main(fd, parent_sentinel)
# File "/Users/rcadene/miniconda3/envs/lerobot/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
# self = reduction.pickle.load(from_parent)
# File "/Users/rcadene/miniconda3/envs/lerobot/lib/python3.10/multiprocessing/synchronize.py", line 110, in __setstate__
# self._semlock = _multiprocessing.SemLock._rebuild(*state)
# FileNotFoundError: [Errno 2] No such file or directory
# TODO(rcadene, aliberts): fix FileNotFoundError in multiprocessing
multiprocessing.set_start_method("spawn", force=True)
else:
num_image_writer_processes = 0
record(
robot,
root,
eval_repo_id,
pretrained_policy_name_or_path,
warmup_time_s=1,
episode_time_s=1,
reset_time_s=1,
num_episodes=2,
run_compute_stats=False,
push_to_hub=False,
video=False,
display_cameras=False,
play_sounds=False,
num_image_writer_processes=num_image_writer_processes,
)
assert dataset.num_episodes == 2
assert len(dataset) == 2
del robot
@pytest.mark.parametrize("robot_type, mock", [("koch", True)])
@require_robot
def test_resume_record(tmpdir, request, robot_type, mock):
if mock and robot_type != "aloha":
request.getfixturevalue("patch_builtins_input")
# Create an empty calibration directory to trigger manual calibration
# and avoid writing calibration files in user .cache/calibration folder
calibration_dir = tmpdir / robot_type
overrides = [f"calibration_dir={calibration_dir}"]
else:
# Use the default .cache/calibration folder when mock=False or for aloha
overrides = []
robot = make_robot(robot_type, overrides=overrides, mock=mock)
root = Path(tmpdir) / "data"
repo_id = "lerobot/debug"
dataset = record(
robot,
root,
repo_id,
fps=1,
warmup_time_s=0,
episode_time_s=1,
num_episodes=1,
push_to_hub=False,
video=False,
display_cameras=False,
play_sounds=False,
run_compute_stats=False,
)
assert len(dataset) == 1, "`dataset` should contain only 1 frame"
init_dataset_return_value = {}
def wrapped_init_dataset(*args, **kwargs):
nonlocal init_dataset_return_value
init_dataset_return_value = init_dataset(*args, **kwargs)
return init_dataset_return_value
with patch("lerobot.scripts.control_robot.init_dataset", wraps=wrapped_init_dataset):
dataset = record(
robot,
root,
repo_id,
fps=1,
warmup_time_s=0,
episode_time_s=1,
num_episodes=2,
push_to_hub=False,
video=False,
display_cameras=False,
play_sounds=False,
run_compute_stats=False,
)
assert len(dataset) == 2, "`dataset` should contain 2 frames"
assert (
init_dataset_return_value["num_episodes"] == 2
), "`init_dataset` should load the previous episode"
@pytest.mark.parametrize("robot_type, mock", [("koch", True)])
@require_robot
def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):
if mock and robot_type != "aloha":
request.getfixturevalue("patch_builtins_input")
# Create an empty calibration directory to trigger manual calibration
# and avoid writing calibration files in user .cache/calibration folder
calibration_dir = tmpdir / robot_type
overrides = [f"calibration_dir={calibration_dir}"]
else:
# Use the default .cache/calibration folder when mock=False or for aloha
overrides = []
robot = make_robot(robot_type, overrides=overrides, mock=mock)
with (
patch("lerobot.scripts.control_robot.init_keyboard_listener") as mock_listener,
patch("lerobot.common.robot_devices.control_utils.add_frame", wraps=add_frame) as mock_add_frame,
):
mock_events = {}
mock_events["exit_early"] = True
mock_events["rerecord_episode"] = True
mock_events["stop_recording"] = False
mock_listener.return_value = (None, mock_events)
root = Path(tmpdir) / "data"
repo_id = "lerobot/debug"
dataset = record(
robot,
root,
repo_id,
fps=1,
warmup_time_s=0,
episode_time_s=1,
num_episodes=1,
push_to_hub=False,
video=False,
display_cameras=False,
play_sounds=False,
run_compute_stats=False,
)
assert not mock_events["rerecord_episode"], "`rerecord_episode` wasn't properly reset to False"
assert not mock_events["exit_early"], "`exit_early` wasn't properly reset to False"
assert mock_add_frame.call_count == 2, "`add_frame` should have been called 2 times"
assert len(dataset) == 1, "`dataset` should contain only 1 frame"
@pytest.mark.parametrize("robot_type, mock", [("koch", True)])
@require_robot
def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):
if mock:
request.getfixturevalue("patch_builtins_input")
# Create an empty calibration directory to trigger manual calibration
# and avoid writing calibration files in user .cache/calibration folder
calibration_dir = tmpdir / robot_type
overrides = [f"calibration_dir={calibration_dir}"]
else:
# Use the default .cache/calibration folder when mock=False or for aloha
overrides = []
robot = make_robot(robot_type, overrides=overrides, mock=mock)
with (
patch("lerobot.scripts.control_robot.init_keyboard_listener") as mock_listener,
patch("lerobot.common.robot_devices.control_utils.add_frame", wraps=add_frame) as mock_add_frame,
):
mock_events = {}
mock_events["exit_early"] = True
mock_events["rerecord_episode"] = False
mock_events["stop_recording"] = False
mock_listener.return_value = (None, mock_events)
root = Path(tmpdir) / "data"
repo_id = "lerobot/debug"
dataset = record(
robot,
fps=2,
root=root,
repo_id=repo_id,
warmup_time_s=0,
episode_time_s=1,
num_episodes=1,
push_to_hub=False,
video=False,
display_cameras=False,
play_sounds=False,
run_compute_stats=False,
)
assert not mock_events["exit_early"], "`exit_early` wasn't properly reset to False"
assert mock_add_frame.call_count == 1, "`add_frame` should have been called 1 time"
assert len(dataset) == 1, "`dataset` should contain only 1 frame"
@pytest.mark.parametrize(
"robot_type, mock, num_image_writer_processes", [("koch", True, 0), ("koch", True, 1)]
)
@require_robot
def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num_image_writer_processes):
if mock:
request.getfixturevalue("patch_builtins_input")
# Create an empty calibration directory to trigger manual calibration
# and avoid writing calibration files in user .cache/calibration folder
calibration_dir = tmpdir / robot_type
overrides = [f"calibration_dir={calibration_dir}"]
else:
# Use the default .cache/calibration folder when mock=False or for aloha
overrides = []
robot = make_robot(robot_type, overrides=overrides, mock=mock)
with (
patch("lerobot.scripts.control_robot.init_keyboard_listener") as mock_listener,
patch("lerobot.common.robot_devices.control_utils.add_frame", wraps=add_frame) as mock_add_frame,
):
mock_events = {}
mock_events["exit_early"] = True
mock_events["rerecord_episode"] = False
mock_events["stop_recording"] = True
mock_listener.return_value = (None, mock_events)
root = Path(tmpdir) / "data"
repo_id = "lerobot/debug"
dataset = record(
robot,
root,
repo_id,
fps=1,
warmup_time_s=0,
episode_time_s=1,
num_episodes=2,
push_to_hub=False,
video=False,
display_cameras=False,
play_sounds=False,
run_compute_stats=False,
num_image_writer_processes=num_image_writer_processes,
)
assert not mock_events["exit_early"], "`exit_early` wasn't properly reset to False"
assert mock_add_frame.call_count == 1, "`add_frame` should have been called 1 time"
assert len(dataset) == 1, "`dataset` should contain only 1 frame"

View File

@@ -308,12 +308,11 @@ def test_flatten_unflatten_dict():
# "lerobot/cmu_stretch",
],
)
# TODO(rcadene, aliberts): all these tests fail locally on Mac M1, but not on Linux
def test_backward_compatibility(repo_id):
"""The artifacts for this test have been generated by `tests/scripts/save_dataset_to_safetensors.py`."""
dataset = LeRobotDataset(
repo_id,
)
dataset = LeRobotDataset(repo_id)
test_dir = Path("tests/data/save_dataset_to_safetensors") / repo_id

View File

@@ -1,11 +1,23 @@
"""
Tests meant to be used locally and launched manually.
Tests for physical motors and their mocked versions.
If the physical motors are not connected to the computer, or not working,
the test will be skipped.
Example usage:
Example of running a specific test:
```bash
pytest -sx tests/test_motors.py::test_find_port
pytest -sx tests/test_motors.py::test_motors_bus
```
Example of running test on real dynamixel motors connected to the computer:
```bash
pytest -sx 'tests/test_motors.py::test_motors_bus[dynamixel-False]'
```
Example of running test on a mocked version of dynamixel motors:
```bash
pytest -sx 'tests/test_motors.py::test_motors_bus[dynamixel-True]'
```
"""
# TODO(rcadene): measure fps in nightly?
@@ -18,38 +30,31 @@ import time
import numpy as np
import pytest
from lerobot import available_robots
from lerobot.common.robot_devices.motors.utils import MotorsBus
from lerobot.common.robot_devices.robots.factory import make_robot
from lerobot.common.robot_devices.motors.dynamixel import find_port
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
from lerobot.common.utils.utils import init_hydra_config
from tests.utils import ROBOT_CONFIG_PATH_TEMPLATE, require_robot
from tests.utils import TEST_MOTOR_TYPES, make_motors_bus, require_motor
def make_motors_bus(robot_type: str) -> MotorsBus:
# Instantiate a robot and return one of its leader arms
config_path = ROBOT_CONFIG_PATH_TEMPLATE.format(robot=robot_type)
robot_cfg = init_hydra_config(config_path)
robot = make_robot(robot_cfg)
first_bus_name = list(robot.leader_arms.keys())[0]
motors_bus = robot.leader_arms[first_bus_name]
return motors_bus
@pytest.mark.parametrize("motor_type, mock", TEST_MOTOR_TYPES)
@require_motor
def test_find_port(request, motor_type, mock):
if mock:
request.getfixturevalue("patch_builtins_input")
with pytest.raises(OSError):
find_port()
else:
find_port()
@pytest.mark.parametrize("robot_type", available_robots)
@require_robot
def test_find_port(request, robot_type):
from lerobot.common.robot_devices.motors.dynamixel import find_port
@pytest.mark.parametrize("motor_type, mock", TEST_MOTOR_TYPES)
@require_motor
def test_configure_motors_all_ids_1(request, motor_type, mock):
if mock:
request.getfixturevalue("patch_builtins_input")
find_port()
@pytest.mark.parametrize("robot_type", available_robots)
@require_robot
def test_configure_motors_all_ids_1(request, robot_type):
input("Are you sure you want to re-configure the motors? Press enter to continue...")
# This test expect the configuration was already correct.
motors_bus = make_motors_bus(robot_type)
motors_bus = make_motors_bus(motor_type, mock=mock)
motors_bus.connect()
motors_bus.write("Baud_Rate", [0] * len(motors_bus.motors))
motors_bus.set_bus_baudrate(9_600)
@@ -57,16 +62,19 @@ def test_configure_motors_all_ids_1(request, robot_type):
del motors_bus
# Test configure
motors_bus = make_motors_bus(robot_type)
motors_bus = make_motors_bus(motor_type, mock=mock)
motors_bus.connect()
assert motors_bus.are_motors_configured()
del motors_bus
@pytest.mark.parametrize("robot_type", available_robots)
@require_robot
def test_motors_bus(request, robot_type):
motors_bus = make_motors_bus(robot_type)
@pytest.mark.parametrize("motor_type, mock", TEST_MOTOR_TYPES)
@require_motor
def test_motors_bus(request, motor_type, mock):
if mock:
request.getfixturevalue("patch_builtins_input")
motors_bus = make_motors_bus(motor_type, mock=mock)
# Test reading and writing before connecting raises an error
with pytest.raises(RobotDeviceNotConnectedError):
@@ -80,7 +88,7 @@ def test_motors_bus(request, robot_type):
del motors_bus
# Test connecting
motors_bus = make_motors_bus(robot_type)
motors_bus = make_motors_bus(motor_type, mock=mock)
motors_bus.connect()
# Test connecting twice raises an error

View File

@@ -367,8 +367,7 @@ def test_normalize(insert_temporal_dim):
),
("aloha", "act", ["policy.n_action_steps=10"], ""),
("aloha", "act", ["policy.n_action_steps=1000", "policy.chunk_size=1000"], "_1000_steps"),
("dora_aloha_real", "act_real", ["policy.n_action_steps=10"], ""),
("dora_aloha_real", "act_real_no_state", ["policy.n_action_steps=10"], ""),
("dora_aloha_real", "act_aloha_real", ["policy.n_action_steps=10"], ""),
],
)
# As artifacts have been generated on an x86_64 kernel, this test won't

View File

@@ -1,10 +1,26 @@
"""
Tests meant to be used locally and launched manually.
Tests for physical robots and their mocked versions.
If the physical robots are not connected to the computer, or not working,
the test will be skipped.
Example usage:
Example of running a specific test:
```bash
pytest -sx tests/test_robots.py::test_robot
```
Example of running test on real robots connected to the computer:
```bash
pytest -sx 'tests/test_robots.py::test_robot[koch-False]'
pytest -sx 'tests/test_robots.py::test_robot[koch_bimanual-False]'
pytest -sx 'tests/test_robots.py::test_robot[aloha-False]'
```
Example of running test on a mocked version of robots:
```bash
pytest -sx 'tests/test_robots.py::test_robot[koch-True]'
pytest -sx 'tests/test_robots.py::test_robot[koch_bimanual-True]'
pytest -sx 'tests/test_robots.py::test_robot[aloha-True]'
```
"""
from pathlib import Path
@@ -12,41 +28,42 @@ from pathlib import Path
import pytest
import torch
from lerobot import available_robots
from lerobot.common.robot_devices.robots.factory import make_robot as make_robot_from_cfg
from lerobot.common.robot_devices.robots.utils import Robot
from lerobot.common.robot_devices.robots.manipulator import ManipulatorRobot
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
from lerobot.common.utils.utils import init_hydra_config
from tests.utils import ROBOT_CONFIG_PATH_TEMPLATE, require_robot
from tests.utils import TEST_ROBOT_TYPES, make_robot, require_robot
def make_robot(robot_type: str, overrides: list[str] | None = None) -> Robot:
config_path = ROBOT_CONFIG_PATH_TEMPLATE.format(robot=robot_type)
robot_cfg = init_hydra_config(config_path, overrides)
robot = make_robot_from_cfg(robot_cfg)
return robot
@pytest.mark.parametrize("robot_type", available_robots)
@pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
@require_robot
def test_robot(tmpdir, request, robot_type):
def test_robot(tmpdir, request, robot_type, mock):
# TODO(rcadene): measure fps in nightly?
# TODO(rcadene): test logs
# TODO(rcadene): add compatibility with other robots
from lerobot.common.robot_devices.robots.manipulator import ManipulatorRobot
# Save calibration preset
tmpdir = Path(tmpdir)
calibration_dir = tmpdir / robot_type
robot_kwargs = {"robot_type": robot_type}
if robot_type == "aloha" and mock:
# To simplify unit test, we do not rerun manual calibration for Aloha mock=True.
# Instead, we use the files from '.cache/calibration/aloha_default'
overrides_calibration_dir = None
else:
if mock:
request.getfixturevalue("patch_builtins_input")
# Create an empty calibration directory to trigger manual calibration
tmpdir = Path(tmpdir)
calibration_dir = tmpdir / robot_type
overrides_calibration_dir = [f"calibration_dir={calibration_dir}"]
robot_kwargs["calibration_dir"] = calibration_dir
# Test connecting without devices raises an error
robot = ManipulatorRobot()
robot = ManipulatorRobot(**robot_kwargs)
with pytest.raises(ValueError):
robot.connect()
del robot
# Test using robot before connecting raises an error
robot = ManipulatorRobot()
robot = ManipulatorRobot(**robot_kwargs)
with pytest.raises(RobotDeviceNotConnectedError):
robot.teleop_step()
with pytest.raises(RobotDeviceNotConnectedError):
@@ -61,21 +78,23 @@ def test_robot(tmpdir, request, robot_type):
# Test deleting the object without connecting first
del robot
# Test connecting
robot = make_robot(robot_type, overrides=[f"calibration_dir={calibration_dir}"])
robot.connect() # run the manual calibration procedure
# Test connecting (triggers manual calibration)
robot = make_robot(robot_type, overrides=overrides_calibration_dir, mock=mock)
robot.connect()
assert robot.is_connected
# Test connecting twice raises an error
with pytest.raises(RobotDeviceAlreadyConnectedError):
robot.connect()
# Test disconnecting with `__del__`
del robot
# TODO(rcadene, aliberts): Test disconnecting with `__del__` instead of `disconnect`
# del robot
robot.disconnect()
# Test teleop can run
robot = make_robot(robot_type, overrides=[f"calibration_dir={calibration_dir}"])
robot.calibration_dir = calibration_dir
robot = make_robot(robot_type, overrides=overrides_calibration_dir, mock=mock)
if overrides_calibration_dir is not None:
robot.calibration_dir = calibration_dir
robot.connect()
robot.teleop_step()
@@ -108,6 +127,7 @@ def test_robot(tmpdir, request, robot_type):
# TODO(rcadene): skipping image for now as it's challenging to assess equality between two consecutive frames
continue
assert torch.allclose(captured_observation[name], observation[name], atol=1)
assert captured_observation[name].shape == observation[name].shape
# Test send_action can run
robot.send_action(action["action"])
@@ -121,4 +141,3 @@ def test_robot(tmpdir, request, robot_type):
assert not robot.leader_arms[name].is_connected
for name in robot.cameras:
assert not robot.cameras[name].is_connected
del robot

View File

@@ -13,13 +13,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import platform
from copy import copy
from functools import wraps
import pytest
import torch
from lerobot import available_cameras, available_motors, available_robots
from lerobot.common.robot_devices.cameras.utils import Camera
from lerobot.common.robot_devices.motors.utils import MotorsBus
from lerobot.common.robot_devices.robots.factory import make_robot as make_robot_from_cfg
from lerobot.common.robot_devices.robots.utils import Robot
from lerobot.common.utils.import_utils import is_package_available
from lerobot.common.utils.utils import init_hydra_config
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -28,6 +36,32 @@ DEFAULT_CONFIG_PATH = "lerobot/configs/default.yaml"
ROBOT_CONFIG_PATH_TEMPLATE = "lerobot/configs/robot/{robot}.yaml"
TEST_ROBOT_TYPES = []
for robot_type in available_robots:
TEST_ROBOT_TYPES += [(robot_type, True), (robot_type, False)]
TEST_CAMERA_TYPES = []
for camera_type in available_cameras:
TEST_CAMERA_TYPES += [(camera_type, True), (camera_type, False)]
TEST_MOTOR_TYPES = []
for motor_type in available_motors:
TEST_MOTOR_TYPES += [(motor_type, True), (motor_type, False)]
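# e.g. TEST_ROBOT_TYPES == [("koch", True), ("koch", False), ("aloha", True), ("aloha", False), ...]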
# Camera indices used for connecting physical cameras
OPENCV_CAMERA_INDEX = int(os.environ.get("LEROBOT_TEST_OPENCV_CAMERA_INDEX", 0))
INTELREALSENSE_CAMERA_INDEX = int(os.environ.get("LEROBOT_TEST_INTELREALSENSE_CAMERA_INDEX", 128422271614))
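# For instance, to point the camera tests at a different physical device (index value illustrative):
#   LEROBOT_TEST_OPENCV_CAMERA_INDEX=1 pytest -sx 'tests/test_cameras.py::test_camera[opencv-False]'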
DYNAMIXEL_PORT = "/dev/tty.usbmodem575E0032081"
DYNAMIXEL_MOTORS = {
"shoulder_pan": [1, "xl430-w250"],
"shoulder_lift": [2, "xl430-w250"],
"elbow_flex": [3, "xl330-m288"],
"wrist_flex": [4, "xl330-m288"],
"wrist_roll": [5, "xl330-m288"],
"gripper": [6, "xl330-m288"],
}
def require_x86_64_kernel(func):
"""
@@ -173,13 +207,136 @@ def require_robot(func):
# Access the pytest request context to get the is_robot_available fixture
request = kwargs.get("request")
robot_type = kwargs.get("robot_type")
mock = kwargs.get("mock")
if robot_type is None:
raise ValueError("The 'robot_type' must be an argument of the test function.")
if request is None:
raise ValueError("The 'request' fixture must be passed to the test function as a parameter.")
raise ValueError("The 'request' fixture must be an argument of the test function.")
if mock is None:
raise ValueError("The 'mock' variable must be an argument of the test function.")
# The function `is_robot_available` is defined in `tests/conftest.py`
if not request.getfixturevalue("is_robot_available"):
# Run test with a real robot. Skip test if robot connection fails.
if not mock and not request.getfixturevalue("is_robot_available"):
pytest.skip(f"A {robot_type} robot is not available.")
return func(*args, **kwargs)
return wrapper
def require_camera(func):
@wraps(func)
def wrapper(*args, **kwargs):
# Access the pytest request context to get the is_camera_available fixture
request = kwargs.get("request")
camera_type = kwargs.get("camera_type")
mock = kwargs.get("mock")
if request is None:
raise ValueError("The 'request' fixture must be an argument of the test function.")
if camera_type is None:
raise ValueError("The 'camera_type' must be an argument of the test function.")
if mock is None:
raise ValueError("The 'mock' variable must be an argument of the test function.")
if not mock and not request.getfixturevalue("is_camera_available"):
pytest.skip(f"A {camera_type} camera is not available.")
return func(*args, **kwargs)
return wrapper
def require_motor(func):
@wraps(func)
def wrapper(*args, **kwargs):
# Access the pytest request context to get the is_motor_available fixture
request = kwargs.get("request")
motor_type = kwargs.get("motor_type")
mock = kwargs.get("mock")
if request is None:
raise ValueError("The 'request' fixture must be an argument of the test function.")
if motor_type is None:
raise ValueError("The 'motor_type' must be an argument of the test function.")
if mock is None:
raise ValueError("The 'mock' variable must be an argument of the test function.")
if not mock and not request.getfixturevalue("is_motor_available"):
pytest.skip(f"A {motor_type} motor is not available.")
return func(*args, **kwargs)
return wrapper
def make_robot(robot_type: str, overrides: list[str] | None = None, mock=False) -> Robot:
if mock:
overrides = [] if overrides is None else copy(overrides)
# Explicitly add the mock argument to the cameras and set it to true
# TODO(rcadene, aliberts): redesign when we drop hydra
if robot_type == "koch":
overrides.append("+leader_arms.main.mock=true")
overrides.append("+follower_arms.main.mock=true")
if "~cameras" not in overrides:
overrides.append("+cameras.laptop.mock=true")
overrides.append("+cameras.phone.mock=true")
elif robot_type == "koch_bimanual":
overrides.append("+leader_arms.left.mock=true")
overrides.append("+leader_arms.right.mock=true")
overrides.append("+follower_arms.left.mock=true")
overrides.append("+follower_arms.right.mock=true")
if "~cameras" not in overrides:
overrides.append("+cameras.laptop.mock=true")
overrides.append("+cameras.phone.mock=true")
elif robot_type == "aloha":
overrides.append("+leader_arms.left.mock=true")
overrides.append("+leader_arms.right.mock=true")
overrides.append("+follower_arms.left.mock=true")
overrides.append("+follower_arms.right.mock=true")
if "~cameras" not in overrides:
overrides.append("+cameras.cam_high.mock=true")
overrides.append("+cameras.cam_low.mock=true")
overrides.append("+cameras.cam_left_wrist.mock=true")
overrides.append("+cameras.cam_right_wrist.mock=true")
else:
raise NotImplementedError(robot_type)
config_path = ROBOT_CONFIG_PATH_TEMPLATE.format(robot=robot_type)
robot_cfg = init_hydra_config(config_path, overrides)
robot = make_robot_from_cfg(robot_cfg)
return robot
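# A minimal usage sketch (robot type and overrides illustrative; mocked arms also need the
# `patch_builtins_input` fixture and an empty calibration_dir, as in the tests above):
#   robot = make_robot("koch", overrides=["~cameras"], mock=True)
#   robot.connect()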
def make_camera(camera_type, **kwargs) -> Camera:
if camera_type == "opencv":
from lerobot.common.robot_devices.cameras.opencv import OpenCVCamera
camera_index = kwargs.pop("camera_index", OPENCV_CAMERA_INDEX)
return OpenCVCamera(camera_index, **kwargs)
elif camera_type == "intelrealsense":
from lerobot.common.robot_devices.cameras.intelrealsense import IntelRealSenseCamera
camera_index = kwargs.pop("camera_index", INTELREALSENSE_CAMERA_INDEX)
return IntelRealSenseCamera(camera_index, **kwargs)
else:
raise ValueError(f"The camera type '{camera_type}' is not valid.")
def make_motors_bus(motor_type: str, **kwargs) -> MotorsBus:
if motor_type == "dynamixel":
from lerobot.common.robot_devices.motors.dynamixel import DynamixelMotorsBus
port = kwargs.pop("port", DYNAMIXEL_PORT)
motors = kwargs.pop("motors", DYNAMIXEL_MOTORS)
return DynamixelMotorsBus(port, motors, **kwargs)
else:
raise ValueError(f"The motor type '{motor_type}' is not valid.")