forked from tangger/lerobot

LeRobotDataset v2.1 (#711)
Co-authored-by: Remi <remi.cadene@huggingface.co>
Co-authored-by: Remi Cadene <re.cadene@gmail.com>

tests/fixtures/constants.py (vendored, 6 changed lines)
@@ -1,6 +1,6 @@
-from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME
+from lerobot.common.constants import HF_LEROBOT_HOME

-LEROBOT_TEST_DIR = LEROBOT_HOME / "_testing"
+LEROBOT_TEST_DIR = HF_LEROBOT_HOME / "_testing"
 DUMMY_REPO_ID = "dummy/repo"
 DUMMY_ROBOT_TYPE = "dummy_robot"
 DUMMY_MOTOR_FEATURES = {
@@ -27,3 +27,5 @@ DUMMY_VIDEO_INFO = {
     "video.is_depth_map": False,
     "has_audio": False,
 }
+DUMMY_CHW = (3, 96, 128)
+DUMMY_HWC = (96, 128, 3)
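The two new constants describe the same 96x128 RGB image in channel-first (CHW) and channel-last (HWC) layout; the add_frame tests introduced later in this commit use them to check that images are accepted in either layout and stored channel-first. A minimal illustration of the relationship (plain NumPy, not part of the commit):

    import numpy as np

    DUMMY_CHW = (3, 96, 128)  # channel-first: (channels, height, width)
    DUMMY_HWC = (96, 128, 3)  # channel-last: (height, width, channels)

    img_hwc = np.zeros(DUMMY_HWC, dtype=np.uint8)   # camera-style image
    img_chw = np.transpose(img_hwc, (2, 0, 1))      # HWC -> CHW
    assert img_chw.shape == DUMMY_CHW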
tests/fixtures/dataset_factories.py (vendored, 89 changed lines)
@@ -1,5 +1,7 @@
 import random
+from functools import partial
 from pathlib import Path
+from typing import Protocol
 from unittest.mock import patch

 import datasets
@@ -27,8 +29,12 @@ from tests.fixtures.constants import (
 )


+class LeRobotDatasetFactory(Protocol):
+    def __call__(self, *args, **kwargs) -> LeRobotDataset: ...
+
+
 def get_task_index(task_dicts: dict, task: str) -> int:
-    tasks = {d["task_index"]: d["task"] for d in task_dicts}
+    tasks = {d["task_index"]: d["task"] for d in task_dicts.values()}
     task_to_task_index = {task: task_idx for task_idx, task in tasks.items()}
     return task_to_task_index[task]

@@ -141,6 +147,7 @@ def stats_factory():
                 "mean": np.full((3, 1, 1), 0.5, dtype=np.float32).tolist(),
                 "min": np.full((3, 1, 1), 0, dtype=np.float32).tolist(),
                 "std": np.full((3, 1, 1), 0.25, dtype=np.float32).tolist(),
+                "count": [10],
             }
         else:
             stats[key] = {
@@ -148,20 +155,38 @@ def stats_factory():
                 "mean": np.full(shape, 0.5, dtype=dtype).tolist(),
                 "min": np.full(shape, 0, dtype=dtype).tolist(),
                 "std": np.full(shape, 0.25, dtype=dtype).tolist(),
+                "count": [10],
             }
         return stats

     return _create_stats


+@pytest.fixture(scope="session")
+def episodes_stats_factory(stats_factory):
+    def _create_episodes_stats(
+        features: dict[str],
+        total_episodes: int = 3,
+    ) -> dict:
+        episodes_stats = {}
+        for episode_index in range(total_episodes):
+            episodes_stats[episode_index] = {
+                "episode_index": episode_index,
+                "stats": stats_factory(features),
+            }
+        return episodes_stats
+
+    return _create_episodes_stats
+
+
 @pytest.fixture(scope="session")
 def tasks_factory():
     def _create_tasks(total_tasks: int = 3) -> int:
-        tasks_list = []
-        for i in range(total_tasks):
-            task_dict = {"task_index": i, "task": f"Perform action {i}."}
-            tasks_list.append(task_dict)
-        return tasks_list
+        tasks = {}
+        for task_index in range(total_tasks):
+            task_dict = {"task_index": task_index, "task": f"Perform action {task_index}."}
+            tasks[task_index] = task_dict
+        return tasks

     return _create_tasks

@@ -190,10 +215,10 @@ def episodes_factory(tasks_factory):
         # Generate random lengths that sum up to total_length
         lengths = np.random.multinomial(total_frames, [1 / total_episodes] * total_episodes).tolist()

-        tasks_list = [task_dict["task"] for task_dict in tasks]
+        tasks_list = [task_dict["task"] for task_dict in tasks.values()]
         num_tasks_available = len(tasks_list)

-        episodes_list = []
+        episodes = {}
         remaining_tasks = tasks_list.copy()
         for ep_idx in range(total_episodes):
             num_tasks_in_episode = random.randint(1, min(3, num_tasks_available)) if multi_task else 1
@@ -203,15 +228,13 @@ def episodes_factory(tasks_factory):
             for task in episode_tasks:
                 remaining_tasks.remove(task)

-            episodes_list.append(
-                {
-                    "episode_index": ep_idx,
-                    "tasks": episode_tasks,
-                    "length": lengths[ep_idx],
-                }
-            )
+            episodes[ep_idx] = {
+                "episode_index": ep_idx,
+                "tasks": episode_tasks,
+                "length": lengths[ep_idx],
+            }

-        return episodes_list
+        return episodes

     return _create_episodes

@@ -235,7 +258,7 @@ def hf_dataset_factory(features_factory, tasks_factory, episodes_factory, img_ar
         frame_index_col = np.array([], dtype=np.int64)
         episode_index_col = np.array([], dtype=np.int64)
         task_index = np.array([], dtype=np.int64)
-        for ep_dict in episodes:
+        for ep_dict in episodes.values():
             timestamp_col = np.concatenate((timestamp_col, np.arange(ep_dict["length"]) / fps))
             frame_index_col = np.concatenate((frame_index_col, np.arange(ep_dict["length"], dtype=int)))
             episode_index_col = np.concatenate(
@@ -278,6 +301,7 @@ def hf_dataset_factory(features_factory, tasks_factory, episodes_factory, img_ar
 def lerobot_dataset_metadata_factory(
     info_factory,
     stats_factory,
+    episodes_stats_factory,
     tasks_factory,
     episodes_factory,
     mock_snapshot_download_factory,
@@ -287,14 +311,18 @@ def lerobot_dataset_metadata_factory(
         repo_id: str = DUMMY_REPO_ID,
         info: dict | None = None,
         stats: dict | None = None,
+        episodes_stats: list[dict] | None = None,
         tasks: list[dict] | None = None,
         episodes: list[dict] | None = None,
-        local_files_only: bool = False,
     ) -> LeRobotDatasetMetadata:
         if not info:
             info = info_factory()
         if not stats:
             stats = stats_factory(features=info["features"])
+        if not episodes_stats:
+            episodes_stats = episodes_stats_factory(
+                features=info["features"], total_episodes=info["total_episodes"]
+            )
         if not tasks:
             tasks = tasks_factory(total_tasks=info["total_tasks"])
         if not episodes:
@@ -305,21 +333,20 @@ def lerobot_dataset_metadata_factory(
         mock_snapshot_download = mock_snapshot_download_factory(
             info=info,
             stats=stats,
+            episodes_stats=episodes_stats,
             tasks=tasks,
             episodes=episodes,
         )
         with (
-            patch(
-                "lerobot.common.datasets.lerobot_dataset.get_hub_safe_version"
-            ) as mock_get_hub_safe_version_patch,
+            patch("lerobot.common.datasets.lerobot_dataset.get_safe_version") as mock_get_safe_version_patch,
             patch(
                 "lerobot.common.datasets.lerobot_dataset.snapshot_download"
             ) as mock_snapshot_download_patch,
         ):
-            mock_get_hub_safe_version_patch.side_effect = lambda repo_id, version: version
+            mock_get_safe_version_patch.side_effect = lambda repo_id, version: version
             mock_snapshot_download_patch.side_effect = mock_snapshot_download

-            return LeRobotDatasetMetadata(repo_id=repo_id, root=root, local_files_only=local_files_only)
+            return LeRobotDatasetMetadata(repo_id=repo_id, root=root)

     return _create_lerobot_dataset_metadata

@@ -328,12 +355,13 @@ def lerobot_dataset_metadata_factory(
 def lerobot_dataset_factory(
     info_factory,
     stats_factory,
+    episodes_stats_factory,
     tasks_factory,
     episodes_factory,
     hf_dataset_factory,
     mock_snapshot_download_factory,
     lerobot_dataset_metadata_factory,
-):
+) -> LeRobotDatasetFactory:
     def _create_lerobot_dataset(
         root: Path,
         repo_id: str = DUMMY_REPO_ID,
@@ -343,6 +371,7 @@ def lerobot_dataset_factory(
         multi_task: bool = False,
         info: dict | None = None,
         stats: dict | None = None,
+        episodes_stats: list[dict] | None = None,
         tasks: list[dict] | None = None,
         episode_dicts: list[dict] | None = None,
         hf_dataset: datasets.Dataset | None = None,
@@ -354,6 +383,8 @@ def lerobot_dataset_factory(
         )
         if not stats:
             stats = stats_factory(features=info["features"])
+        if not episodes_stats:
+            episodes_stats = episodes_stats_factory(features=info["features"], total_episodes=total_episodes)
         if not tasks:
             tasks = tasks_factory(total_tasks=info["total_tasks"])
         if not episode_dicts:
@@ -369,6 +400,7 @@ def lerobot_dataset_factory(
         mock_snapshot_download = mock_snapshot_download_factory(
             info=info,
             stats=stats,
+            episodes_stats=episodes_stats,
             tasks=tasks,
             episodes=episode_dicts,
             hf_dataset=hf_dataset,
@@ -378,19 +410,26 @@ def lerobot_dataset_factory(
             repo_id=repo_id,
             info=info,
             stats=stats,
+            episodes_stats=episodes_stats,
             tasks=tasks,
             episodes=episode_dicts,
-            local_files_only=kwargs.get("local_files_only", False),
         )
         with (
             patch("lerobot.common.datasets.lerobot_dataset.LeRobotDatasetMetadata") as mock_metadata_patch,
+            patch("lerobot.common.datasets.lerobot_dataset.get_safe_version") as mock_get_safe_version_patch,
             patch(
                 "lerobot.common.datasets.lerobot_dataset.snapshot_download"
             ) as mock_snapshot_download_patch,
         ):
             mock_metadata_patch.return_value = mock_metadata
+            mock_get_safe_version_patch.side_effect = lambda repo_id, version: version
             mock_snapshot_download_patch.side_effect = mock_snapshot_download

             return LeRobotDataset(repo_id=repo_id, root=root, **kwargs)

     return _create_lerobot_dataset
+
+
+@pytest.fixture(scope="session")
+def empty_lerobot_dataset_factory() -> LeRobotDatasetFactory:
+    return partial(LeRobotDataset.create, repo_id=DUMMY_REPO_ID, fps=DEFAULT_FPS)
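A recurring pattern in this diff: tasks, episodes, and the new episodes_stats are now plain dicts keyed by their integer index instead of lists, so every iteration goes through .values(). A minimal sketch of the shapes these factories now produce (illustrative values, not taken from the commit):

    # tasks: {task_index: {"task_index": ..., "task": ...}}
    tasks = {
        0: {"task_index": 0, "task": "Perform action 0."},
        1: {"task_index": 1, "task": "Perform action 1."},
    }

    # episodes: {episode_index: {"episode_index": ..., "tasks": [...], "length": ...}}
    episodes = {
        0: {"episode_index": 0, "tasks": ["Perform action 0."], "length": 100},
    }

    # episodes_stats: {episode_index: {"episode_index": ..., "stats": {...}}}
    # This is why get_task_index above iterates over task_dicts.values():
    tasks_by_index = {d["task_index"]: d["task"] for d in tasks.values()}
    assert tasks_by_index[1] == "Perform action 1."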
tests/fixtures/files.py (vendored, 26 changed lines)
@@ -7,7 +7,13 @@ import pyarrow.compute as pc
 import pyarrow.parquet as pq
 import pytest

-from lerobot.common.datasets.utils import EPISODES_PATH, INFO_PATH, STATS_PATH, TASKS_PATH
+from lerobot.common.datasets.utils import (
+    EPISODES_PATH,
+    EPISODES_STATS_PATH,
+    INFO_PATH,
+    STATS_PATH,
+    TASKS_PATH,
+)


 @pytest.fixture(scope="session")
@@ -38,6 +44,20 @@ def stats_path(stats_factory):
     return _create_stats_json_file


+@pytest.fixture(scope="session")
+def episodes_stats_path(episodes_stats_factory):
+    def _create_episodes_stats_jsonl_file(dir: Path, episodes_stats: list[dict] | None = None) -> Path:
+        if not episodes_stats:
+            episodes_stats = episodes_stats_factory()
+        fpath = dir / EPISODES_STATS_PATH
+        fpath.parent.mkdir(parents=True, exist_ok=True)
+        with jsonlines.open(fpath, "w") as writer:
+            writer.write_all(episodes_stats.values())
+        return fpath
+
+    return _create_episodes_stats_jsonl_file
+
+
 @pytest.fixture(scope="session")
 def tasks_path(tasks_factory):
     def _create_tasks_jsonl_file(dir: Path, tasks: list | None = None) -> Path:
@@ -46,7 +66,7 @@ def tasks_path(tasks_factory):
         fpath = dir / TASKS_PATH
         fpath.parent.mkdir(parents=True, exist_ok=True)
         with jsonlines.open(fpath, "w") as writer:
-            writer.write_all(tasks)
+            writer.write_all(tasks.values())
         return fpath

     return _create_tasks_jsonl_file
@@ -60,7 +80,7 @@ def episode_path(episodes_factory):
         fpath = dir / EPISODES_PATH
         fpath.parent.mkdir(parents=True, exist_ok=True)
         with jsonlines.open(fpath, "w") as writer:
-            writer.write_all(episodes)
+            writer.write_all(episodes.values())
         return fpath

     return _create_episodes_jsonl_file
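Because the fixtures now hold dicts keyed by index, serialization writes dict.values(), so the on-disk JSON Lines format is unchanged: one JSON object per line. A round-trip sketch using the jsonlines package (file name and data are illustrative):

    import jsonlines

    tasks = {0: {"task_index": 0, "task": "Pick up the cube."}}

    with jsonlines.open("tasks.jsonl", "w") as writer:
        writer.write_all(tasks.values())  # one object per line; dict keys are dropped

    with jsonlines.open("tasks.jsonl") as reader:
        loaded = {item["task_index"]: item for item in reader}  # re-key on load

    assert loaded == tasks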
tests/fixtures/hub.py (vendored, 21 changed lines)
@@ -4,7 +4,13 @@ import datasets
 import pytest
 from huggingface_hub.utils import filter_repo_objects

-from lerobot.common.datasets.utils import EPISODES_PATH, INFO_PATH, STATS_PATH, TASKS_PATH
+from lerobot.common.datasets.utils import (
+    EPISODES_PATH,
+    EPISODES_STATS_PATH,
+    INFO_PATH,
+    STATS_PATH,
+    TASKS_PATH,
+)
 from tests.fixtures.constants import LEROBOT_TEST_DIR


@@ -14,6 +20,8 @@ def mock_snapshot_download_factory(
     info_path,
     stats_factory,
     stats_path,
+    episodes_stats_factory,
+    episodes_stats_path,
     tasks_factory,
     tasks_path,
     episodes_factory,
@@ -29,6 +37,7 @@ def mock_snapshot_download_factory(
     def _mock_snapshot_download_func(
         info: dict | None = None,
         stats: dict | None = None,
+        episodes_stats: list[dict] | None = None,
         tasks: list[dict] | None = None,
         episodes: list[dict] | None = None,
         hf_dataset: datasets.Dataset | None = None,
@@ -37,6 +46,10 @@ def mock_snapshot_download_factory(
             info = info_factory()
         if not stats:
             stats = stats_factory(features=info["features"])
+        if not episodes_stats:
+            episodes_stats = episodes_stats_factory(
+                features=info["features"], total_episodes=info["total_episodes"]
+            )
         if not tasks:
             tasks = tasks_factory(total_tasks=info["total_tasks"])
         if not episodes:
@@ -67,11 +80,11 @@ def mock_snapshot_download_factory(

         # List all possible files
         all_files = []
-        meta_files = [INFO_PATH, STATS_PATH, TASKS_PATH, EPISODES_PATH]
+        meta_files = [INFO_PATH, STATS_PATH, EPISODES_STATS_PATH, TASKS_PATH, EPISODES_PATH]
         all_files.extend(meta_files)

         data_files = []
-        for episode_dict in episodes:
+        for episode_dict in episodes.values():
             ep_idx = episode_dict["episode_index"]
             ep_chunk = ep_idx // info["chunks_size"]
             data_path = info["data_path"].format(episode_chunk=ep_chunk, episode_index=ep_idx)
@@ -92,6 +105,8 @@ def mock_snapshot_download_factory(
             _ = info_path(local_dir, info)
         elif rel_path == STATS_PATH:
             _ = stats_path(local_dir, stats)
+        elif rel_path == EPISODES_STATS_PATH:
+            _ = episodes_stats_path(local_dir, episodes_stats)
         elif rel_path == TASKS_PATH:
            _ = tasks_path(local_dir, tasks)
         elif rel_path == EPISODES_PATH:
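These fixtures replace huggingface_hub's snapshot_download with a function that materializes the requested files locally instead of hitting the Hub. The mechanism is plain unittest.mock patching with a side_effect; a stripped-down sketch of the idea (names and file contents are illustrative, not the fixture's exact code):

    from pathlib import Path
    from unittest.mock import patch

    def fake_snapshot_download(repo_id, local_dir=None, **kwargs):
        # Write the metadata files the code under test expects, then
        # return the directory path, like the real function would.
        local_dir = Path(local_dir)
        (local_dir / "meta").mkdir(parents=True, exist_ok=True)
        (local_dir / "meta" / "info.json").write_text("{}")
        return str(local_dir)

    with patch("lerobot.common.datasets.lerobot_dataset.snapshot_download") as mock_dl:
        mock_dl.side_effect = fake_snapshot_download
        # any code under test that calls snapshot_download now gets the fake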
@@ -182,7 +182,7 @@ def test_camera(request, camera_type, mock):

 @pytest.mark.parametrize("camera_type, mock", TEST_CAMERA_TYPES)
 @require_camera
-def test_save_images_from_cameras(tmpdir, request, camera_type, mock):
+def test_save_images_from_cameras(tmp_path, request, camera_type, mock):
     # TODO(rcadene): refactor
     if camera_type == "opencv":
         from lerobot.common.robot_devices.cameras.opencv import save_images_from_cameras
@@ -190,4 +190,4 @@ def test_save_images_from_cameras(tmpdir, request, camera_type, mock):
         from lerobot.common.robot_devices.cameras.intelrealsense import save_images_from_cameras

     # Small `record_time_s` to speedup unit tests
-    save_images_from_cameras(tmpdir, record_time_s=0.02, mock=mock)
+    save_images_from_cameras(tmp_path, record_time_s=0.02, mock=mock)
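The tmpdir-to-tmp_path renames here and in the robot-control tests below swap pytest's legacy py.path.local fixture for the pathlib-based tmp_path fixture, which is why the Path(tmpdir) conversions disappear. Both fixtures point at the same per-test temporary directory:

    from pathlib import Path

    def test_example(tmp_path):
        # tmp_path is already a pathlib.Path; no conversion needed
        assert isinstance(tmp_path, Path)
        out = tmp_path / "data" / "episode_000000.parquet"
        out.parent.mkdir(parents=True)
        out.touch()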
tests/test_compute_stats.py (new file, 311 lines)
@@ -0,0 +1,311 @@
#!/usr/bin/env python

# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest.mock import patch

import numpy as np
import pytest

from lerobot.common.datasets.compute_stats import (
    _assert_type_and_shape,
    aggregate_feature_stats,
    aggregate_stats,
    compute_episode_stats,
    estimate_num_samples,
    get_feature_stats,
    sample_images,
    sample_indices,
)


def mock_load_image_as_numpy(path, dtype, channel_first):
    return np.ones((3, 32, 32), dtype=dtype) if channel_first else np.ones((32, 32, 3), dtype=dtype)


@pytest.fixture
def sample_array():
    return np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])


def test_estimate_num_samples():
    assert estimate_num_samples(1) == 1
    assert estimate_num_samples(10) == 10
    assert estimate_num_samples(100) == 100
    assert estimate_num_samples(200) == 100
    assert estimate_num_samples(1000) == 177
    assert estimate_num_samples(2000) == 299
    assert estimate_num_samples(5000) == 594
    assert estimate_num_samples(10_000) == 1000
    assert estimate_num_samples(20_000) == 1681
    assert estimate_num_samples(50_000) == 3343
    assert estimate_num_samples(500_000) == 10_000

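The expected values are consistent with a power-law subsampling heuristic: take roughly n**0.75 samples, floored at 100 for datasets with at least 100 items and capped at 10,000. A sketch that reproduces every assertion above (my reading of the test, not necessarily the library's exact code):

    def estimate_num_samples_sketch(n: int, min_n: int = 100, max_n: int = 10_000, power: float = 0.75) -> int:
        if n < min_n:
            min_n = n  # tiny datasets are used in full
        return max(min_n, min(max_n, int(n**power)))

    assert estimate_num_samples_sketch(200) == 100         # floor kicks in
    assert estimate_num_samples_sketch(1000) == 177        # int(1000**0.75)
    assert estimate_num_samples_sketch(500_000) == 10_000  # cap kicks in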
def test_sample_indices():
    indices = sample_indices(10)
    assert len(indices) > 0
    assert indices[0] == 0
    assert indices[-1] == 9
    assert len(indices) == estimate_num_samples(10)


@patch("lerobot.common.datasets.compute_stats.load_image_as_numpy", side_effect=mock_load_image_as_numpy)
def test_sample_images(mock_load):
    image_paths = [f"image_{i}.jpg" for i in range(100)]
    images = sample_images(image_paths)
    assert isinstance(images, np.ndarray)
    assert images.shape[1:] == (3, 32, 32)
    assert images.dtype == np.uint8
    assert len(images) == estimate_num_samples(100)


def test_get_feature_stats_images():
    data = np.random.rand(100, 3, 32, 32)
    stats = get_feature_stats(data, axis=(0, 2, 3), keepdims=True)
    assert "min" in stats and "max" in stats and "mean" in stats and "std" in stats and "count" in stats
    np.testing.assert_equal(stats["count"], np.array([100]))
    assert stats["min"].shape == stats["max"].shape == stats["mean"].shape == stats["std"].shape


def test_get_feature_stats_axis_0_keepdims(sample_array):
    expected = {
        "min": np.array([[1, 2, 3]]),
        "max": np.array([[7, 8, 9]]),
        "mean": np.array([[4.0, 5.0, 6.0]]),
        "std": np.array([[2.44948974, 2.44948974, 2.44948974]]),
        "count": np.array([3]),
    }
    result = get_feature_stats(sample_array, axis=(0,), keepdims=True)
    for key in expected:
        np.testing.assert_allclose(result[key], expected[key])


def test_get_feature_stats_axis_1(sample_array):
    expected = {
        "min": np.array([1, 4, 7]),
        "max": np.array([3, 6, 9]),
        "mean": np.array([2.0, 5.0, 8.0]),
        "std": np.array([0.81649658, 0.81649658, 0.81649658]),
        "count": np.array([3]),
    }
    result = get_feature_stats(sample_array, axis=(1,), keepdims=False)
    for key in expected:
        np.testing.assert_allclose(result[key], expected[key])


def test_get_feature_stats_no_axis(sample_array):
    expected = {
        "min": np.array(1),
        "max": np.array(9),
        "mean": np.array(5.0),
        "std": np.array(2.5819889),
        "count": np.array([3]),
    }
    result = get_feature_stats(sample_array, axis=None, keepdims=False)
    for key in expected:
        np.testing.assert_allclose(result[key], expected[key])


def test_get_feature_stats_empty_array():
    array = np.array([])
    with pytest.raises(ValueError):
        get_feature_stats(array, axis=(0,), keepdims=True)


def test_get_feature_stats_single_value():
    array = np.array([[1337]])
    result = get_feature_stats(array, axis=None, keepdims=True)
    np.testing.assert_equal(result["min"], np.array(1337))
    np.testing.assert_equal(result["max"], np.array(1337))
    np.testing.assert_equal(result["mean"], np.array(1337.0))
    np.testing.assert_equal(result["std"], np.array(0.0))
    np.testing.assert_equal(result["count"], np.array([1]))


def test_compute_episode_stats():
    episode_data = {
        "observation.image": [f"image_{i}.jpg" for i in range(100)],
        "observation.state": np.random.rand(100, 10),
    }
    features = {
        "observation.image": {"dtype": "image"},
        "observation.state": {"dtype": "numeric"},
    }

    with patch(
        "lerobot.common.datasets.compute_stats.load_image_as_numpy", side_effect=mock_load_image_as_numpy
    ):
        stats = compute_episode_stats(episode_data, features)

    assert "observation.image" in stats and "observation.state" in stats
    assert stats["observation.image"]["count"].item() == 100
    assert stats["observation.state"]["count"].item() == 100
    assert stats["observation.image"]["mean"].shape == (3, 1, 1)


def test_assert_type_and_shape_valid():
    valid_stats = [
        {
            "feature1": {
                "min": np.array([1.0]),
                "max": np.array([10.0]),
                "mean": np.array([5.0]),
                "std": np.array([2.0]),
                "count": np.array([1]),
            }
        }
    ]
    _assert_type_and_shape(valid_stats)


def test_assert_type_and_shape_invalid_type():
    invalid_stats = [
        {
            "feature1": {
                "min": [1.0],  # Not a numpy array
                "max": np.array([10.0]),
                "mean": np.array([5.0]),
                "std": np.array([2.0]),
                "count": np.array([1]),
            }
        }
    ]
    with pytest.raises(ValueError, match="Stats must be composed of numpy array"):
        _assert_type_and_shape(invalid_stats)


def test_assert_type_and_shape_invalid_shape():
    invalid_stats = [
        {
            "feature1": {
                "count": np.array([1, 2]),  # Wrong shape
            }
        }
    ]
    with pytest.raises(ValueError, match=r"Shape of 'count' must be \(1\)"):
        _assert_type_and_shape(invalid_stats)


def test_aggregate_feature_stats():
    stats_ft_list = [
        {
            "min": np.array([1.0]),
            "max": np.array([10.0]),
            "mean": np.array([5.0]),
            "std": np.array([2.0]),
            "count": np.array([1]),
        },
        {
            "min": np.array([2.0]),
            "max": np.array([12.0]),
            "mean": np.array([6.0]),
            "std": np.array([2.5]),
            "count": np.array([1]),
        },
    ]
    result = aggregate_feature_stats(stats_ft_list)
    np.testing.assert_allclose(result["min"], np.array([1.0]))
    np.testing.assert_allclose(result["max"], np.array([12.0]))
    np.testing.assert_allclose(result["mean"], np.array([5.5]))
    np.testing.assert_allclose(result["std"], np.array([2.318405]), atol=1e-6)
    np.testing.assert_allclose(result["count"], np.array([2]))

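The expected aggregates follow the standard count-weighted pooling of per-episode statistics: min/max are element-wise extrema, the mean is a count-weighted average, and the pooled variance combines each episode's variance with the squared offset of its mean from the global mean. A sketch that reproduces the 2.318405 above and the 3.5317 in test_aggregate_stats below (my reading of the expected values, not necessarily the library's exact code):

    import numpy as np

    def aggregate_feature_stats_sketch(stats_list):
        counts = np.array([s["count"][0] for s in stats_list], dtype=np.float64)
        means = np.stack([s["mean"] for s in stats_list])
        stds = np.stack([s["std"] for s in stats_list])
        total = counts.sum()
        w = (counts / total).reshape(-1, *([1] * (means.ndim - 1)))
        mean = (w * means).sum(axis=0)
        # pooled variance: within-episode variance plus between-episode spread
        var = (w * (stds**2 + (means - mean) ** 2)).sum(axis=0)
        return {"mean": mean, "std": np.sqrt(var), "count": np.array([int(total)])}

    agg = aggregate_feature_stats_sketch(
        [
            {"mean": np.array([5.0]), "std": np.array([2.0]), "count": np.array([1])},
            {"mean": np.array([6.0]), "std": np.array([2.5]), "count": np.array([1])},
        ]
    )
    assert np.allclose(agg["std"], 2.318405, atol=1e-6)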
def test_aggregate_stats():
    all_stats = [
        {
            "observation.image": {
                "min": [1, 2, 3],
                "max": [10, 20, 30],
                "mean": [5.5, 10.5, 15.5],
                "std": [2.87, 5.87, 8.87],
                "count": 10,
            },
            "observation.state": {"min": 1, "max": 10, "mean": 5.5, "std": 2.87, "count": 10},
            "extra_key_0": {"min": 5, "max": 25, "mean": 15, "std": 6, "count": 6},
        },
        {
            "observation.image": {
                "min": [2, 1, 0],
                "max": [15, 10, 5],
                "mean": [8.5, 5.5, 2.5],
                "std": [3.42, 2.42, 1.42],
                "count": 15,
            },
            "observation.state": {"min": 2, "max": 15, "mean": 8.5, "std": 3.42, "count": 15},
            "extra_key_1": {"min": 0, "max": 20, "mean": 10, "std": 5, "count": 5},
        },
    ]

    expected_agg_stats = {
        "observation.image": {
            "min": [1, 1, 0],
            "max": [15, 20, 30],
            "mean": [7.3, 7.5, 7.7],
            "std": [3.5317, 4.8267, 8.5581],
            "count": 25,
        },
        "observation.state": {
            "min": 1,
            "max": 15,
            "mean": 7.3,
            "std": 3.5317,
            "count": 25,
        },
        "extra_key_0": {
            "min": 5,
            "max": 25,
            "mean": 15.0,
            "std": 6.0,
            "count": 6,
        },
        "extra_key_1": {
            "min": 0,
            "max": 20,
            "mean": 10.0,
            "std": 5.0,
            "count": 5,
        },
    }

    # cast to numpy
    for ep_stats in all_stats:
        for fkey, stats in ep_stats.items():
            for k in stats:
                stats[k] = np.array(stats[k], dtype=np.int64 if k == "count" else np.float32)
                if fkey == "observation.image" and k != "count":
                    stats[k] = stats[k].reshape(3, 1, 1)  # for normalization on image channels
                else:
                    stats[k] = stats[k].reshape(1)

    # cast to numpy
    for fkey, stats in expected_agg_stats.items():
        for k in stats:
            stats[k] = np.array(stats[k], dtype=np.int64 if k == "count" else np.float32)
            if fkey == "observation.image" and k != "count":
                stats[k] = stats[k].reshape(3, 1, 1)  # for normalization on image channels
            else:
                stats[k] = stats[k].reshape(1)

    results = aggregate_stats(all_stats)

    for fkey in expected_agg_stats:
        np.testing.assert_allclose(results[fkey]["min"], expected_agg_stats[fkey]["min"])
        np.testing.assert_allclose(results[fkey]["max"], expected_agg_stats[fkey]["max"])
        np.testing.assert_allclose(results[fkey]["mean"], expected_agg_stats[fkey]["mean"])
        np.testing.assert_allclose(
            results[fkey]["std"], expected_agg_stats[fkey]["std"], atol=1e-04, rtol=1e-04
        )
        np.testing.assert_allclose(results[fkey]["count"], expected_agg_stats[fkey]["count"])
tests/test_control_robot.py

@@ -24,7 +24,6 @@ pytest -sx 'tests/test_control_robot.py::test_teleoperate[aloha-True]'
 """

 import multiprocessing
-from pathlib import Path
 from unittest.mock import patch

 import pytest
@@ -45,7 +44,7 @@ from tests.utils import DEVICE, TEST_ROBOT_TYPES, mock_calibration_dir, require_

 @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
 @require_robot
-def test_teleoperate(tmpdir, request, robot_type, mock):
+def test_teleoperate(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock and robot_type != "aloha":
@@ -53,8 +52,7 @@ def test_teleoperate(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        tmpdir = Path(tmpdir)
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -70,15 +68,14 @@ def test_teleoperate(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
 @require_robot
-def test_calibrate(tmpdir, request, robot_type, mock):
+def test_calibrate(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock:
         request.getfixturevalue("patch_builtins_input")

     # Create an empty calibration directory to trigger manual calibration
-    tmpdir = Path(tmpdir)
-    calibration_dir = tmpdir / robot_type
+    calibration_dir = tmp_path / robot_type
     robot_kwargs["calibration_dir"] = calibration_dir

     robot = make_robot(**robot_kwargs)
@@ -89,7 +86,7 @@ def test_calibrate(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
 @require_robot
-def test_record_without_cameras(tmpdir, request, robot_type, mock):
+def test_record_without_cameras(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     # Avoid using cameras
@@ -100,7 +97,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = Path(tmpdir) / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -108,7 +105,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock):
         pass

     repo_id = "lerobot/debug"
-    root = Path(tmpdir) / "data" / repo_id
+    root = tmp_path / "data" / repo_id
     single_task = "Do something."

     robot = make_robot(**robot_kwargs)
@@ -121,7 +118,6 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock):
         episode_time_s=1,
         reset_time_s=0.1,
         num_episodes=2,
-        run_compute_stats=False,
         push_to_hub=False,
         video=False,
         play_sounds=False,
@@ -131,8 +127,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
 @require_robot
-def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
-    tmpdir = Path(tmpdir)
+def test_record_and_replay_and_policy(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock and robot_type != "aloha":
@@ -140,7 +135,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -148,7 +143,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
         pass

     repo_id = "lerobot_test/debug"
-    root = tmpdir / "data" / repo_id
+    root = tmp_path / "data" / repo_id
     single_task = "Do something."

     robot = make_robot(**robot_kwargs)
@@ -172,15 +167,13 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
     assert dataset.meta.total_episodes == 2
     assert len(dataset) == 2

-    replay_cfg = ReplayControlConfig(
-        episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False, local_files_only=True
-    )
+    replay_cfg = ReplayControlConfig(episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False)
     replay(robot, replay_cfg)

     policy_cfg = ACTConfig()
     policy = make_policy(policy_cfg, ds_meta=dataset.meta, device=DEVICE)

-    out_dir = tmpdir / "logger"
+    out_dir = tmp_path / "logger"

     pretrained_policy_path = out_dir / "checkpoints/last/pretrained_model"
     policy.save_pretrained(pretrained_policy_path)
@@ -207,7 +200,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
     num_image_writer_processes = 0

     eval_repo_id = "lerobot/eval_debug"
-    eval_root = tmpdir / "data" / eval_repo_id
+    eval_root = tmp_path / "data" / eval_repo_id

     rec_eval_cfg = RecordControlConfig(
         repo_id=eval_repo_id,
@@ -218,7 +211,6 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
         episode_time_s=1,
         reset_time_s=0.1,
         num_episodes=2,
-        run_compute_stats=False,
         push_to_hub=False,
         video=False,
         display_cameras=False,
@@ -240,7 +232,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", [("koch", True)])
 @require_robot
-def test_resume_record(tmpdir, request, robot_type, mock):
+def test_resume_record(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock and robot_type != "aloha":
@@ -248,7 +240,7 @@ def test_resume_record(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -258,7 +250,7 @@ def test_resume_record(tmpdir, request, robot_type, mock):
     robot = make_robot(**robot_kwargs)

     repo_id = "lerobot/debug"
-    root = Path(tmpdir) / "data" / repo_id
+    root = tmp_path / "data" / repo_id
     single_task = "Do something."

     rec_cfg = RecordControlConfig(
@@ -272,8 +264,6 @@ def test_resume_record(tmpdir, request, robot_type, mock):
         video=False,
         display_cameras=False,
         play_sounds=False,
-        run_compute_stats=False,
-        local_files_only=True,
         num_episodes=1,
     )

@@ -291,7 +281,7 @@ def test_resume_record(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", [("koch", True)])
 @require_robot
-def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):
+def test_record_with_event_rerecord_episode(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock and robot_type != "aloha":
@@ -299,7 +289,7 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -316,7 +306,7 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):
         mock_listener.return_value = (None, mock_events)

         repo_id = "lerobot/debug"
-        root = Path(tmpdir) / "data" / repo_id
+        root = tmp_path / "data" / repo_id
         single_task = "Do something."

         rec_cfg = RecordControlConfig(
@@ -331,7 +321,6 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):
             video=False,
             display_cameras=False,
             play_sounds=False,
-            run_compute_stats=False,
         )
         dataset = record(robot, rec_cfg)

@@ -342,7 +331,7 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", [("koch", True)])
 @require_robot
-def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):
+def test_record_with_event_exit_early(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock:
@@ -350,7 +339,7 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -367,7 +356,7 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):
         mock_listener.return_value = (None, mock_events)

         repo_id = "lerobot/debug"
-        root = Path(tmpdir) / "data" / repo_id
+        root = tmp_path / "data" / repo_id
         single_task = "Do something."

         rec_cfg = RecordControlConfig(
@@ -382,7 +371,6 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):
             video=False,
             display_cameras=False,
             play_sounds=False,
-            run_compute_stats=False,
         )

         dataset = record(robot, rec_cfg)
@@ -395,7 +383,7 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):
     "robot_type, mock, num_image_writer_processes", [("koch", True, 0), ("koch", True, 1)]
 )
 @require_robot
-def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num_image_writer_processes):
+def test_record_with_event_stop_recording(tmp_path, request, robot_type, mock, num_image_writer_processes):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock:
@@ -403,7 +391,7 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -420,7 +408,7 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num
         mock_listener.return_value = (None, mock_events)

         repo_id = "lerobot/debug"
-        root = Path(tmpdir) / "data" / repo_id
+        root = tmp_path / "data" / repo_id
         single_task = "Do something."

         rec_cfg = RecordControlConfig(
@@ -436,7 +424,6 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num
             video=False,
             display_cameras=False,
             play_sounds=False,
-            run_compute_stats=False,
             num_image_writer_processes=num_image_writer_processes,
         )

@@ -15,24 +15,21 @@
 # limitations under the License.
-import json
-import logging
+import re
-from copy import deepcopy
-from itertools import chain
 from pathlib import Path

 import einops
 import numpy as np
 import pytest
 import torch
 from datasets import Dataset
 from huggingface_hub import HfApi
+from PIL import Image
 from safetensors.torch import load_file

 import lerobot
 from lerobot.common.datasets.compute_stats import (
     aggregate_stats,
-    compute_stats,
-    get_stats_einops_patterns,
 )
 from lerobot.common.datasets.factory import make_dataset
+from lerobot.common.datasets.image_writer import image_array_to_pil_image
 from lerobot.common.datasets.lerobot_dataset import (
     LeRobotDataset,
     MultiLeRobotDataset,
@@ -40,20 +37,34 @@ from lerobot.common.datasets.lerobot_dataset import (
|
||||
from lerobot.common.datasets.utils import (
|
||||
create_branch,
|
||||
flatten_dict,
|
||||
hf_transform_to_torch,
|
||||
unflatten_dict,
|
||||
)
|
||||
from lerobot.common.envs.factory import make_env_config
|
||||
from lerobot.common.policies.factory import make_policy_config
|
||||
from lerobot.common.robot_devices.robots.utils import make_robot
|
||||
from lerobot.common.utils.random_utils import seeded_context
|
||||
from lerobot.configs.default import DatasetConfig
|
||||
from lerobot.configs.train import TrainPipelineConfig
|
||||
from tests.fixtures.constants import DUMMY_REPO_ID
|
||||
from tests.fixtures.constants import DUMMY_CHW, DUMMY_HWC, DUMMY_REPO_ID
|
||||
from tests.utils import DEVICE, require_x86_64_kernel
|
||||
|
||||
|
||||
def test_same_attributes_defined(lerobot_dataset_factory, tmp_path):
|
||||
@pytest.fixture
|
||||
def image_dataset(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {
|
||||
"image": {
|
||||
"dtype": "image",
|
||||
"shape": DUMMY_CHW,
|
||||
"names": [
|
||||
"channels",
|
||||
"height",
|
||||
"width",
|
||||
],
|
||||
}
|
||||
}
|
||||
return empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
|
||||
|
||||
def test_same_attributes_defined(tmp_path, lerobot_dataset_factory):
|
||||
"""
|
||||
Instantiate a LeRobotDataset both ways with '__init__()' and 'create()' and verify that instantiated
|
||||
objects have the same sets of attributes defined.
|
||||
@@ -66,24 +77,20 @@ def test_same_attributes_defined(lerobot_dataset_factory, tmp_path):
|
||||
root_init = tmp_path / "init"
|
||||
dataset_init = lerobot_dataset_factory(root=root_init)
|
||||
|
||||
# Access the '_hub_version' cached_property in both instances to force its creation
|
||||
_ = dataset_init.meta._hub_version
|
||||
_ = dataset_create.meta._hub_version
|
||||
|
||||
init_attr = set(vars(dataset_init).keys())
|
||||
create_attr = set(vars(dataset_create).keys())
|
||||
|
||||
assert init_attr == create_attr
|
||||
|
||||
|
||||
def test_dataset_initialization(lerobot_dataset_factory, tmp_path):
|
||||
def test_dataset_initialization(tmp_path, lerobot_dataset_factory):
|
||||
kwargs = {
|
||||
"repo_id": DUMMY_REPO_ID,
|
||||
"total_episodes": 10,
|
||||
"total_frames": 400,
|
||||
"episodes": [2, 5, 6],
|
||||
}
|
||||
dataset = lerobot_dataset_factory(root=tmp_path, **kwargs)
|
||||
dataset = lerobot_dataset_factory(root=tmp_path / "test", **kwargs)
|
||||
|
||||
assert dataset.repo_id == kwargs["repo_id"]
|
||||
assert dataset.meta.total_episodes == kwargs["total_episodes"]
|
||||
@@ -93,12 +100,232 @@ def test_dataset_initialization(lerobot_dataset_factory, tmp_path):
|
||||
assert dataset.num_frames == len(dataset)
|
||||
|
||||
|
||||
def test_add_frame_missing_task(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError, match="Feature mismatch in `frame` dictionary:\nMissing features: {'task'}\n"
|
||||
):
|
||||
dataset.add_frame({"state": torch.randn(1)})
|
||||
|
||||
|
||||
def test_add_frame_missing_feature(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError, match="Feature mismatch in `frame` dictionary:\nMissing features: {'state'}\n"
|
||||
):
|
||||
dataset.add_frame({"task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_extra_feature(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError, match="Feature mismatch in `frame` dictionary:\nExtra features: {'extra'}\n"
|
||||
):
|
||||
dataset.add_frame({"state": torch.randn(1), "task": "Dummy task", "extra": "dummy_extra"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_type(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError, match="The feature 'state' of dtype 'float16' is not of the expected dtype 'float32'.\n"
|
||||
):
|
||||
dataset.add_frame({"state": torch.randn(1, dtype=torch.float16), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_shape(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape("The feature 'state' of shape '(1,)' does not have the expected shape '(2,)'.\n"),
|
||||
):
|
||||
dataset.add_frame({"state": torch.randn(1), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_shape_python_float(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape(
|
||||
"The feature 'state' is not a 'np.ndarray'. Expected type is 'float32', but type '<class 'float'>' provided instead.\n"
|
||||
),
|
||||
):
|
||||
dataset.add_frame({"state": 1.0, "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_shape_torch_ndim_0(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape("The feature 'state' of shape '()' does not have the expected shape '(1,)'.\n"),
|
||||
):
|
||||
dataset.add_frame({"state": torch.tensor(1.0), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_shape_numpy_ndim_0(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape(
|
||||
"The feature 'state' is not a 'np.ndarray'. Expected type is 'float32', but type '<class 'numpy.float32'>' provided instead.\n"
|
||||
),
|
||||
):
|
||||
dataset.add_frame({"state": np.float32(1.0), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(1), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert len(dataset) == 1
|
||||
assert dataset[0]["task"] == "Dummy task"
|
||||
assert dataset[0]["task_index"] == 0
|
||||
assert dataset[0]["state"].ndim == 0
|
||||
|
||||
|
||||
def test_add_frame_state_1d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2])
|
||||
|
||||
|
||||
def test_add_frame_state_2d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2, 4), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2, 4), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2, 4])
|
||||
|
||||
|
||||
def test_add_frame_state_3d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2, 4, 3), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2, 4, 3), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2, 4, 3])
|
||||
|
||||
|
||||
def test_add_frame_state_4d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2, 4, 3, 5), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2, 4, 3, 5])
|
||||
|
||||
|
||||
def test_add_frame_state_5d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5, 1), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2, 4, 3, 5, 1), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2, 4, 3, 5, 1])
|
||||
|
||||
|
||||
def test_add_frame_state_numpy(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": np.array([1], dtype=np.float32), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].ndim == 0
|
||||
|
||||
|
||||
def test_add_frame_string(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"caption": {"dtype": "string", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"caption": "Dummy caption", "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["caption"] == "Dummy caption"
|
||||
|
||||
|
||||
def test_add_frame_image_wrong_shape(image_dataset):
|
||||
dataset = image_dataset
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape(
|
||||
"The feature 'image' of shape '(3, 128, 96)' does not have the expected shape '(3, 96, 128)' or '(96, 128, 3)'.\n"
|
||||
),
|
||||
):
|
||||
c, h, w = DUMMY_CHW
|
||||
dataset.add_frame({"image": torch.randn(c, w, h), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_image_wrong_range(image_dataset):
|
||||
"""This test will display the following error message from a thread:
|
||||
```
|
||||
Error writing image ...test_add_frame_image_wrong_ran0/test/images/image/episode_000000/frame_000000.png:
|
||||
The image data type is float, which requires values in the range [0.0, 1.0]. However, the provided range is [0.009678772038470007, 254.9776492089887].
|
||||
Please adjust the range or provide a uint8 image with values in the range [0, 255]
|
||||
```
|
||||
Hence the image won't be saved on disk and save_episode will raise `FileNotFoundError`.
|
||||
"""
|
||||
dataset = image_dataset
|
||||
dataset.add_frame({"image": np.random.rand(*DUMMY_CHW) * 255, "task": "Dummy task"})
|
||||
with pytest.raises(FileNotFoundError):
|
||||
dataset.save_episode()
|
||||
|
||||
|
||||
def test_add_frame_image(image_dataset):
|
||||
dataset = image_dataset
|
||||
dataset.add_frame({"image": np.random.rand(*DUMMY_CHW), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_add_frame_image_h_w_c(image_dataset):
|
||||
dataset = image_dataset
|
||||
dataset.add_frame({"image": np.random.rand(*DUMMY_HWC), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_add_frame_image_uint8(image_dataset):
|
||||
dataset = image_dataset
|
||||
image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8)
|
||||
dataset.add_frame({"image": image, "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_add_frame_image_pil(image_dataset):
|
||||
dataset = image_dataset
|
||||
image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8)
|
||||
dataset.add_frame({"image": Image.fromarray(image), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_image_array_to_pil_image_wrong_range_float_0_255():
|
||||
image = np.random.rand(*DUMMY_HWC) * 255
|
||||
with pytest.raises(ValueError):
|
||||
image_array_to_pil_image(image)
|
||||
|
||||
|
||||
# TODO(aliberts):
|
||||
# - [ ] test various attributes & state from init and create
|
||||
# - [ ] test init with episodes and check num_frames
|
||||
# - [ ] test add_frame
|
||||
# - [ ] test add_episode
|
||||
# - [ ] test consolidate
|
||||
# - [ ] test push_to_hub
|
||||
# - [ ] test smaller methods
|
||||
|
||||
@@ -210,67 +437,6 @@ def test_multidataset_frames():
|
||||
assert torch.equal(sub_dataset_item[k], dataset_item[k])
|
||||
|
||||
|
||||
# TODO(aliberts, rcadene): Refactor and move this to a tests/test_compute_stats.py
|
||||
def test_compute_stats_on_xarm():
|
||||
"""Check that the statistics are computed correctly according to the stats_patterns property.
|
||||
|
||||
We compare with taking a straight min, mean, max, std of all the data in one pass (which we can do
|
||||
because we are working with a small dataset).
|
||||
"""
|
||||
# TODO(rcadene, aliberts): remove dataset download
|
||||
dataset = LeRobotDataset("lerobot/xarm_lift_medium", episodes=[0])
|
||||
|
||||
# reduce size of dataset sample on which stats compute is tested to 10 frames
|
||||
dataset.hf_dataset = dataset.hf_dataset.select(range(10))
|
||||
|
||||
# Note: we set the batch size to be smaller than the whole dataset to make sure we are testing batched
|
||||
# computation of the statistics. While doing this, we also make sure it works when we don't divide the
|
||||
# dataset into even batches.
|
||||
computed_stats = compute_stats(dataset, batch_size=int(len(dataset) * 0.25), num_workers=0)
|
||||
|
||||
# get einops patterns to aggregate batches and compute statistics
|
||||
stats_patterns = get_stats_einops_patterns(dataset)
|
||||
|
||||
# get all frames from the dataset in the same dtype and range as during compute_stats
|
||||
dataloader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
num_workers=0,
|
||||
batch_size=len(dataset),
|
||||
shuffle=False,
|
||||
)
|
||||
full_batch = next(iter(dataloader))
|
||||
|
||||
# compute stats based on all frames from the dataset without any batching
|
||||
expected_stats = {}
|
||||
for k, pattern in stats_patterns.items():
|
||||
full_batch[k] = full_batch[k].float()
|
||||
expected_stats[k] = {}
|
||||
expected_stats[k]["mean"] = einops.reduce(full_batch[k], pattern, "mean")
|
||||
expected_stats[k]["std"] = torch.sqrt(
|
||||
einops.reduce((full_batch[k] - expected_stats[k]["mean"]) ** 2, pattern, "mean")
|
||||
)
|
||||
expected_stats[k]["min"] = einops.reduce(full_batch[k], pattern, "min")
|
||||
expected_stats[k]["max"] = einops.reduce(full_batch[k], pattern, "max")
|
||||
|
||||
# test computed stats match expected stats
|
||||
for k in stats_patterns:
|
||||
assert torch.allclose(computed_stats[k]["mean"], expected_stats[k]["mean"])
|
||||
assert torch.allclose(computed_stats[k]["std"], expected_stats[k]["std"])
|
||||
assert torch.allclose(computed_stats[k]["min"], expected_stats[k]["min"])
|
||||
assert torch.allclose(computed_stats[k]["max"], expected_stats[k]["max"])
|
||||
|
||||
# load stats used during training which are expected to match the ones returned by computed_stats
|
||||
loaded_stats = dataset.meta.stats # noqa: F841
|
||||
|
||||
# TODO(rcadene): we can't test this because expected_stats is computed on a subset
|
||||
# # test loaded stats match expected stats
|
||||
# for k in stats_patterns:
|
||||
# assert torch.allclose(loaded_stats[k]["mean"], expected_stats[k]["mean"])
|
||||
# assert torch.allclose(loaded_stats[k]["std"], expected_stats[k]["std"])
|
||||
# assert torch.allclose(loaded_stats[k]["min"], expected_stats[k]["min"])
|
||||
# assert torch.allclose(loaded_stats[k]["max"], expected_stats[k]["max"])


# TODO(aliberts): Move to more appropriate location
def test_flatten_unflatten_dict():
    d = {
@@ -374,35 +540,6 @@ def test_backward_compatibility(repo_id):
    # load_and_compare(i - 1)


@pytest.mark.skip("TODO after fix multidataset")
def test_multidataset_aggregate_stats():
    """Makes 3 basic datasets and checks that aggregate stats are computed correctly."""
    with seeded_context(0):
        data_a = torch.rand(30, dtype=torch.float32)
        data_b = torch.rand(20, dtype=torch.float32)
        data_c = torch.rand(20, dtype=torch.float32)

    hf_dataset_1 = Dataset.from_dict(
        {"a": data_a[:10], "b": data_b[:10], "c": data_c[:10], "index": torch.arange(10)}
    )
    hf_dataset_1.set_transform(hf_transform_to_torch)
    hf_dataset_2 = Dataset.from_dict({"a": data_a[10:20], "b": data_b[10:], "index": torch.arange(10)})
    hf_dataset_2.set_transform(hf_transform_to_torch)
    hf_dataset_3 = Dataset.from_dict({"a": data_a[20:], "c": data_c[10:], "index": torch.arange(10)})
    hf_dataset_3.set_transform(hf_transform_to_torch)
    dataset_1 = LeRobotDataset.from_preloaded("d1", hf_dataset=hf_dataset_1)
    dataset_1.stats = compute_stats(dataset_1, batch_size=len(hf_dataset_1), num_workers=0)
    dataset_2 = LeRobotDataset.from_preloaded("d2", hf_dataset=hf_dataset_2)
    dataset_2.stats = compute_stats(dataset_2, batch_size=len(hf_dataset_2), num_workers=0)
    dataset_3 = LeRobotDataset.from_preloaded("d3", hf_dataset=hf_dataset_3)
    dataset_3.stats = compute_stats(dataset_3, batch_size=len(hf_dataset_3), num_workers=0)
    stats = aggregate_stats([dataset_1, dataset_2, dataset_3])
    for data_key, data in zip(["a", "b", "c"], [data_a, data_b, data_c], strict=True):
        for agg_fn in ["mean", "min", "max"]:
            assert torch.allclose(stats[data_key][agg_fn], einops.reduce(data, "n -> 1", agg_fn))
        assert torch.allclose(stats[data_key]["std"], torch.std(data, correction=0))
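

# A numeric sketch (illustration only, not part of this commit) of the pooling that
# the aggregated std above must satisfy: per-chunk sums of squared deviations around
# the global mean reproduce the population std (correction=0) of the concatenated data.
import torch

chunks = [torch.rand(30), torch.rand(20), torch.rand(20)]
n = sum(c.numel() for c in chunks)
mean = sum(c.sum() for c in chunks) / n
var = sum(((c - mean) ** 2).sum() for c in chunks) / n  # no Bessel correction
assert torch.allclose(var.sqrt(), torch.std(torch.cat(chunks), correction=0))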


@pytest.mark.skip("Requires internet access")
def test_create_branch():
    api = HfApi()
@@ -431,9 +568,9 @@ def test_create_branch():

def test_dataset_feature_with_forward_slash_raises_error():
    # make sure dir does not exist
    from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME
    from lerobot.common.constants import HF_LEROBOT_HOME

    dataset_dir = LEROBOT_HOME / "lerobot/test/with/slash"
    dataset_dir = HF_LEROBOT_HOME / "lerobot/test/with/slash"
    # make sure does not exist
    if dataset_dir.exists():
        dataset_dir.rmdir()

@@ -1,55 +1,78 @@
from itertools import accumulate

import datasets
import numpy as np
import pyarrow.compute as pc
import pytest
import torch
from datasets import Dataset

from lerobot.common.datasets.utils import (
    calculate_episode_data_index,
    check_delta_timestamps,
    check_timestamps_sync,
    get_delta_indices,
    hf_transform_to_torch,
)
from tests.fixtures.constants import DUMMY_MOTOR_FEATURES


@pytest.fixture(scope="module")
def synced_hf_dataset_factory(hf_dataset_factory):
    def _create_synced_hf_dataset(fps: int = 30) -> Dataset:
        return hf_dataset_factory(fps=fps)
def calculate_total_episode(
    hf_dataset: datasets.Dataset, raise_if_not_contiguous: bool = True
) -> int:
    episode_indices = sorted(hf_dataset.unique("episode_index"))
    total_episodes = len(episode_indices)
    if raise_if_not_contiguous and episode_indices != list(range(total_episodes)):
        raise ValueError("episode_index values are not sorted and contiguous.")
    return total_episodes

    return _create_synced_hf_dataset

def calculate_episode_data_index(hf_dataset: datasets.Dataset) -> dict[str, np.ndarray]:
    episode_lengths = []
    table = hf_dataset.data.table
    total_episodes = calculate_total_episode(hf_dataset)
    for ep_idx in range(total_episodes):
        ep_table = table.filter(pc.equal(table["episode_index"], ep_idx))
        episode_lengths.insert(ep_idx, len(ep_table))

    cumulative_lengths = list(accumulate(episode_lengths))
    return {
        "from": np.array([0] + cumulative_lengths[:-1], dtype=np.int64),
        "to": np.array(cumulative_lengths, dtype=np.int64),
    }
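

# Illustration only (not part of this commit): for hypothetical episode lengths
# [3, 2, 4], the cumulative bounds built above give episode i the row range
# from[i]:to[i] of the flattened dataset.
from itertools import accumulate
import numpy as np

lengths = [3, 2, 4]
cumulative = list(accumulate(lengths))
assert [0] + cumulative[:-1] == [0, 3, 5]  # "from" bounds
assert cumulative == [3, 5, 9]  # "to" bounds
assert np.diff(np.array([0] + cumulative)).tolist() == lengths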


@pytest.fixture(scope="module")
def unsynced_hf_dataset_factory(synced_hf_dataset_factory):
    def _create_unsynced_hf_dataset(fps: int = 30, tolerance_s: float = 1e-4) -> Dataset:
        hf_dataset = synced_hf_dataset_factory(fps=fps)
        features = hf_dataset.features
        df = hf_dataset.to_pandas()
        dtype = df["timestamp"].dtype  # This is to avoid pandas type warning
        # Modify a single timestamp just outside tolerance
        df.at[30, "timestamp"] = dtype.type(df.at[30, "timestamp"] + (tolerance_s * 1.1))
        unsynced_hf_dataset = Dataset.from_pandas(df, features=features)
        unsynced_hf_dataset.set_transform(hf_transform_to_torch)
        return unsynced_hf_dataset
def synced_timestamps_factory(hf_dataset_factory):
    def _create_synced_timestamps(fps: int = 30) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        hf_dataset = hf_dataset_factory(fps=fps)
        timestamps = torch.stack(hf_dataset["timestamp"]).numpy()
        episode_indices = torch.stack(hf_dataset["episode_index"]).numpy()
        episode_data_index = calculate_episode_data_index(hf_dataset)
        return timestamps, episode_indices, episode_data_index

    return _create_unsynced_hf_dataset
    return _create_synced_timestamps


@pytest.fixture(scope="module")
def slightly_off_hf_dataset_factory(synced_hf_dataset_factory):
    def _create_slightly_off_hf_dataset(fps: int = 30, tolerance_s: float = 1e-4) -> Dataset:
        hf_dataset = synced_hf_dataset_factory(fps=fps)
        features = hf_dataset.features
        df = hf_dataset.to_pandas()
        dtype = df["timestamp"].dtype  # This is to avoid pandas type warning
        # Modify a single timestamp just inside tolerance
        df.at[30, "timestamp"] = dtype.type(df.at[30, "timestamp"] + (tolerance_s * 0.9))
        unsynced_hf_dataset = Dataset.from_pandas(df, features=features)
        unsynced_hf_dataset.set_transform(hf_transform_to_torch)
        return unsynced_hf_dataset
def unsynced_timestamps_factory(synced_timestamps_factory):
    def _create_unsynced_timestamps(
        fps: int = 30, tolerance_s: float = 1e-4
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        timestamps, episode_indices, episode_data_index = synced_timestamps_factory(fps=fps)
        timestamps[30] += tolerance_s * 1.1  # Modify a single timestamp just outside tolerance
        return timestamps, episode_indices, episode_data_index

    return _create_slightly_off_hf_dataset
    return _create_unsynced_timestamps


@pytest.fixture(scope="module")
def slightly_off_timestamps_factory(synced_timestamps_factory):
    def _create_slightly_off_timestamps(
        fps: int = 30, tolerance_s: float = 1e-4
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        timestamps, episode_indices, episode_data_index = synced_timestamps_factory(fps=fps)
        timestamps[30] += tolerance_s * 0.9  # Modify a single timestamp just inside tolerance
        return timestamps, episode_indices, episode_data_index

    return _create_slightly_off_timestamps


@pytest.fixture(scope="module")
@@ -100,42 +123,42 @@ def delta_indices_factory():
    return _delta_indices


def test_check_timestamps_sync_synced(synced_hf_dataset_factory):
def test_check_timestamps_sync_synced(synced_timestamps_factory):
    fps = 30
    tolerance_s = 1e-4
    synced_hf_dataset = synced_hf_dataset_factory(fps)
    episode_data_index = calculate_episode_data_index(synced_hf_dataset)
    timestamps, ep_idx, ep_data_index = synced_timestamps_factory(fps)
    result = check_timestamps_sync(
        hf_dataset=synced_hf_dataset,
        episode_data_index=episode_data_index,
        timestamps=timestamps,
        episode_indices=ep_idx,
        episode_data_index=ep_data_index,
        fps=fps,
        tolerance_s=tolerance_s,
    )
    assert result is True
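

# A minimal sketch (illustration only, not part of this commit) of the invariant
# check_timestamps_sync enforces within an episode: consecutive timestamps advance
# by 1/fps, within tolerance_s. Data below is hypothetical.
import numpy as np

fps, tolerance_s = 30, 1e-4
synced = np.arange(60) / fps  # one well-formed episode
assert np.all(np.abs(np.diff(synced) - 1 / fps) <= tolerance_s)
unsynced = synced.copy()
unsynced[30] += tolerance_s * 1.1  # single frame drifts just outside tolerance
assert np.any(np.abs(np.diff(unsynced) - 1 / fps) > tolerance_s)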


def test_check_timestamps_sync_unsynced(unsynced_hf_dataset_factory):
def test_check_timestamps_sync_unsynced(unsynced_timestamps_factory):
    fps = 30
    tolerance_s = 1e-4
    unsynced_hf_dataset = unsynced_hf_dataset_factory(fps, tolerance_s)
    episode_data_index = calculate_episode_data_index(unsynced_hf_dataset)
    timestamps, ep_idx, ep_data_index = unsynced_timestamps_factory(fps, tolerance_s)
    with pytest.raises(ValueError):
        check_timestamps_sync(
            hf_dataset=unsynced_hf_dataset,
            episode_data_index=episode_data_index,
            timestamps=timestamps,
            episode_indices=ep_idx,
            episode_data_index=ep_data_index,
            fps=fps,
            tolerance_s=tolerance_s,
        )


def test_check_timestamps_sync_unsynced_no_exception(unsynced_hf_dataset_factory):
def test_check_timestamps_sync_unsynced_no_exception(unsynced_timestamps_factory):
    fps = 30
    tolerance_s = 1e-4
    unsynced_hf_dataset = unsynced_hf_dataset_factory(fps, tolerance_s)
    episode_data_index = calculate_episode_data_index(unsynced_hf_dataset)
    timestamps, ep_idx, ep_data_index = unsynced_timestamps_factory(fps, tolerance_s)
    result = check_timestamps_sync(
        hf_dataset=unsynced_hf_dataset,
        episode_data_index=episode_data_index,
        timestamps=timestamps,
        episode_indices=ep_idx,
        episode_data_index=ep_data_index,
        fps=fps,
        tolerance_s=tolerance_s,
        raise_value_error=False,
@@ -143,14 +166,14 @@ def test_check_timestamps_sync_unsynced_no_exception(unsynced_hf_dataset_factory
    assert result is False


def test_check_timestamps_sync_slightly_off(slightly_off_hf_dataset_factory):
def test_check_timestamps_sync_slightly_off(slightly_off_timestamps_factory):
    fps = 30
    tolerance_s = 1e-4
    slightly_off_hf_dataset = slightly_off_hf_dataset_factory(fps, tolerance_s)
    episode_data_index = calculate_episode_data_index(slightly_off_hf_dataset)
    timestamps, ep_idx, ep_data_index = slightly_off_timestamps_factory(fps, tolerance_s)
    result = check_timestamps_sync(
        hf_dataset=slightly_off_hf_dataset,
        episode_data_index=episode_data_index,
        timestamps=timestamps,
        episode_indices=ep_idx,
        episode_data_index=ep_data_index,
        fps=fps,
        tolerance_s=tolerance_s,
    )
@@ -158,33 +181,13 @@ def test_check_timestamps_sync_slightly_off(slightly_off_hf_dataset_factory):

def test_check_timestamps_sync_single_timestamp():
    single_timestamp_hf_dataset = Dataset.from_dict({"timestamp": [0.0], "episode_index": [0]})
    single_timestamp_hf_dataset.set_transform(hf_transform_to_torch)
    episode_data_index = {"to": torch.tensor([1]), "from": torch.tensor([0])}
    fps = 30
    tolerance_s = 1e-4
    timestamps, ep_idx = np.array([0.0]), np.array([0])
    episode_data_index = {"to": np.array([1]), "from": np.array([0])}
    result = check_timestamps_sync(
        hf_dataset=single_timestamp_hf_dataset,
        episode_data_index=episode_data_index,
        fps=fps,
        tolerance_s=tolerance_s,
    )
    assert result is True


# TODO(aliberts): Change behavior of hf_transform_to_torch so that it can work with empty dataset
@pytest.mark.skip("TODO: fix")
def test_check_timestamps_sync_empty_dataset():
    fps = 30
    tolerance_s = 1e-4
    empty_hf_dataset = Dataset.from_dict({"timestamp": [], "episode_index": []})
    empty_hf_dataset.set_transform(hf_transform_to_torch)
    episode_data_index = {
        "to": torch.tensor([], dtype=torch.int64),
        "from": torch.tensor([], dtype=torch.int64),
    }
    result = check_timestamps_sync(
        hf_dataset=empty_hf_dataset,
        timestamps=timestamps,
        episode_indices=ep_idx,
        episode_data_index=episode_data_index,
        fps=fps,
        tolerance_s=tolerance_s,

@@ -53,7 +53,7 @@ def test_example_1(tmp_path, lerobot_dataset_factory):
            ('repo_id = "lerobot/pusht"', f'repo_id = "{DUMMY_REPO_ID}"'),
            (
                "LeRobotDataset(repo_id",
                f"LeRobotDataset(repo_id, root='{str(tmp_path)}', local_files_only=True",
                f"LeRobotDataset(repo_id, root='{str(tmp_path)}'",
            ),
        ],
    )
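

# Illustration only (not part of this commit): the (old, new) pairs above appear to
# feed a plain-text find-and-replace over the example scripts before they run. A
# hypothetical minimal version of that rewrite:
content = "LeRobotDataset(repo_id)"
for old, new in [("LeRobotDataset(repo_id", "LeRobotDataset(repo_id, root='/tmp'")]:
    content = content.replace(old, new)
assert content == "LeRobotDataset(repo_id, root='/tmp')"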

@@ -9,10 +9,11 @@ from PIL import Image

from lerobot.common.datasets.image_writer import (
    AsyncImageWriter,
    image_array_to_image,
    image_array_to_pil_image,
    safe_stop_image_writer,
    write_image,
)
from tests.fixtures.constants import DUMMY_HWC

DUMMY_IMAGE = "test_image.png"

@@ -48,49 +49,62 @@ def test_zero_threads():
        AsyncImageWriter(num_processes=0, num_threads=0)


def test_image_array_to_image_rgb(img_array_factory):
def test_image_array_to_pil_image_float_array_wrong_range_0_255():
    image = np.random.rand(*DUMMY_HWC) * 255
    with pytest.raises(ValueError):
        image_array_to_pil_image(image)


def test_image_array_to_pil_image_float_array_wrong_range_neg_1_1():
    image = np.random.rand(*DUMMY_HWC) * 2 - 1
    with pytest.raises(ValueError):
        image_array_to_pil_image(image)


def test_image_array_to_pil_image_rgb(img_array_factory):
    img_array = img_array_factory(100, 100)
    result_image = image_array_to_image(img_array)
    result_image = image_array_to_pil_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "RGB"


def test_image_array_to_image_pytorch_format(img_array_factory):
def test_image_array_to_pil_image_pytorch_format(img_array_factory):
    img_array = img_array_factory(100, 100).transpose(2, 0, 1)
    result_image = image_array_to_image(img_array)
    result_image = image_array_to_pil_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "RGB"


@pytest.mark.skip("TODO: implement")
def test_image_array_to_image_single_channel(img_array_factory):
def test_image_array_to_pil_image_single_channel(img_array_factory):
    img_array = img_array_factory(channels=1)
    result_image = image_array_to_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "L"
    with pytest.raises(NotImplementedError):
        image_array_to_pil_image(img_array)


def test_image_array_to_image_float_array(img_array_factory):
def test_image_array_to_pil_image_4_channels(img_array_factory):
    img_array = img_array_factory(channels=4)
    with pytest.raises(NotImplementedError):
        image_array_to_pil_image(img_array)


def test_image_array_to_pil_image_float_array(img_array_factory):
    img_array = img_array_factory(dtype=np.float32)
    result_image = image_array_to_image(img_array)
    result_image = image_array_to_pil_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "RGB"
    assert np.array(result_image).dtype == np.uint8
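

# Illustration only (not part of this commit): the float-array path above assumes
# inputs in [0, 1] that are rescaled to uint8 (out-of-range floats are rejected by
# the wrong-range tests earlier). A hypothetical minimal version of that rescaling:
import numpy as np

float_img = np.random.rand(96, 128, 3).astype(np.float32)  # values in [0, 1]
uint8_img = (float_img * 255).round().astype(np.uint8)
assert uint8_img.dtype == np.uint8 and uint8_img.max() <= 255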


def test_image_array_to_image_out_of_bounds_float():
    # Float array with values out of [0, 1]
    img_array = np.random.uniform(-1, 2, size=(100, 100, 3)).astype(np.float32)
    result_image = image_array_to_image(img_array)
def test_image_array_to_pil_image_uint8_array(img_array_factory):
    img_array = img_array_factory(dtype=np.uint8)
    result_image = image_array_to_pil_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "RGB"
    assert np.array(result_image).dtype == np.uint8
    assert np.array(result_image).min() >= 0 and np.array(result_image).max() <= 255


def test_write_image_numpy(tmp_path, img_array_factory):

@@ -1,370 +0,0 @@
"""
This file contains generic tests to ensure that nothing breaks if we modify the push_dataset_to_hub API.
Also, this file contains backward compatibility tests. Because they are slow and require downloading the
raw datasets, we skip them for now in our CI.

Example to run backward compatibility tests locally:
```
python -m pytest --run-skipped tests/test_push_dataset_to_hub.py::test_push_dataset_to_hub_pusht_backward_compatibility
```
"""

from pathlib import Path

import numpy as np
import pytest
import torch

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.push_dataset_to_hub.utils import save_images_concurrently
from lerobot.common.datasets.video_utils import encode_video_frames
from lerobot.scripts.push_dataset_to_hub import push_dataset_to_hub
from tests.utils import require_package_arg


def _mock_download_raw_pusht(raw_dir, num_frames=4, num_episodes=3):
    import zarr

    raw_dir.mkdir(parents=True, exist_ok=True)
    zarr_path = raw_dir / "pusht_cchi_v7_replay.zarr"
    store = zarr.DirectoryStore(zarr_path)
    zarr_data = zarr.group(store=store)

    zarr_data.create_dataset(
        "data/action", shape=(num_frames, 1), chunks=(num_frames, 1), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "data/img",
        shape=(num_frames, 96, 96, 3),
        chunks=(num_frames, 96, 96, 3),
        dtype=np.uint8,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/n_contacts", shape=(num_frames, 2), chunks=(num_frames, 2), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "data/state", shape=(num_frames, 5), chunks=(num_frames, 5), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "data/keypoint", shape=(num_frames, 9, 2), chunks=(num_frames, 9, 2), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "meta/episode_ends", shape=(num_episodes,), chunks=(num_episodes,), dtype=np.int32, overwrite=True
    )

    zarr_data["data/action"][:] = np.random.randn(num_frames, 1)
    zarr_data["data/img"][:] = np.random.randint(0, 255, size=(num_frames, 96, 96, 3), dtype=np.uint8)
    zarr_data["data/n_contacts"][:] = np.random.randn(num_frames, 2)
    zarr_data["data/state"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/keypoint"][:] = np.random.randn(num_frames, 9, 2)
    zarr_data["meta/episode_ends"][:] = np.array([1, 3, 4])

    store.close()
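

# Illustration only (not part of this commit): "meta/episode_ends" stores episode
# boundaries as cumulative frame counts, so [1, 3, 4] encodes episodes of 1, 2 and 1
# frame(s) over num_frames=4.
import numpy as np

episode_ends = np.array([1, 3, 4])
episode_lengths = np.diff(np.concatenate([[0], episode_ends]))
assert episode_lengths.tolist() == [1, 2, 1]
assert episode_ends[-1] == 4  # num_frames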


def _mock_download_raw_umi(raw_dir, num_frames=4, num_episodes=3):
    import zarr

    raw_dir.mkdir(parents=True, exist_ok=True)
    zarr_path = raw_dir / "cup_in_the_wild.zarr"
    store = zarr.DirectoryStore(zarr_path)
    zarr_data = zarr.group(store=store)

    zarr_data.create_dataset(
        "data/camera0_rgb",
        shape=(num_frames, 96, 96, 3),
        chunks=(num_frames, 96, 96, 3),
        dtype=np.uint8,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/robot0_demo_end_pose",
        shape=(num_frames, 5),
        chunks=(num_frames, 5),
        dtype=np.float32,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/robot0_demo_start_pose",
        shape=(num_frames, 5),
        chunks=(num_frames, 5),
        dtype=np.float32,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/robot0_eef_pos", shape=(num_frames, 5), chunks=(num_frames, 5), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "data/robot0_eef_rot_axis_angle",
        shape=(num_frames, 5),
        chunks=(num_frames, 5),
        dtype=np.float32,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/robot0_gripper_width",
        shape=(num_frames, 5),
        chunks=(num_frames, 5),
        dtype=np.float32,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "meta/episode_ends", shape=(num_episodes,), chunks=(num_episodes,), dtype=np.int32, overwrite=True
    )

    zarr_data["data/camera0_rgb"][:] = np.random.randint(0, 255, size=(num_frames, 96, 96, 3), dtype=np.uint8)
    zarr_data["data/robot0_demo_end_pose"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/robot0_demo_start_pose"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/robot0_eef_pos"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/robot0_eef_rot_axis_angle"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/robot0_gripper_width"][:] = np.random.randn(num_frames, 5)
    zarr_data["meta/episode_ends"][:] = np.array([1, 3, 4])

    store.close()


def _mock_download_raw_xarm(raw_dir, num_frames=4):
    import pickle

    dataset_dict = {
        "observations": {
            "rgb": np.random.randint(0, 255, size=(num_frames, 3, 84, 84), dtype=np.uint8),
            "state": np.random.randn(num_frames, 4),
        },
        "actions": np.random.randn(num_frames, 3),
        "rewards": np.random.randn(num_frames),
        "masks": np.random.randn(num_frames),
        "dones": np.array([False, True, True, True]),
    }

    raw_dir.mkdir(parents=True, exist_ok=True)
    pkl_path = raw_dir / "buffer.pkl"
    with open(pkl_path, "wb") as f:
        pickle.dump(dataset_dict, f)


def _mock_download_raw_aloha(raw_dir, num_frames=6, num_episodes=3):
    import h5py

    for ep_idx in range(num_episodes):
        raw_dir.mkdir(parents=True, exist_ok=True)
        path_h5 = raw_dir / f"episode_{ep_idx}.hdf5"
        with h5py.File(str(path_h5), "w") as f:
            f.create_dataset("action", data=np.random.randn(num_frames // num_episodes, 14))
            f.create_dataset("observations/qpos", data=np.random.randn(num_frames // num_episodes, 14))
            f.create_dataset("observations/qvel", data=np.random.randn(num_frames // num_episodes, 14))
            f.create_dataset(
                "observations/images/top",
                data=np.random.randint(
                    0, 255, size=(num_frames // num_episodes, 480, 640, 3), dtype=np.uint8
                ),
            )


def _mock_download_raw_dora(raw_dir, num_frames=6, num_episodes=3, fps=30):
    from datetime import datetime, timedelta, timezone

    import pandas

    def write_parquet(key, timestamps, values):
        data = {
            "timestamp_utc": timestamps,
            key: values,
        }
        df = pandas.DataFrame(data)
        raw_dir.mkdir(parents=True, exist_ok=True)
        df.to_parquet(raw_dir / f"{key}.parquet", engine="pyarrow")

    episode_indices = [None, None, -1, None, None, -1, None, None, -1]
    episode_indices_mapping = [0, 0, 0, 1, 1, 1, 2, 2, 2]
    frame_indices = [0, 1, -1, 0, 1, -1, 0, 1, -1]

    cam_key = "observation.images.cam_high"
    timestamps = []
    actions = []
    states = []
    frames = []
    # `+ num_episodes` accounts for the buffer frames associated with episode_index=-1
    for i, frame_idx in enumerate(frame_indices):
        t_utc = datetime.now(timezone.utc) + timedelta(seconds=i / fps)
        action = np.random.randn(21).tolist()
        state = np.random.randn(21).tolist()
        ep_idx = episode_indices_mapping[i]
        frame = [{"path": f"videos/{cam_key}_episode_{ep_idx:06d}.mp4", "timestamp": frame_idx / fps}]
        timestamps.append(t_utc)
        actions.append(action)
        states.append(state)
        frames.append(frame)

    write_parquet(cam_key, timestamps, frames)
    write_parquet("observation.state", timestamps, states)
    write_parquet("action", timestamps, actions)
    write_parquet("episode_index", timestamps, episode_indices)

    # write fake mp4 file for each episode
    for ep_idx in range(num_episodes):
        imgs_array = np.random.randint(0, 255, size=(num_frames // num_episodes, 480, 640, 3), dtype=np.uint8)

        tmp_imgs_dir = raw_dir / "tmp_images"
        save_images_concurrently(imgs_array, tmp_imgs_dir)

        fname = f"{cam_key}_episode_{ep_idx:06d}.mp4"
        video_path = raw_dir / "videos" / fname
        encode_video_frames(tmp_imgs_dir, video_path, fps, vcodec="libx264")
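

# Illustration only (not part of this commit): in the mocked Dora recording above,
# every third row is a buffer frame tagged episode_index=-1, so 9 rows cover
# num_frames=6 real frames across 3 episodes.
episode_indices = [None, None, -1, None, None, -1, None, None, -1]
real_rows = [i for i, ep in enumerate(episode_indices) if ep != -1]
assert len(real_rows) == 6  # num_frames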


def _mock_download_raw(raw_dir, repo_id):
    if "wrist_gripper" in repo_id:
        _mock_download_raw_dora(raw_dir)
    elif "aloha" in repo_id:
        _mock_download_raw_aloha(raw_dir)
    elif "pusht" in repo_id:
        _mock_download_raw_pusht(raw_dir)
    elif "xarm" in repo_id:
        _mock_download_raw_xarm(raw_dir)
    elif "umi" in repo_id:
        _mock_download_raw_umi(raw_dir)
    else:
        raise ValueError(repo_id)


@pytest.mark.skip("push_dataset_to_hub is deprecated")
def test_push_dataset_to_hub_invalid_repo_id(tmpdir):
    with pytest.raises(ValueError):
        push_dataset_to_hub(Path(tmpdir), "raw_format", "invalid_repo_id")


@pytest.mark.skip("push_dataset_to_hub is deprecated")
def test_push_dataset_to_hub_out_dir_force_override_false(tmpdir):
    tmpdir = Path(tmpdir)
    out_dir = tmpdir / "out"
    raw_dir = tmpdir / "raw"
    # mkdir to skip download
    raw_dir.mkdir(parents=True, exist_ok=True)
    with pytest.raises(ValueError):
        push_dataset_to_hub(
            raw_dir=raw_dir,
            raw_format="some_format",
            repo_id="user/dataset",
            local_dir=out_dir,
            force_override=False,
        )


@pytest.mark.skip("push_dataset_to_hub is deprecated")
@pytest.mark.parametrize(
    "required_packages, raw_format, repo_id, make_test_data",
    [
        (["gym_pusht"], "pusht_zarr", "lerobot/pusht", False),
        (["gym_pusht"], "pusht_zarr", "lerobot/pusht", True),
        (None, "xarm_pkl", "lerobot/xarm_lift_medium", False),
        (None, "aloha_hdf5", "lerobot/aloha_sim_insertion_scripted", False),
        (["imagecodecs"], "umi_zarr", "lerobot/umi_cup_in_the_wild", False),
        (None, "dora_parquet", "cadene/wrist_gripper", False),
    ],
)
@require_package_arg
def test_push_dataset_to_hub_format(required_packages, tmpdir, raw_format, repo_id, make_test_data):
    num_episodes = 3
    tmpdir = Path(tmpdir)

    raw_dir = tmpdir / f"{repo_id}_raw"
    _mock_download_raw(raw_dir, repo_id)

    local_dir = tmpdir / repo_id

    lerobot_dataset = push_dataset_to_hub(
        raw_dir=raw_dir,
        raw_format=raw_format,
        repo_id=repo_id,
        push_to_hub=False,
        local_dir=local_dir,
        force_override=False,
        cache_dir=tmpdir / "cache",
        tests_data_dir=tmpdir / "tests/data" if make_test_data else None,
        encoding={"vcodec": "libx264"},
    )

    # minimal generic tests on the local directory containing LeRobotDataset
    assert (local_dir / "meta_data" / "info.json").exists()
    assert (local_dir / "meta_data" / "stats.safetensors").exists()
    assert (local_dir / "meta_data" / "episode_data_index.safetensors").exists()
    for i in range(num_episodes):
        for cam_key in lerobot_dataset.camera_keys:
            assert (local_dir / "videos" / f"{cam_key}_episode_{i:06d}.mp4").exists()
    assert (local_dir / "train" / "dataset_info.json").exists()
    assert (local_dir / "train" / "state.json").exists()
    assert len(list((local_dir / "train").glob("*.arrow"))) > 0

    # minimal generic tests on the item
    item = lerobot_dataset[0]
    assert "index" in item
    assert "episode_index" in item
    assert "timestamp" in item
    for cam_key in lerobot_dataset.camera_keys:
        assert cam_key in item

    if make_test_data:
        # Check that only the first episode is selected.
        test_dataset = LeRobotDataset(repo_id=repo_id, root=tmpdir / "tests/data")
        num_frames = sum(
            i == lerobot_dataset.hf_dataset["episode_index"][0]
            for i in lerobot_dataset.hf_dataset["episode_index"]
        ).item()
        assert (
            test_dataset.hf_dataset["episode_index"]
            == lerobot_dataset.hf_dataset["episode_index"][:num_frames]
        )
        for k in ["from", "to"]:
            assert torch.equal(test_dataset.episode_data_index[k], lerobot_dataset.episode_data_index[k][:1])


@pytest.mark.skip("push_dataset_to_hub is deprecated")
@pytest.mark.parametrize(
    "raw_format, repo_id",
    [
        # TODO(rcadene): add raw dataset test artifacts
        ("pusht_zarr", "lerobot/pusht"),
        ("xarm_pkl", "lerobot/xarm_lift_medium"),
        ("aloha_hdf5", "lerobot/aloha_sim_insertion_scripted"),
        ("umi_zarr", "lerobot/umi_cup_in_the_wild"),
        ("dora_parquet", "cadene/wrist_gripper"),
    ],
)
def test_push_dataset_to_hub_pusht_backward_compatibility(tmpdir, raw_format, repo_id):
    _, dataset_id = repo_id.split("/")

    tmpdir = Path(tmpdir)
    raw_dir = tmpdir / f"{dataset_id}_raw"
    local_dir = tmpdir / repo_id

    push_dataset_to_hub(
        raw_dir=raw_dir,
        raw_format=raw_format,
        repo_id=repo_id,
        push_to_hub=False,
        local_dir=local_dir,
        force_override=False,
        cache_dir=tmpdir / "cache",
        episodes=[0],
    )

    ds_actual = LeRobotDataset(repo_id, root=tmpdir)
    ds_reference = LeRobotDataset(repo_id)

    assert len(ds_reference.hf_dataset) == len(ds_actual.hf_dataset)

    def check_same_items(item1, item2):
        assert item1.keys() == item2.keys(), "Keys mismatch"

        for key in item1:
            if isinstance(item1[key], torch.Tensor) and isinstance(item2[key], torch.Tensor):
                assert torch.equal(item1[key], item2[key]), f"Mismatch found in key: {key}"
            else:
                assert item1[key] == item2[key], f"Mismatch found in key: {key}"

    for i in range(len(ds_reference.hf_dataset)):
        item_reference = ds_reference.hf_dataset[i]
        item_actual = ds_actual.hf_dataset[i]
        check_same_items(item_reference, item_actual)
@@ -23,8 +23,6 @@ pytest -sx 'tests/test_robots.py::test_robot[aloha-True]'
```
"""

from pathlib import Path

import pytest
import torch

@@ -35,7 +33,7 @@ from tests.utils import TEST_ROBOT_TYPES, mock_calibration_dir, require_robot

@pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
@require_robot
def test_robot(tmpdir, request, robot_type, mock):
def test_robot(tmp_path, request, robot_type, mock):
    # TODO(rcadene): measure fps in nightly?
    # TODO(rcadene): test logs
    # TODO(rcadene): add compatibility with other robots
@@ -50,8 +48,7 @@ def test_robot(tmpdir, request, robot_type, mock):
    request.getfixturevalue("patch_builtins_input")

    # Create an empty calibration directory to trigger manual calibration
    tmpdir = Path(tmpdir)
    calibration_dir = tmpdir / robot_type
    calibration_dir = tmp_path / robot_type
    mock_calibration_dir(calibration_dir)
    robot_kwargs["calibration_dir"] = calibration_dir