forked from tangger/lerobot

LeRobotDataset v2.1 (#711)
Co-authored-by: Remi <remi.cadene@huggingface.co>
Co-authored-by: Remi Cadene <re.cadene@gmail.com>

tests/fixtures/constants.py (vendored, 6 changed lines)
@@ -1,6 +1,6 @@
-from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME
+from lerobot.common.constants import HF_LEROBOT_HOME

-LEROBOT_TEST_DIR = LEROBOT_HOME / "_testing"
+LEROBOT_TEST_DIR = HF_LEROBOT_HOME / "_testing"
 DUMMY_REPO_ID = "dummy/repo"
 DUMMY_ROBOT_TYPE = "dummy_robot"
 DUMMY_MOTOR_FEATURES = {
@@ -27,3 +27,5 @@ DUMMY_VIDEO_INFO = {
     "video.is_depth_map": False,
     "has_audio": False,
 }
+DUMMY_CHW = (3, 96, 128)
+DUMMY_HWC = (96, 128, 3)
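The two new constants describe the same 96x128 RGB image in channel-first (CHW) and channel-last (HWC) layout; the add_frame tests introduced later in this commit use them to check that images are accepted in either layout and stored channel-first. A minimal illustration of the relationship (plain NumPy, not part of the commit):

    import numpy as np

    DUMMY_CHW = (3, 96, 128)  # channel-first: (channels, height, width)
    DUMMY_HWC = (96, 128, 3)  # channel-last: (height, width, channels)

    img_hwc = np.zeros(DUMMY_HWC, dtype=np.uint8)   # camera-style image
    img_chw = np.transpose(img_hwc, (2, 0, 1))      # HWC -> CHW
    assert img_chw.shape == DUMMY_CHW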
tests/fixtures/dataset_factories.py (vendored, 89 changed lines)
@@ -1,5 +1,7 @@
 import random
+from functools import partial
 from pathlib import Path
+from typing import Protocol
 from unittest.mock import patch

 import datasets
@@ -27,8 +29,12 @@ from tests.fixtures.constants import (
 )


+class LeRobotDatasetFactory(Protocol):
+    def __call__(self, *args, **kwargs) -> LeRobotDataset: ...
+
+
 def get_task_index(task_dicts: dict, task: str) -> int:
-    tasks = {d["task_index"]: d["task"] for d in task_dicts}
+    tasks = {d["task_index"]: d["task"] for d in task_dicts.values()}
     task_to_task_index = {task: task_idx for task_idx, task in tasks.items()}
     return task_to_task_index[task]

@@ -141,6 +147,7 @@ def stats_factory():
                 "mean": np.full((3, 1, 1), 0.5, dtype=np.float32).tolist(),
                 "min": np.full((3, 1, 1), 0, dtype=np.float32).tolist(),
                 "std": np.full((3, 1, 1), 0.25, dtype=np.float32).tolist(),
+                "count": [10],
             }
         else:
             stats[key] = {
@@ -148,20 +155,38 @@ def stats_factory():
                 "mean": np.full(shape, 0.5, dtype=dtype).tolist(),
                 "min": np.full(shape, 0, dtype=dtype).tolist(),
                 "std": np.full(shape, 0.25, dtype=dtype).tolist(),
+                "count": [10],
             }
         return stats

     return _create_stats


+@pytest.fixture(scope="session")
+def episodes_stats_factory(stats_factory):
+    def _create_episodes_stats(
+        features: dict[str],
+        total_episodes: int = 3,
+    ) -> dict:
+        episodes_stats = {}
+        for episode_index in range(total_episodes):
+            episodes_stats[episode_index] = {
+                "episode_index": episode_index,
+                "stats": stats_factory(features),
+            }
+        return episodes_stats
+
+    return _create_episodes_stats
+
+
 @pytest.fixture(scope="session")
 def tasks_factory():
     def _create_tasks(total_tasks: int = 3) -> int:
-        tasks_list = []
-        for i in range(total_tasks):
-            task_dict = {"task_index": i, "task": f"Perform action {i}."}
-            tasks_list.append(task_dict)
-        return tasks_list
+        tasks = {}
+        for task_index in range(total_tasks):
+            task_dict = {"task_index": task_index, "task": f"Perform action {task_index}."}
+            tasks[task_index] = task_dict
+        return tasks

     return _create_tasks

@@ -190,10 +215,10 @@ def episodes_factory(tasks_factory):
         # Generate random lengths that sum up to total_length
         lengths = np.random.multinomial(total_frames, [1 / total_episodes] * total_episodes).tolist()

-        tasks_list = [task_dict["task"] for task_dict in tasks]
+        tasks_list = [task_dict["task"] for task_dict in tasks.values()]
         num_tasks_available = len(tasks_list)

-        episodes_list = []
+        episodes = {}
         remaining_tasks = tasks_list.copy()
         for ep_idx in range(total_episodes):
             num_tasks_in_episode = random.randint(1, min(3, num_tasks_available)) if multi_task else 1
@@ -203,15 +228,13 @@ def episodes_factory(tasks_factory):
             for task in episode_tasks:
                 remaining_tasks.remove(task)

-            episodes_list.append(
-                {
-                    "episode_index": ep_idx,
-                    "tasks": episode_tasks,
-                    "length": lengths[ep_idx],
-                }
-            )
+            episodes[ep_idx] = {
+                "episode_index": ep_idx,
+                "tasks": episode_tasks,
+                "length": lengths[ep_idx],
+            }

-        return episodes_list
+        return episodes

     return _create_episodes

@@ -235,7 +258,7 @@ def hf_dataset_factory(features_factory, tasks_factory, episodes_factory, img_ar
         frame_index_col = np.array([], dtype=np.int64)
         episode_index_col = np.array([], dtype=np.int64)
         task_index = np.array([], dtype=np.int64)
-        for ep_dict in episodes:
+        for ep_dict in episodes.values():
             timestamp_col = np.concatenate((timestamp_col, np.arange(ep_dict["length"]) / fps))
             frame_index_col = np.concatenate((frame_index_col, np.arange(ep_dict["length"], dtype=int)))
             episode_index_col = np.concatenate(
@@ -278,6 +301,7 @@ def hf_dataset_factory(features_factory, tasks_factory, episodes_factory, img_ar
 def lerobot_dataset_metadata_factory(
     info_factory,
     stats_factory,
+    episodes_stats_factory,
     tasks_factory,
     episodes_factory,
     mock_snapshot_download_factory,
@@ -287,14 +311,18 @@ def lerobot_dataset_metadata_factory(
         repo_id: str = DUMMY_REPO_ID,
         info: dict | None = None,
         stats: dict | None = None,
+        episodes_stats: list[dict] | None = None,
         tasks: list[dict] | None = None,
         episodes: list[dict] | None = None,
-        local_files_only: bool = False,
     ) -> LeRobotDatasetMetadata:
         if not info:
             info = info_factory()
         if not stats:
             stats = stats_factory(features=info["features"])
+        if not episodes_stats:
+            episodes_stats = episodes_stats_factory(
+                features=info["features"], total_episodes=info["total_episodes"]
+            )
         if not tasks:
             tasks = tasks_factory(total_tasks=info["total_tasks"])
         if not episodes:
@@ -305,21 +333,20 @@ def lerobot_dataset_metadata_factory(
         mock_snapshot_download = mock_snapshot_download_factory(
             info=info,
             stats=stats,
+            episodes_stats=episodes_stats,
             tasks=tasks,
             episodes=episodes,
         )
         with (
-            patch(
-                "lerobot.common.datasets.lerobot_dataset.get_hub_safe_version"
-            ) as mock_get_hub_safe_version_patch,
+            patch("lerobot.common.datasets.lerobot_dataset.get_safe_version") as mock_get_safe_version_patch,
             patch(
                 "lerobot.common.datasets.lerobot_dataset.snapshot_download"
             ) as mock_snapshot_download_patch,
         ):
-            mock_get_hub_safe_version_patch.side_effect = lambda repo_id, version: version
+            mock_get_safe_version_patch.side_effect = lambda repo_id, version: version
             mock_snapshot_download_patch.side_effect = mock_snapshot_download

-            return LeRobotDatasetMetadata(repo_id=repo_id, root=root, local_files_only=local_files_only)
+            return LeRobotDatasetMetadata(repo_id=repo_id, root=root)

     return _create_lerobot_dataset_metadata

@@ -328,12 +355,13 @@ def lerobot_dataset_metadata_factory(
 def lerobot_dataset_factory(
     info_factory,
     stats_factory,
+    episodes_stats_factory,
     tasks_factory,
     episodes_factory,
     hf_dataset_factory,
     mock_snapshot_download_factory,
     lerobot_dataset_metadata_factory,
-):
+) -> LeRobotDatasetFactory:
     def _create_lerobot_dataset(
         root: Path,
         repo_id: str = DUMMY_REPO_ID,
@@ -343,6 +371,7 @@ def lerobot_dataset_factory(
         multi_task: bool = False,
         info: dict | None = None,
         stats: dict | None = None,
+        episodes_stats: list[dict] | None = None,
         tasks: list[dict] | None = None,
         episode_dicts: list[dict] | None = None,
         hf_dataset: datasets.Dataset | None = None,
@@ -354,6 +383,8 @@ def lerobot_dataset_factory(
         )
         if not stats:
             stats = stats_factory(features=info["features"])
+        if not episodes_stats:
+            episodes_stats = episodes_stats_factory(features=info["features"], total_episodes=total_episodes)
         if not tasks:
             tasks = tasks_factory(total_tasks=info["total_tasks"])
         if not episode_dicts:
@@ -369,6 +400,7 @@ def lerobot_dataset_factory(
         mock_snapshot_download = mock_snapshot_download_factory(
             info=info,
             stats=stats,
+            episodes_stats=episodes_stats,
             tasks=tasks,
             episodes=episode_dicts,
             hf_dataset=hf_dataset,
@@ -378,19 +410,26 @@ def lerobot_dataset_factory(
             repo_id=repo_id,
             info=info,
             stats=stats,
+            episodes_stats=episodes_stats,
             tasks=tasks,
             episodes=episode_dicts,
-            local_files_only=kwargs.get("local_files_only", False),
         )
         with (
             patch("lerobot.common.datasets.lerobot_dataset.LeRobotDatasetMetadata") as mock_metadata_patch,
+            patch("lerobot.common.datasets.lerobot_dataset.get_safe_version") as mock_get_safe_version_patch,
             patch(
                 "lerobot.common.datasets.lerobot_dataset.snapshot_download"
             ) as mock_snapshot_download_patch,
         ):
             mock_metadata_patch.return_value = mock_metadata
+            mock_get_safe_version_patch.side_effect = lambda repo_id, version: version
             mock_snapshot_download_patch.side_effect = mock_snapshot_download

             return LeRobotDataset(repo_id=repo_id, root=root, **kwargs)

     return _create_lerobot_dataset
+
+
+@pytest.fixture(scope="session")
+def empty_lerobot_dataset_factory() -> LeRobotDatasetFactory:
+    return partial(LeRobotDataset.create, repo_id=DUMMY_REPO_ID, fps=DEFAULT_FPS)
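A recurring pattern in this diff: tasks, episodes, and the new episodes_stats are now plain dicts keyed by their integer index instead of lists, so every iteration goes through .values(). A minimal sketch of the shapes these factories now produce (illustrative values, not taken from the commit):

    # tasks: {task_index: {"task_index": ..., "task": ...}}
    tasks = {
        0: {"task_index": 0, "task": "Perform action 0."},
        1: {"task_index": 1, "task": "Perform action 1."},
    }

    # episodes: {episode_index: {"episode_index": ..., "tasks": [...], "length": ...}}
    episodes = {
        0: {"episode_index": 0, "tasks": ["Perform action 0."], "length": 100},
    }

    # episodes_stats: {episode_index: {"episode_index": ..., "stats": {...}}}
    # This is why get_task_index above iterates over task_dicts.values():
    tasks_by_index = {d["task_index"]: d["task"] for d in tasks.values()}
    assert tasks_by_index[1] == "Perform action 1."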
tests/fixtures/files.py (vendored, 26 changed lines)
@@ -7,7 +7,13 @@ import pyarrow.compute as pc
 import pyarrow.parquet as pq
 import pytest

-from lerobot.common.datasets.utils import EPISODES_PATH, INFO_PATH, STATS_PATH, TASKS_PATH
+from lerobot.common.datasets.utils import (
+    EPISODES_PATH,
+    EPISODES_STATS_PATH,
+    INFO_PATH,
+    STATS_PATH,
+    TASKS_PATH,
+)


 @pytest.fixture(scope="session")
@@ -38,6 +44,20 @@ def stats_path(stats_factory):
     return _create_stats_json_file


+@pytest.fixture(scope="session")
+def episodes_stats_path(episodes_stats_factory):
+    def _create_episodes_stats_jsonl_file(dir: Path, episodes_stats: list[dict] | None = None) -> Path:
+        if not episodes_stats:
+            episodes_stats = episodes_stats_factory()
+        fpath = dir / EPISODES_STATS_PATH
+        fpath.parent.mkdir(parents=True, exist_ok=True)
+        with jsonlines.open(fpath, "w") as writer:
+            writer.write_all(episodes_stats.values())
+        return fpath
+
+    return _create_episodes_stats_jsonl_file
+
+
 @pytest.fixture(scope="session")
 def tasks_path(tasks_factory):
     def _create_tasks_jsonl_file(dir: Path, tasks: list | None = None) -> Path:
@@ -46,7 +66,7 @@ def tasks_path(tasks_factory):
         fpath = dir / TASKS_PATH
         fpath.parent.mkdir(parents=True, exist_ok=True)
         with jsonlines.open(fpath, "w") as writer:
-            writer.write_all(tasks)
+            writer.write_all(tasks.values())
         return fpath

     return _create_tasks_jsonl_file
@@ -60,7 +80,7 @@ def episode_path(episodes_factory):
         fpath = dir / EPISODES_PATH
         fpath.parent.mkdir(parents=True, exist_ok=True)
         with jsonlines.open(fpath, "w") as writer:
-            writer.write_all(episodes)
+            writer.write_all(episodes.values())
         return fpath

     return _create_episodes_jsonl_file
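Because the fixtures now hold dicts keyed by index, serialization writes dict.values(), so the on-disk JSON Lines format is unchanged: one JSON object per line. A round-trip sketch using the jsonlines package (file name and data are illustrative):

    import jsonlines

    tasks = {0: {"task_index": 0, "task": "Pick up the cube."}}

    with jsonlines.open("tasks.jsonl", "w") as writer:
        writer.write_all(tasks.values())  # one object per line; dict keys are dropped

    with jsonlines.open("tasks.jsonl") as reader:
        loaded = {item["task_index"]: item for item in reader}  # re-key on load

    assert loaded == tasks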
tests/fixtures/hub.py (vendored, 21 changed lines)
@@ -4,7 +4,13 @@ import datasets
 import pytest
 from huggingface_hub.utils import filter_repo_objects

-from lerobot.common.datasets.utils import EPISODES_PATH, INFO_PATH, STATS_PATH, TASKS_PATH
+from lerobot.common.datasets.utils import (
+    EPISODES_PATH,
+    EPISODES_STATS_PATH,
+    INFO_PATH,
+    STATS_PATH,
+    TASKS_PATH,
+)
 from tests.fixtures.constants import LEROBOT_TEST_DIR


@@ -14,6 +20,8 @@ def mock_snapshot_download_factory(
     info_path,
     stats_factory,
     stats_path,
+    episodes_stats_factory,
+    episodes_stats_path,
     tasks_factory,
     tasks_path,
     episodes_factory,
@@ -29,6 +37,7 @@ def mock_snapshot_download_factory(
     def _mock_snapshot_download_func(
         info: dict | None = None,
         stats: dict | None = None,
+        episodes_stats: list[dict] | None = None,
         tasks: list[dict] | None = None,
         episodes: list[dict] | None = None,
         hf_dataset: datasets.Dataset | None = None,
@@ -37,6 +46,10 @@ def mock_snapshot_download_factory(
             info = info_factory()
         if not stats:
             stats = stats_factory(features=info["features"])
+        if not episodes_stats:
+            episodes_stats = episodes_stats_factory(
+                features=info["features"], total_episodes=info["total_episodes"]
+            )
         if not tasks:
             tasks = tasks_factory(total_tasks=info["total_tasks"])
         if not episodes:
@@ -67,11 +80,11 @@ def mock_snapshot_download_factory(

         # List all possible files
         all_files = []
-        meta_files = [INFO_PATH, STATS_PATH, TASKS_PATH, EPISODES_PATH]
+        meta_files = [INFO_PATH, STATS_PATH, EPISODES_STATS_PATH, TASKS_PATH, EPISODES_PATH]
         all_files.extend(meta_files)

         data_files = []
-        for episode_dict in episodes:
+        for episode_dict in episodes.values():
             ep_idx = episode_dict["episode_index"]
             ep_chunk = ep_idx // info["chunks_size"]
             data_path = info["data_path"].format(episode_chunk=ep_chunk, episode_index=ep_idx)
@@ -92,6 +105,8 @@ def mock_snapshot_download_factory(
             _ = info_path(local_dir, info)
         elif rel_path == STATS_PATH:
             _ = stats_path(local_dir, stats)
+        elif rel_path == EPISODES_STATS_PATH:
+            _ = episodes_stats_path(local_dir, episodes_stats)
         elif rel_path == TASKS_PATH:
            _ = tasks_path(local_dir, tasks)
         elif rel_path == EPISODES_PATH:
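These fixtures replace huggingface_hub's snapshot_download with a function that materializes the requested files locally instead of hitting the Hub. The mechanism is plain unittest.mock patching with a side_effect; a stripped-down sketch of the idea (names and file contents are illustrative, not the fixture's exact code):

    from pathlib import Path
    from unittest.mock import patch

    def fake_snapshot_download(repo_id, local_dir=None, **kwargs):
        # Write the metadata files the code under test expects, then
        # return the directory path, like the real function would.
        local_dir = Path(local_dir)
        (local_dir / "meta").mkdir(parents=True, exist_ok=True)
        (local_dir / "meta" / "info.json").write_text("{}")
        return str(local_dir)

    with patch("lerobot.common.datasets.lerobot_dataset.snapshot_download") as mock_dl:
        mock_dl.side_effect = fake_snapshot_download
        # any code under test that calls snapshot_download now gets the fake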
@@ -182,7 +182,7 @@ def test_camera(request, camera_type, mock):

 @pytest.mark.parametrize("camera_type, mock", TEST_CAMERA_TYPES)
 @require_camera
-def test_save_images_from_cameras(tmpdir, request, camera_type, mock):
+def test_save_images_from_cameras(tmp_path, request, camera_type, mock):
     # TODO(rcadene): refactor
     if camera_type == "opencv":
         from lerobot.common.robot_devices.cameras.opencv import save_images_from_cameras
@@ -190,4 +190,4 @@ def test_save_images_from_cameras(tmpdir, request, camera_type, mock):
         from lerobot.common.robot_devices.cameras.intelrealsense import save_images_from_cameras

     # Small `record_time_s` to speedup unit tests
-    save_images_from_cameras(tmpdir, record_time_s=0.02, mock=mock)
+    save_images_from_cameras(tmp_path, record_time_s=0.02, mock=mock)
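The tmpdir-to-tmp_path renames here and in the robot-control tests below swap pytest's legacy py.path.local fixture for the pathlib-based tmp_path fixture, which is why the Path(tmpdir) conversions disappear. Both fixtures point at the same per-test temporary directory:

    from pathlib import Path

    def test_example(tmp_path):
        # tmp_path is already a pathlib.Path; no conversion needed
        assert isinstance(tmp_path, Path)
        out = tmp_path / "data" / "episode_000000.parquet"
        out.parent.mkdir(parents=True)
        out.touch()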
tests/test_compute_stats.py (new file, 311 lines)
@@ -0,0 +1,311 @@
#!/usr/bin/env python

# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest.mock import patch

import numpy as np
import pytest

from lerobot.common.datasets.compute_stats import (
    _assert_type_and_shape,
    aggregate_feature_stats,
    aggregate_stats,
    compute_episode_stats,
    estimate_num_samples,
    get_feature_stats,
    sample_images,
    sample_indices,
)


def mock_load_image_as_numpy(path, dtype, channel_first):
    return np.ones((3, 32, 32), dtype=dtype) if channel_first else np.ones((32, 32, 3), dtype=dtype)


@pytest.fixture
def sample_array():
    return np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])


def test_estimate_num_samples():
    assert estimate_num_samples(1) == 1
    assert estimate_num_samples(10) == 10
    assert estimate_num_samples(100) == 100
    assert estimate_num_samples(200) == 100
    assert estimate_num_samples(1000) == 177
    assert estimate_num_samples(2000) == 299
    assert estimate_num_samples(5000) == 594
    assert estimate_num_samples(10_000) == 1000
    assert estimate_num_samples(20_000) == 1681
    assert estimate_num_samples(50_000) == 3343
    assert estimate_num_samples(500_000) == 10_000

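The expected values are consistent with a power-law subsampling heuristic: take roughly n**0.75 samples, floored at 100 for datasets with at least 100 items and capped at 10,000. A sketch that reproduces every assertion above (my reading of the test, not necessarily the library's exact code):

    def estimate_num_samples_sketch(n: int, min_n: int = 100, max_n: int = 10_000, power: float = 0.75) -> int:
        if n < min_n:
            min_n = n  # tiny datasets are used in full
        return max(min_n, min(max_n, int(n**power)))

    assert estimate_num_samples_sketch(200) == 100         # floor kicks in
    assert estimate_num_samples_sketch(1000) == 177        # int(1000**0.75)
    assert estimate_num_samples_sketch(500_000) == 10_000  # cap kicks in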
def test_sample_indices():
    indices = sample_indices(10)
    assert len(indices) > 0
    assert indices[0] == 0
    assert indices[-1] == 9
    assert len(indices) == estimate_num_samples(10)


@patch("lerobot.common.datasets.compute_stats.load_image_as_numpy", side_effect=mock_load_image_as_numpy)
def test_sample_images(mock_load):
    image_paths = [f"image_{i}.jpg" for i in range(100)]
    images = sample_images(image_paths)
    assert isinstance(images, np.ndarray)
    assert images.shape[1:] == (3, 32, 32)
    assert images.dtype == np.uint8
    assert len(images) == estimate_num_samples(100)


def test_get_feature_stats_images():
    data = np.random.rand(100, 3, 32, 32)
    stats = get_feature_stats(data, axis=(0, 2, 3), keepdims=True)
    assert "min" in stats and "max" in stats and "mean" in stats and "std" in stats and "count" in stats
    np.testing.assert_equal(stats["count"], np.array([100]))
    assert stats["min"].shape == stats["max"].shape == stats["mean"].shape == stats["std"].shape


def test_get_feature_stats_axis_0_keepdims(sample_array):
    expected = {
        "min": np.array([[1, 2, 3]]),
        "max": np.array([[7, 8, 9]]),
        "mean": np.array([[4.0, 5.0, 6.0]]),
        "std": np.array([[2.44948974, 2.44948974, 2.44948974]]),
        "count": np.array([3]),
    }
    result = get_feature_stats(sample_array, axis=(0,), keepdims=True)
    for key in expected:
        np.testing.assert_allclose(result[key], expected[key])


def test_get_feature_stats_axis_1(sample_array):
    expected = {
        "min": np.array([1, 4, 7]),
        "max": np.array([3, 6, 9]),
        "mean": np.array([2.0, 5.0, 8.0]),
        "std": np.array([0.81649658, 0.81649658, 0.81649658]),
        "count": np.array([3]),
    }
    result = get_feature_stats(sample_array, axis=(1,), keepdims=False)
    for key in expected:
        np.testing.assert_allclose(result[key], expected[key])


def test_get_feature_stats_no_axis(sample_array):
    expected = {
        "min": np.array(1),
        "max": np.array(9),
        "mean": np.array(5.0),
        "std": np.array(2.5819889),
        "count": np.array([3]),
    }
    result = get_feature_stats(sample_array, axis=None, keepdims=False)
    for key in expected:
        np.testing.assert_allclose(result[key], expected[key])


def test_get_feature_stats_empty_array():
    array = np.array([])
    with pytest.raises(ValueError):
        get_feature_stats(array, axis=(0,), keepdims=True)


def test_get_feature_stats_single_value():
    array = np.array([[1337]])
    result = get_feature_stats(array, axis=None, keepdims=True)
    np.testing.assert_equal(result["min"], np.array(1337))
    np.testing.assert_equal(result["max"], np.array(1337))
    np.testing.assert_equal(result["mean"], np.array(1337.0))
    np.testing.assert_equal(result["std"], np.array(0.0))
    np.testing.assert_equal(result["count"], np.array([1]))


def test_compute_episode_stats():
    episode_data = {
        "observation.image": [f"image_{i}.jpg" for i in range(100)],
        "observation.state": np.random.rand(100, 10),
    }
    features = {
        "observation.image": {"dtype": "image"},
        "observation.state": {"dtype": "numeric"},
    }

    with patch(
        "lerobot.common.datasets.compute_stats.load_image_as_numpy", side_effect=mock_load_image_as_numpy
    ):
        stats = compute_episode_stats(episode_data, features)

    assert "observation.image" in stats and "observation.state" in stats
    assert stats["observation.image"]["count"].item() == 100
    assert stats["observation.state"]["count"].item() == 100
    assert stats["observation.image"]["mean"].shape == (3, 1, 1)


def test_assert_type_and_shape_valid():
    valid_stats = [
        {
            "feature1": {
                "min": np.array([1.0]),
                "max": np.array([10.0]),
                "mean": np.array([5.0]),
                "std": np.array([2.0]),
                "count": np.array([1]),
            }
        }
    ]
    _assert_type_and_shape(valid_stats)


def test_assert_type_and_shape_invalid_type():
    invalid_stats = [
        {
            "feature1": {
                "min": [1.0],  # Not a numpy array
                "max": np.array([10.0]),
                "mean": np.array([5.0]),
                "std": np.array([2.0]),
                "count": np.array([1]),
            }
        }
    ]
    with pytest.raises(ValueError, match="Stats must be composed of numpy array"):
        _assert_type_and_shape(invalid_stats)


def test_assert_type_and_shape_invalid_shape():
    invalid_stats = [
        {
            "feature1": {
                "count": np.array([1, 2]),  # Wrong shape
            }
        }
    ]
    with pytest.raises(ValueError, match=r"Shape of 'count' must be \(1\)"):
        _assert_type_and_shape(invalid_stats)


def test_aggregate_feature_stats():
    stats_ft_list = [
        {
            "min": np.array([1.0]),
            "max": np.array([10.0]),
            "mean": np.array([5.0]),
            "std": np.array([2.0]),
            "count": np.array([1]),
        },
        {
            "min": np.array([2.0]),
            "max": np.array([12.0]),
            "mean": np.array([6.0]),
            "std": np.array([2.5]),
            "count": np.array([1]),
        },
    ]
    result = aggregate_feature_stats(stats_ft_list)
    np.testing.assert_allclose(result["min"], np.array([1.0]))
    np.testing.assert_allclose(result["max"], np.array([12.0]))
    np.testing.assert_allclose(result["mean"], np.array([5.5]))
    np.testing.assert_allclose(result["std"], np.array([2.318405]), atol=1e-6)
    np.testing.assert_allclose(result["count"], np.array([2]))

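The expected aggregates follow the standard count-weighted pooling of per-episode statistics: min/max are element-wise extrema, the mean is a count-weighted average, and the pooled variance combines each episode's variance with the squared offset of its mean from the global mean. A sketch that reproduces the 2.318405 above and the 3.5317 in test_aggregate_stats below (my reading of the expected values, not necessarily the library's exact code):

    import numpy as np

    def aggregate_feature_stats_sketch(stats_list):
        counts = np.array([s["count"][0] for s in stats_list], dtype=np.float64)
        means = np.stack([s["mean"] for s in stats_list])
        stds = np.stack([s["std"] for s in stats_list])
        total = counts.sum()
        w = (counts / total).reshape(-1, *([1] * (means.ndim - 1)))
        mean = (w * means).sum(axis=0)
        # pooled variance: within-episode variance plus between-episode spread
        var = (w * (stds**2 + (means - mean) ** 2)).sum(axis=0)
        return {"mean": mean, "std": np.sqrt(var), "count": np.array([int(total)])}

    agg = aggregate_feature_stats_sketch(
        [
            {"mean": np.array([5.0]), "std": np.array([2.0]), "count": np.array([1])},
            {"mean": np.array([6.0]), "std": np.array([2.5]), "count": np.array([1])},
        ]
    )
    assert np.allclose(agg["std"], 2.318405, atol=1e-6)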
def test_aggregate_stats():
    all_stats = [
        {
            "observation.image": {
                "min": [1, 2, 3],
                "max": [10, 20, 30],
                "mean": [5.5, 10.5, 15.5],
                "std": [2.87, 5.87, 8.87],
                "count": 10,
            },
            "observation.state": {"min": 1, "max": 10, "mean": 5.5, "std": 2.87, "count": 10},
            "extra_key_0": {"min": 5, "max": 25, "mean": 15, "std": 6, "count": 6},
        },
        {
            "observation.image": {
                "min": [2, 1, 0],
                "max": [15, 10, 5],
                "mean": [8.5, 5.5, 2.5],
                "std": [3.42, 2.42, 1.42],
                "count": 15,
            },
            "observation.state": {"min": 2, "max": 15, "mean": 8.5, "std": 3.42, "count": 15},
            "extra_key_1": {"min": 0, "max": 20, "mean": 10, "std": 5, "count": 5},
        },
    ]

    expected_agg_stats = {
        "observation.image": {
            "min": [1, 1, 0],
            "max": [15, 20, 30],
            "mean": [7.3, 7.5, 7.7],
            "std": [3.5317, 4.8267, 8.5581],
            "count": 25,
        },
        "observation.state": {
            "min": 1,
            "max": 15,
            "mean": 7.3,
            "std": 3.5317,
            "count": 25,
        },
        "extra_key_0": {
            "min": 5,
            "max": 25,
            "mean": 15.0,
            "std": 6.0,
            "count": 6,
        },
        "extra_key_1": {
            "min": 0,
            "max": 20,
            "mean": 10.0,
            "std": 5.0,
            "count": 5,
        },
    }

    # cast to numpy
    for ep_stats in all_stats:
        for fkey, stats in ep_stats.items():
            for k in stats:
                stats[k] = np.array(stats[k], dtype=np.int64 if k == "count" else np.float32)
                if fkey == "observation.image" and k != "count":
                    stats[k] = stats[k].reshape(3, 1, 1)  # for normalization on image channels
                else:
                    stats[k] = stats[k].reshape(1)

    # cast to numpy
    for fkey, stats in expected_agg_stats.items():
        for k in stats:
            stats[k] = np.array(stats[k], dtype=np.int64 if k == "count" else np.float32)
            if fkey == "observation.image" and k != "count":
                stats[k] = stats[k].reshape(3, 1, 1)  # for normalization on image channels
            else:
                stats[k] = stats[k].reshape(1)

    results = aggregate_stats(all_stats)

    for fkey in expected_agg_stats:
        np.testing.assert_allclose(results[fkey]["min"], expected_agg_stats[fkey]["min"])
        np.testing.assert_allclose(results[fkey]["max"], expected_agg_stats[fkey]["max"])
        np.testing.assert_allclose(results[fkey]["mean"], expected_agg_stats[fkey]["mean"])
        np.testing.assert_allclose(
            results[fkey]["std"], expected_agg_stats[fkey]["std"], atol=1e-04, rtol=1e-04
        )
        np.testing.assert_allclose(results[fkey]["count"], expected_agg_stats[fkey]["count"])
tests/test_control_robot.py

@@ -24,7 +24,6 @@ pytest -sx 'tests/test_control_robot.py::test_teleoperate[aloha-True]'
 """

 import multiprocessing
-from pathlib import Path
 from unittest.mock import patch

 import pytest
@@ -45,7 +44,7 @@ from tests.utils import DEVICE, TEST_ROBOT_TYPES, mock_calibration_dir, require_

 @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
 @require_robot
-def test_teleoperate(tmpdir, request, robot_type, mock):
+def test_teleoperate(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock and robot_type != "aloha":
@@ -53,8 +52,7 @@ def test_teleoperate(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        tmpdir = Path(tmpdir)
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -70,15 +68,14 @@ def test_teleoperate(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
 @require_robot
-def test_calibrate(tmpdir, request, robot_type, mock):
+def test_calibrate(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock:
         request.getfixturevalue("patch_builtins_input")

     # Create an empty calibration directory to trigger manual calibration
-    tmpdir = Path(tmpdir)
-    calibration_dir = tmpdir / robot_type
+    calibration_dir = tmp_path / robot_type
     robot_kwargs["calibration_dir"] = calibration_dir

     robot = make_robot(**robot_kwargs)
@@ -89,7 +86,7 @@ def test_calibrate(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
 @require_robot
-def test_record_without_cameras(tmpdir, request, robot_type, mock):
+def test_record_without_cameras(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     # Avoid using cameras
@@ -100,7 +97,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = Path(tmpdir) / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -108,7 +105,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock):
         pass

     repo_id = "lerobot/debug"
-    root = Path(tmpdir) / "data" / repo_id
+    root = tmp_path / "data" / repo_id
     single_task = "Do something."

     robot = make_robot(**robot_kwargs)
@@ -121,7 +118,6 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock):
         episode_time_s=1,
         reset_time_s=0.1,
         num_episodes=2,
-        run_compute_stats=False,
         push_to_hub=False,
         video=False,
         play_sounds=False,
@@ -131,8 +127,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
 @require_robot
-def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
-    tmpdir = Path(tmpdir)
+def test_record_and_replay_and_policy(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock and robot_type != "aloha":
@@ -140,7 +135,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -148,7 +143,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
         pass

     repo_id = "lerobot_test/debug"
-    root = tmpdir / "data" / repo_id
+    root = tmp_path / "data" / repo_id
     single_task = "Do something."

     robot = make_robot(**robot_kwargs)
@@ -172,15 +167,13 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
     assert dataset.meta.total_episodes == 2
     assert len(dataset) == 2

-    replay_cfg = ReplayControlConfig(
-        episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False, local_files_only=True
-    )
+    replay_cfg = ReplayControlConfig(episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False)
     replay(robot, replay_cfg)

     policy_cfg = ACTConfig()
     policy = make_policy(policy_cfg, ds_meta=dataset.meta, device=DEVICE)

-    out_dir = tmpdir / "logger"
+    out_dir = tmp_path / "logger"

     pretrained_policy_path = out_dir / "checkpoints/last/pretrained_model"
     policy.save_pretrained(pretrained_policy_path)
@@ -207,7 +200,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
     num_image_writer_processes = 0

     eval_repo_id = "lerobot/eval_debug"
-    eval_root = tmpdir / "data" / eval_repo_id
+    eval_root = tmp_path / "data" / eval_repo_id

     rec_eval_cfg = RecordControlConfig(
         repo_id=eval_repo_id,
@@ -218,7 +211,6 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):
         episode_time_s=1,
         reset_time_s=0.1,
         num_episodes=2,
-        run_compute_stats=False,
         push_to_hub=False,
         video=False,
         display_cameras=False,
@@ -240,7 +232,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", [("koch", True)])
 @require_robot
-def test_resume_record(tmpdir, request, robot_type, mock):
+def test_resume_record(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock and robot_type != "aloha":
@@ -248,7 +240,7 @@ def test_resume_record(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -258,7 +250,7 @@ def test_resume_record(tmpdir, request, robot_type, mock):
     robot = make_robot(**robot_kwargs)

     repo_id = "lerobot/debug"
-    root = Path(tmpdir) / "data" / repo_id
+    root = tmp_path / "data" / repo_id
     single_task = "Do something."

     rec_cfg = RecordControlConfig(
@@ -272,8 +264,6 @@ def test_resume_record(tmpdir, request, robot_type, mock):
         video=False,
         display_cameras=False,
         play_sounds=False,
-        run_compute_stats=False,
-        local_files_only=True,
         num_episodes=1,
     )

@@ -291,7 +281,7 @@ def test_resume_record(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", [("koch", True)])
 @require_robot
-def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):
+def test_record_with_event_rerecord_episode(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock and robot_type != "aloha":
@@ -299,7 +289,7 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -316,7 +306,7 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):
         mock_listener.return_value = (None, mock_events)

         repo_id = "lerobot/debug"
-        root = Path(tmpdir) / "data" / repo_id
+        root = tmp_path / "data" / repo_id
         single_task = "Do something."

         rec_cfg = RecordControlConfig(
@@ -331,7 +321,6 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):
             video=False,
             display_cameras=False,
             play_sounds=False,
-            run_compute_stats=False,
         )
         dataset = record(robot, rec_cfg)

@@ -342,7 +331,7 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock):

 @pytest.mark.parametrize("robot_type, mock", [("koch", True)])
 @require_robot
-def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):
+def test_record_with_event_exit_early(tmp_path, request, robot_type, mock):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock:
@@ -350,7 +339,7 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -367,7 +356,7 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):
         mock_listener.return_value = (None, mock_events)

         repo_id = "lerobot/debug"
-        root = Path(tmpdir) / "data" / repo_id
+        root = tmp_path / "data" / repo_id
         single_task = "Do something."

         rec_cfg = RecordControlConfig(
@@ -382,7 +371,6 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):
             video=False,
             display_cameras=False,
             play_sounds=False,
-            run_compute_stats=False,
         )

         dataset = record(robot, rec_cfg)
@@ -395,7 +383,7 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock):
     "robot_type, mock, num_image_writer_processes", [("koch", True, 0), ("koch", True, 1)]
 )
 @require_robot
-def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num_image_writer_processes):
+def test_record_with_event_stop_recording(tmp_path, request, robot_type, mock, num_image_writer_processes):
     robot_kwargs = {"robot_type": robot_type, "mock": mock}

     if mock:
@@ -403,7 +391,7 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num

         # Create an empty calibration directory to trigger manual calibration
         # and avoid writing calibration files in user .cache/calibration folder
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir
     else:
@@ -420,7 +408,7 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num
         mock_listener.return_value = (None, mock_events)

         repo_id = "lerobot/debug"
-        root = Path(tmpdir) / "data" / repo_id
+        root = tmp_path / "data" / repo_id
         single_task = "Do something."

         rec_cfg = RecordControlConfig(
@@ -436,7 +424,6 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num
             video=False,
             display_cameras=False,
             play_sounds=False,
-            run_compute_stats=False,
             num_image_writer_processes=num_image_writer_processes,
         )

@@ -15,24 +15,21 @@
 # limitations under the License.
-import json
-import logging
+import re
-from copy import deepcopy
-from itertools import chain
 from pathlib import Path

 import einops
 import numpy as np
 import pytest
 import torch
 from datasets import Dataset
 from huggingface_hub import HfApi
+from PIL import Image
 from safetensors.torch import load_file

 import lerobot
 from lerobot.common.datasets.compute_stats import (
     aggregate_stats,
-    compute_stats,
-    get_stats_einops_patterns,
 )
 from lerobot.common.datasets.factory import make_dataset
+from lerobot.common.datasets.image_writer import image_array_to_pil_image
 from lerobot.common.datasets.lerobot_dataset import (
     LeRobotDataset,
     MultiLeRobotDataset,
@@ -40,20 +37,34 @@ from lerobot.common.datasets.lerobot_dataset import (
|
||||
from lerobot.common.datasets.utils import (
|
||||
create_branch,
|
||||
flatten_dict,
|
||||
hf_transform_to_torch,
|
||||
unflatten_dict,
|
||||
)
|
||||
from lerobot.common.envs.factory import make_env_config
|
||||
from lerobot.common.policies.factory import make_policy_config
|
||||
from lerobot.common.robot_devices.robots.utils import make_robot
|
||||
from lerobot.common.utils.random_utils import seeded_context
|
||||
from lerobot.configs.default import DatasetConfig
|
||||
from lerobot.configs.train import TrainPipelineConfig
|
||||
from tests.fixtures.constants import DUMMY_REPO_ID
|
||||
from tests.fixtures.constants import DUMMY_CHW, DUMMY_HWC, DUMMY_REPO_ID
|
||||
from tests.utils import DEVICE, require_x86_64_kernel
|
||||
|
||||
|
||||
def test_same_attributes_defined(lerobot_dataset_factory, tmp_path):
|
||||
@pytest.fixture
|
||||
def image_dataset(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {
|
||||
"image": {
|
||||
"dtype": "image",
|
||||
"shape": DUMMY_CHW,
|
||||
"names": [
|
||||
"channels",
|
||||
"height",
|
||||
"width",
|
||||
],
|
||||
}
|
||||
}
|
||||
return empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
|
||||
|
||||
def test_same_attributes_defined(tmp_path, lerobot_dataset_factory):
|
||||
"""
|
||||
Instantiate a LeRobotDataset both ways with '__init__()' and 'create()' and verify that instantiated
|
||||
objects have the same sets of attributes defined.
|
||||
@@ -66,24 +77,20 @@ def test_same_attributes_defined(lerobot_dataset_factory, tmp_path):
|
||||
root_init = tmp_path / "init"
|
||||
dataset_init = lerobot_dataset_factory(root=root_init)
|
||||
|
||||
# Access the '_hub_version' cached_property in both instances to force its creation
|
||||
_ = dataset_init.meta._hub_version
|
||||
_ = dataset_create.meta._hub_version
|
||||
|
||||
init_attr = set(vars(dataset_init).keys())
|
||||
create_attr = set(vars(dataset_create).keys())
|
||||
|
||||
assert init_attr == create_attr
|
||||
|
||||
|
||||
def test_dataset_initialization(lerobot_dataset_factory, tmp_path):
|
||||
def test_dataset_initialization(tmp_path, lerobot_dataset_factory):
|
||||
kwargs = {
|
||||
"repo_id": DUMMY_REPO_ID,
|
||||
"total_episodes": 10,
|
||||
"total_frames": 400,
|
||||
"episodes": [2, 5, 6],
|
||||
}
|
||||
dataset = lerobot_dataset_factory(root=tmp_path, **kwargs)
|
||||
dataset = lerobot_dataset_factory(root=tmp_path / "test", **kwargs)
|
||||
|
||||
assert dataset.repo_id == kwargs["repo_id"]
|
||||
assert dataset.meta.total_episodes == kwargs["total_episodes"]
|
||||
@@ -93,12 +100,232 @@ def test_dataset_initialization(lerobot_dataset_factory, tmp_path):
|
||||
assert dataset.num_frames == len(dataset)
|
||||
|
||||
|
||||
def test_add_frame_missing_task(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError, match="Feature mismatch in `frame` dictionary:\nMissing features: {'task'}\n"
|
||||
):
|
||||
dataset.add_frame({"state": torch.randn(1)})
|
||||
|
||||
|
||||
def test_add_frame_missing_feature(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError, match="Feature mismatch in `frame` dictionary:\nMissing features: {'state'}\n"
|
||||
):
|
||||
dataset.add_frame({"task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_extra_feature(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError, match="Feature mismatch in `frame` dictionary:\nExtra features: {'extra'}\n"
|
||||
):
|
||||
dataset.add_frame({"state": torch.randn(1), "task": "Dummy task", "extra": "dummy_extra"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_type(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError, match="The feature 'state' of dtype 'float16' is not of the expected dtype 'float32'.\n"
|
||||
):
|
||||
dataset.add_frame({"state": torch.randn(1, dtype=torch.float16), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_shape(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape("The feature 'state' of shape '(1,)' does not have the expected shape '(2,)'.\n"),
|
||||
):
|
||||
dataset.add_frame({"state": torch.randn(1), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_shape_python_float(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape(
|
||||
"The feature 'state' is not a 'np.ndarray'. Expected type is 'float32', but type '<class 'float'>' provided instead.\n"
|
||||
),
|
||||
):
|
||||
dataset.add_frame({"state": 1.0, "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_shape_torch_ndim_0(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape("The feature 'state' of shape '()' does not have the expected shape '(1,)'.\n"),
|
||||
):
|
||||
dataset.add_frame({"state": torch.tensor(1.0), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_wrong_shape_numpy_ndim_0(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape(
|
||||
"The feature 'state' is not a 'np.ndarray'. Expected type is 'float32', but type '<class 'numpy.float32'>' provided instead.\n"
|
||||
),
|
||||
):
|
||||
dataset.add_frame({"state": np.float32(1.0), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(1), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert len(dataset) == 1
|
||||
assert dataset[0]["task"] == "Dummy task"
|
||||
assert dataset[0]["task_index"] == 0
|
||||
assert dataset[0]["state"].ndim == 0
|
||||
|
||||
|
||||
def test_add_frame_state_1d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2])
|
||||
|
||||
|
||||
def test_add_frame_state_2d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2, 4), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2, 4), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2, 4])
|
||||
|
||||
|
||||
def test_add_frame_state_3d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2, 4, 3), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2, 4, 3), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2, 4, 3])
|
||||
|
||||
|
||||
def test_add_frame_state_4d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2, 4, 3, 5), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2, 4, 3, 5])
|
||||
|
||||
|
||||
def test_add_frame_state_5d(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5, 1), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": torch.randn(2, 4, 3, 5, 1), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].shape == torch.Size([2, 4, 3, 5, 1])
|
||||
|
||||
|
||||
def test_add_frame_state_numpy(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"state": np.array([1], dtype=np.float32), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["state"].ndim == 0
|
||||
|
||||
|
||||
def test_add_frame_string(tmp_path, empty_lerobot_dataset_factory):
|
||||
features = {"caption": {"dtype": "string", "shape": (1,), "names": None}}
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
dataset.add_frame({"caption": "Dummy caption", "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["caption"] == "Dummy caption"
|
||||
|
||||
|
||||
def test_add_frame_image_wrong_shape(image_dataset):
|
||||
dataset = image_dataset
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape(
|
||||
"The feature 'image' of shape '(3, 128, 96)' does not have the expected shape '(3, 96, 128)' or '(96, 128, 3)'.\n"
|
||||
),
|
||||
):
|
||||
c, h, w = DUMMY_CHW
|
||||
dataset.add_frame({"image": torch.randn(c, w, h), "task": "Dummy task"})
|
||||
|
||||
|
||||
def test_add_frame_image_wrong_range(image_dataset):
|
||||
"""This test will display the following error message from a thread:
|
||||
```
|
||||
Error writing image ...test_add_frame_image_wrong_ran0/test/images/image/episode_000000/frame_000000.png:
|
||||
The image data type is float, which requires values in the range [0.0, 1.0]. However, the provided range is [0.009678772038470007, 254.9776492089887].
|
||||
Please adjust the range or provide a uint8 image with values in the range [0, 255]
|
||||
```
|
||||
Hence the image won't be saved on disk and save_episode will raise `FileNotFoundError`.
|
||||
"""
|
||||
dataset = image_dataset
|
||||
dataset.add_frame({"image": np.random.rand(*DUMMY_CHW) * 255, "task": "Dummy task"})
|
||||
with pytest.raises(FileNotFoundError):
|
||||
dataset.save_episode()
|
||||
|
||||
|
||||
def test_add_frame_image(image_dataset):
|
||||
dataset = image_dataset
|
||||
dataset.add_frame({"image": np.random.rand(*DUMMY_CHW), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_add_frame_image_h_w_c(image_dataset):
|
||||
dataset = image_dataset
|
||||
dataset.add_frame({"image": np.random.rand(*DUMMY_HWC), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_add_frame_image_uint8(image_dataset):
|
||||
dataset = image_dataset
|
||||
image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8)
|
||||
dataset.add_frame({"image": image, "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_add_frame_image_pil(image_dataset):
|
||||
dataset = image_dataset
|
||||
image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8)
|
||||
dataset.add_frame({"image": Image.fromarray(image), "task": "Dummy task"})
|
||||
dataset.save_episode()
|
||||
|
||||
assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW)
|
||||
|
||||
|
||||
def test_image_array_to_pil_image_wrong_range_float_0_255():
|
||||
image = np.random.rand(*DUMMY_HWC) * 255
|
||||
with pytest.raises(ValueError):
|
||||
image_array_to_pil_image(image)
|
||||
|
||||
|
||||
# TODO(aliberts):
|
||||
# - [ ] test various attributes & state from init and create
|
||||
# - [ ] test init with episodes and check num_frames
|
||||
# - [ ] test add_frame
|
||||
# - [ ] test add_episode
|
||||
# - [ ] test consolidate
|
||||
# - [ ] test push_to_hub
|
||||
# - [ ] test smaller methods
|
||||
|
||||
@@ -210,67 +437,6 @@ def test_multidataset_frames():
|
||||
assert torch.equal(sub_dataset_item[k], dataset_item[k])
|
||||
|
||||
|
||||
# TODO(aliberts, rcadene): Refactor and move this to a tests/test_compute_stats.py
|
||||
def test_compute_stats_on_xarm():
|
||||
"""Check that the statistics are computed correctly according to the stats_patterns property.
|
||||
|
||||
We compare with taking a straight min, mean, max, std of all the data in one pass (which we can do
|
||||
because we are working with a small dataset).
|
||||
"""
|
||||
# TODO(rcadene, aliberts): remove dataset download
|
||||
dataset = LeRobotDataset("lerobot/xarm_lift_medium", episodes=[0])
|
||||
|
||||
# reduce size of dataset sample on which stats compute is tested to 10 frames
|
||||
dataset.hf_dataset = dataset.hf_dataset.select(range(10))
|
||||
|
||||
# Note: we set the batch size to be smaller than the whole dataset to make sure we are testing batched
|
||||
# computation of the statistics. While doing this, we also make sure it works when we don't divide the
|
||||
# dataset into even batches.
|
||||
computed_stats = compute_stats(dataset, batch_size=int(len(dataset) * 0.25), num_workers=0)
|
||||
|
||||
# get einops patterns to aggregate batches and compute statistics
|
||||
stats_patterns = get_stats_einops_patterns(dataset)
|
||||
|
||||
# get all frames from the dataset in the same dtype and range as during compute_stats
|
||||
dataloader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
num_workers=0,
|
||||
batch_size=len(dataset),
|
||||
shuffle=False,
|
||||
)
|
||||
full_batch = next(iter(dataloader))
|
||||
|
||||
# compute stats based on all frames from the dataset without any batching
|
||||
expected_stats = {}
|
||||
for k, pattern in stats_patterns.items():
|
||||
full_batch[k] = full_batch[k].float()
|
||||
expected_stats[k] = {}
|
||||
expected_stats[k]["mean"] = einops.reduce(full_batch[k], pattern, "mean")
|
||||
expected_stats[k]["std"] = torch.sqrt(
|
||||
einops.reduce((full_batch[k] - expected_stats[k]["mean"]) ** 2, pattern, "mean")
|
||||
)
|
||||
expected_stats[k]["min"] = einops.reduce(full_batch[k], pattern, "min")
|
||||
expected_stats[k]["max"] = einops.reduce(full_batch[k], pattern, "max")
|
||||
|
||||
# test computed stats match expected stats
|
||||
for k in stats_patterns:
|
||||
assert torch.allclose(computed_stats[k]["mean"], expected_stats[k]["mean"])
|
||||
assert torch.allclose(computed_stats[k]["std"], expected_stats[k]["std"])
|
||||
assert torch.allclose(computed_stats[k]["min"], expected_stats[k]["min"])
|
||||
assert torch.allclose(computed_stats[k]["max"], expected_stats[k]["max"])
|
||||
|
||||
# load stats used during training which are expected to match the ones returned by computed_stats
|
||||
loaded_stats = dataset.meta.stats # noqa: F841
|
||||
|
||||
# TODO(rcadene): we can't test this because expected_stats is computed on a subset
|
||||
# # test loaded stats match expected stats
|
||||
# for k in stats_patterns:
|
||||
# assert torch.allclose(loaded_stats[k]["mean"], expected_stats[k]["mean"])
|
||||
# assert torch.allclose(loaded_stats[k]["std"], expected_stats[k]["std"])
|
||||
# assert torch.allclose(loaded_stats[k]["min"], expected_stats[k]["min"])
|
||||
# assert torch.allclose(loaded_stats[k]["max"], expected_stats[k]["max"])


# TODO(aliberts): Move to more appropriate location
def test_flatten_unflatten_dict():
    d = {
@@ -374,35 +540,6 @@ def test_backward_compatibility(repo_id):
    # load_and_compare(i - 1)


@pytest.mark.skip("TODO after fix multidataset")
def test_multidataset_aggregate_stats():
    """Makes 3 basic datasets and checks that aggregate stats are computed correctly."""
    with seeded_context(0):
        data_a = torch.rand(30, dtype=torch.float32)
        data_b = torch.rand(20, dtype=torch.float32)
        data_c = torch.rand(20, dtype=torch.float32)

    hf_dataset_1 = Dataset.from_dict(
        {"a": data_a[:10], "b": data_b[:10], "c": data_c[:10], "index": torch.arange(10)}
    )
    hf_dataset_1.set_transform(hf_transform_to_torch)
    hf_dataset_2 = Dataset.from_dict({"a": data_a[10:20], "b": data_b[10:], "index": torch.arange(10)})
    hf_dataset_2.set_transform(hf_transform_to_torch)
    hf_dataset_3 = Dataset.from_dict({"a": data_a[20:], "c": data_c[10:], "index": torch.arange(10)})
    hf_dataset_3.set_transform(hf_transform_to_torch)
    dataset_1 = LeRobotDataset.from_preloaded("d1", hf_dataset=hf_dataset_1)
    dataset_1.stats = compute_stats(dataset_1, batch_size=len(hf_dataset_1), num_workers=0)
    dataset_2 = LeRobotDataset.from_preloaded("d2", hf_dataset=hf_dataset_2)
    dataset_2.stats = compute_stats(dataset_2, batch_size=len(hf_dataset_2), num_workers=0)
    dataset_3 = LeRobotDataset.from_preloaded("d3", hf_dataset=hf_dataset_3)
    dataset_3.stats = compute_stats(dataset_3, batch_size=len(hf_dataset_3), num_workers=0)
    stats = aggregate_stats([dataset_1, dataset_2, dataset_3])
    for data_key, data in zip(["a", "b", "c"], [data_a, data_b, data_c], strict=True):
        for agg_fn in ["mean", "min", "max"]:
            assert torch.allclose(stats[data_key][agg_fn], einops.reduce(data, "n -> 1", agg_fn))
        assert torch.allclose(stats[data_key]["std"], torch.std(data, correction=0))
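

# A numeric sketch (illustration only, not part of this commit) of the pooling that
# the aggregated std above must satisfy: per-chunk sums of squared deviations around
# the global mean reproduce the population std (correction=0) of the concatenated data.
import torch

chunks = [torch.rand(30), torch.rand(20), torch.rand(20)]
n = sum(c.numel() for c in chunks)
mean = sum(c.sum() for c in chunks) / n
var = sum(((c - mean) ** 2).sum() for c in chunks) / n  # no Bessel correction
assert torch.allclose(var.sqrt(), torch.std(torch.cat(chunks), correction=0))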


@pytest.mark.skip("Requires internet access")
def test_create_branch():
    api = HfApi()
@@ -431,9 +568,9 @@ def test_create_branch():

def test_dataset_feature_with_forward_slash_raises_error():
    # make sure dir does not exist
    from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME
    from lerobot.common.constants import HF_LEROBOT_HOME

    dataset_dir = LEROBOT_HOME / "lerobot/test/with/slash"
    dataset_dir = HF_LEROBOT_HOME / "lerobot/test/with/slash"
    # make sure does not exist
    if dataset_dir.exists():
        dataset_dir.rmdir()

@@ -1,55 +1,78 @@
from itertools import accumulate

import datasets
import numpy as np
import pyarrow.compute as pc
import pytest
import torch
from datasets import Dataset

from lerobot.common.datasets.utils import (
    calculate_episode_data_index,
    check_delta_timestamps,
    check_timestamps_sync,
    get_delta_indices,
    hf_transform_to_torch,
)
from tests.fixtures.constants import DUMMY_MOTOR_FEATURES


@pytest.fixture(scope="module")
def synced_hf_dataset_factory(hf_dataset_factory):
    def _create_synced_hf_dataset(fps: int = 30) -> Dataset:
        return hf_dataset_factory(fps=fps)
def calculate_total_episode(
    hf_dataset: datasets.Dataset, raise_if_not_contiguous: bool = True
) -> int:
    episode_indices = sorted(hf_dataset.unique("episode_index"))
    total_episodes = len(episode_indices)
    if raise_if_not_contiguous and episode_indices != list(range(total_episodes)):
        raise ValueError("episode_index values are not sorted and contiguous.")
    return total_episodes

    return _create_synced_hf_dataset

def calculate_episode_data_index(hf_dataset: datasets.Dataset) -> dict[str, np.ndarray]:
    episode_lengths = []
    table = hf_dataset.data.table
    total_episodes = calculate_total_episode(hf_dataset)
    for ep_idx in range(total_episodes):
        ep_table = table.filter(pc.equal(table["episode_index"], ep_idx))
        episode_lengths.insert(ep_idx, len(ep_table))

    cumulative_lengths = list(accumulate(episode_lengths))
    return {
        "from": np.array([0] + cumulative_lengths[:-1], dtype=np.int64),
        "to": np.array(cumulative_lengths, dtype=np.int64),
    }
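

# Illustration only (not part of this commit): for hypothetical episode lengths
# [3, 2, 4], the cumulative bounds built above give episode i the row range
# from[i]:to[i] of the flattened dataset.
from itertools import accumulate
import numpy as np

lengths = [3, 2, 4]
cumulative = list(accumulate(lengths))
assert [0] + cumulative[:-1] == [0, 3, 5]  # "from" bounds
assert cumulative == [3, 5, 9]  # "to" bounds
assert np.diff(np.array([0] + cumulative)).tolist() == lengths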


@pytest.fixture(scope="module")
def unsynced_hf_dataset_factory(synced_hf_dataset_factory):
    def _create_unsynced_hf_dataset(fps: int = 30, tolerance_s: float = 1e-4) -> Dataset:
        hf_dataset = synced_hf_dataset_factory(fps=fps)
        features = hf_dataset.features
        df = hf_dataset.to_pandas()
        dtype = df["timestamp"].dtype  # This is to avoid pandas type warning
        # Modify a single timestamp just outside tolerance
        df.at[30, "timestamp"] = dtype.type(df.at[30, "timestamp"] + (tolerance_s * 1.1))
        unsynced_hf_dataset = Dataset.from_pandas(df, features=features)
        unsynced_hf_dataset.set_transform(hf_transform_to_torch)
        return unsynced_hf_dataset
def synced_timestamps_factory(hf_dataset_factory):
    def _create_synced_timestamps(fps: int = 30) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        hf_dataset = hf_dataset_factory(fps=fps)
        timestamps = torch.stack(hf_dataset["timestamp"]).numpy()
        episode_indices = torch.stack(hf_dataset["episode_index"]).numpy()
        episode_data_index = calculate_episode_data_index(hf_dataset)
        return timestamps, episode_indices, episode_data_index

    return _create_unsynced_hf_dataset
    return _create_synced_timestamps


@pytest.fixture(scope="module")
def slightly_off_hf_dataset_factory(synced_hf_dataset_factory):
    def _create_slightly_off_hf_dataset(fps: int = 30, tolerance_s: float = 1e-4) -> Dataset:
        hf_dataset = synced_hf_dataset_factory(fps=fps)
        features = hf_dataset.features
        df = hf_dataset.to_pandas()
        dtype = df["timestamp"].dtype  # This is to avoid pandas type warning
        # Modify a single timestamp just inside tolerance
        df.at[30, "timestamp"] = dtype.type(df.at[30, "timestamp"] + (tolerance_s * 0.9))
        unsynced_hf_dataset = Dataset.from_pandas(df, features=features)
        unsynced_hf_dataset.set_transform(hf_transform_to_torch)
        return unsynced_hf_dataset
def unsynced_timestamps_factory(synced_timestamps_factory):
    def _create_unsynced_timestamps(
        fps: int = 30, tolerance_s: float = 1e-4
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        timestamps, episode_indices, episode_data_index = synced_timestamps_factory(fps=fps)
        timestamps[30] += tolerance_s * 1.1  # Modify a single timestamp just outside tolerance
        return timestamps, episode_indices, episode_data_index

    return _create_slightly_off_hf_dataset
    return _create_unsynced_timestamps


@pytest.fixture(scope="module")
def slightly_off_timestamps_factory(synced_timestamps_factory):
    def _create_slightly_off_timestamps(
        fps: int = 30, tolerance_s: float = 1e-4
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        timestamps, episode_indices, episode_data_index = synced_timestamps_factory(fps=fps)
        timestamps[30] += tolerance_s * 0.9  # Modify a single timestamp just inside tolerance
        return timestamps, episode_indices, episode_data_index

    return _create_slightly_off_timestamps


@pytest.fixture(scope="module")
@@ -100,42 +123,42 @@ def delta_indices_factory():
    return _delta_indices


def test_check_timestamps_sync_synced(synced_hf_dataset_factory):
def test_check_timestamps_sync_synced(synced_timestamps_factory):
    fps = 30
    tolerance_s = 1e-4
    synced_hf_dataset = synced_hf_dataset_factory(fps)
    episode_data_index = calculate_episode_data_index(synced_hf_dataset)
    timestamps, ep_idx, ep_data_index = synced_timestamps_factory(fps)
    result = check_timestamps_sync(
        hf_dataset=synced_hf_dataset,
        episode_data_index=episode_data_index,
        timestamps=timestamps,
        episode_indices=ep_idx,
        episode_data_index=ep_data_index,
        fps=fps,
        tolerance_s=tolerance_s,
    )
    assert result is True
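

# A minimal sketch (illustration only, not part of this commit) of the invariant
# check_timestamps_sync enforces within an episode: consecutive timestamps advance
# by 1/fps, within tolerance_s. Data below is hypothetical.
import numpy as np

fps, tolerance_s = 30, 1e-4
synced = np.arange(60) / fps  # one well-formed episode
assert np.all(np.abs(np.diff(synced) - 1 / fps) <= tolerance_s)
unsynced = synced.copy()
unsynced[30] += tolerance_s * 1.1  # single frame drifts just outside tolerance
assert np.any(np.abs(np.diff(unsynced) - 1 / fps) > tolerance_s)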


def test_check_timestamps_sync_unsynced(unsynced_hf_dataset_factory):
def test_check_timestamps_sync_unsynced(unsynced_timestamps_factory):
    fps = 30
    tolerance_s = 1e-4
    unsynced_hf_dataset = unsynced_hf_dataset_factory(fps, tolerance_s)
    episode_data_index = calculate_episode_data_index(unsynced_hf_dataset)
    timestamps, ep_idx, ep_data_index = unsynced_timestamps_factory(fps, tolerance_s)
    with pytest.raises(ValueError):
        check_timestamps_sync(
            hf_dataset=unsynced_hf_dataset,
            episode_data_index=episode_data_index,
            timestamps=timestamps,
            episode_indices=ep_idx,
            episode_data_index=ep_data_index,
            fps=fps,
            tolerance_s=tolerance_s,
        )


def test_check_timestamps_sync_unsynced_no_exception(unsynced_hf_dataset_factory):
def test_check_timestamps_sync_unsynced_no_exception(unsynced_timestamps_factory):
    fps = 30
    tolerance_s = 1e-4
    unsynced_hf_dataset = unsynced_hf_dataset_factory(fps, tolerance_s)
    episode_data_index = calculate_episode_data_index(unsynced_hf_dataset)
    timestamps, ep_idx, ep_data_index = unsynced_timestamps_factory(fps, tolerance_s)
    result = check_timestamps_sync(
        hf_dataset=unsynced_hf_dataset,
        episode_data_index=episode_data_index,
        timestamps=timestamps,
        episode_indices=ep_idx,
        episode_data_index=ep_data_index,
        fps=fps,
        tolerance_s=tolerance_s,
        raise_value_error=False,
@@ -143,14 +166,14 @@ def test_check_timestamps_sync_unsynced_no_exception(unsynced_hf_dataset_factory
    assert result is False


def test_check_timestamps_sync_slightly_off(slightly_off_hf_dataset_factory):
def test_check_timestamps_sync_slightly_off(slightly_off_timestamps_factory):
    fps = 30
    tolerance_s = 1e-4
    slightly_off_hf_dataset = slightly_off_hf_dataset_factory(fps, tolerance_s)
    episode_data_index = calculate_episode_data_index(slightly_off_hf_dataset)
    timestamps, ep_idx, ep_data_index = slightly_off_timestamps_factory(fps, tolerance_s)
    result = check_timestamps_sync(
        hf_dataset=slightly_off_hf_dataset,
        episode_data_index=episode_data_index,
        timestamps=timestamps,
        episode_indices=ep_idx,
        episode_data_index=ep_data_index,
        fps=fps,
        tolerance_s=tolerance_s,
    )
@@ -158,33 +181,13 @@ def test_check_timestamps_sync_slightly_off(slightly_off_hf_dataset_factory):

def test_check_timestamps_sync_single_timestamp():
    single_timestamp_hf_dataset = Dataset.from_dict({"timestamp": [0.0], "episode_index": [0]})
    single_timestamp_hf_dataset.set_transform(hf_transform_to_torch)
    episode_data_index = {"to": torch.tensor([1]), "from": torch.tensor([0])}
    fps = 30
    tolerance_s = 1e-4
    timestamps, ep_idx = np.array([0.0]), np.array([0])
    episode_data_index = {"to": np.array([1]), "from": np.array([0])}
    result = check_timestamps_sync(
        hf_dataset=single_timestamp_hf_dataset,
        episode_data_index=episode_data_index,
        fps=fps,
        tolerance_s=tolerance_s,
    )
    assert result is True


# TODO(aliberts): Change behavior of hf_transform_to_torch so that it can work with empty dataset
@pytest.mark.skip("TODO: fix")
def test_check_timestamps_sync_empty_dataset():
    fps = 30
    tolerance_s = 1e-4
    empty_hf_dataset = Dataset.from_dict({"timestamp": [], "episode_index": []})
    empty_hf_dataset.set_transform(hf_transform_to_torch)
    episode_data_index = {
        "to": torch.tensor([], dtype=torch.int64),
        "from": torch.tensor([], dtype=torch.int64),
    }
    result = check_timestamps_sync(
        hf_dataset=empty_hf_dataset,
        timestamps=timestamps,
        episode_indices=ep_idx,
        episode_data_index=episode_data_index,
        fps=fps,
        tolerance_s=tolerance_s,

@@ -53,7 +53,7 @@ def test_example_1(tmp_path, lerobot_dataset_factory):
            ('repo_id = "lerobot/pusht"', f'repo_id = "{DUMMY_REPO_ID}"'),
            (
                "LeRobotDataset(repo_id",
                f"LeRobotDataset(repo_id, root='{str(tmp_path)}', local_files_only=True",
                f"LeRobotDataset(repo_id, root='{str(tmp_path)}'",
            ),
        ],
    )
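

# Illustration only (not part of this commit): the (old, new) pairs above appear to
# feed a plain-text find-and-replace over the example scripts before they run. A
# hypothetical minimal version of that rewrite:
content = "LeRobotDataset(repo_id)"
for old, new in [("LeRobotDataset(repo_id", "LeRobotDataset(repo_id, root='/tmp'")]:
    content = content.replace(old, new)
assert content == "LeRobotDataset(repo_id, root='/tmp')"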

@@ -9,10 +9,11 @@ from PIL import Image

from lerobot.common.datasets.image_writer import (
    AsyncImageWriter,
    image_array_to_image,
    image_array_to_pil_image,
    safe_stop_image_writer,
    write_image,
)
from tests.fixtures.constants import DUMMY_HWC

DUMMY_IMAGE = "test_image.png"

@@ -48,49 +49,62 @@ def test_zero_threads():
        AsyncImageWriter(num_processes=0, num_threads=0)


def test_image_array_to_image_rgb(img_array_factory):
def test_image_array_to_pil_image_float_array_wrong_range_0_255():
    image = np.random.rand(*DUMMY_HWC) * 255
    with pytest.raises(ValueError):
        image_array_to_pil_image(image)


def test_image_array_to_pil_image_float_array_wrong_range_neg_1_1():
    image = np.random.rand(*DUMMY_HWC) * 2 - 1
    with pytest.raises(ValueError):
        image_array_to_pil_image(image)


def test_image_array_to_pil_image_rgb(img_array_factory):
    img_array = img_array_factory(100, 100)
    result_image = image_array_to_image(img_array)
    result_image = image_array_to_pil_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "RGB"


def test_image_array_to_image_pytorch_format(img_array_factory):
def test_image_array_to_pil_image_pytorch_format(img_array_factory):
    img_array = img_array_factory(100, 100).transpose(2, 0, 1)
    result_image = image_array_to_image(img_array)
    result_image = image_array_to_pil_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "RGB"


@pytest.mark.skip("TODO: implement")
def test_image_array_to_image_single_channel(img_array_factory):
def test_image_array_to_pil_image_single_channel(img_array_factory):
    img_array = img_array_factory(channels=1)
    result_image = image_array_to_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "L"
    with pytest.raises(NotImplementedError):
        image_array_to_pil_image(img_array)


def test_image_array_to_image_float_array(img_array_factory):
def test_image_array_to_pil_image_4_channels(img_array_factory):
    img_array = img_array_factory(channels=4)
    with pytest.raises(NotImplementedError):
        image_array_to_pil_image(img_array)


def test_image_array_to_pil_image_float_array(img_array_factory):
    img_array = img_array_factory(dtype=np.float32)
    result_image = image_array_to_image(img_array)
    result_image = image_array_to_pil_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "RGB"
    assert np.array(result_image).dtype == np.uint8
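

# Illustration only (not part of this commit): the float-array path above assumes
# inputs in [0, 1] that are rescaled to uint8 (out-of-range floats are rejected by
# the wrong-range tests earlier). A hypothetical minimal version of that rescaling:
import numpy as np

float_img = np.random.rand(96, 128, 3).astype(np.float32)  # values in [0, 1]
uint8_img = (float_img * 255).round().astype(np.uint8)
assert uint8_img.dtype == np.uint8 and uint8_img.max() <= 255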


def test_image_array_to_image_out_of_bounds_float():
    # Float array with values out of [0, 1]
    img_array = np.random.uniform(-1, 2, size=(100, 100, 3)).astype(np.float32)
    result_image = image_array_to_image(img_array)
def test_image_array_to_pil_image_uint8_array(img_array_factory):
    img_array = img_array_factory(dtype=np.uint8)
    result_image = image_array_to_pil_image(img_array)
    assert isinstance(result_image, Image.Image)
    assert result_image.size == (100, 100)
    assert result_image.mode == "RGB"
    assert np.array(result_image).dtype == np.uint8
    assert np.array(result_image).min() >= 0 and np.array(result_image).max() <= 255


def test_write_image_numpy(tmp_path, img_array_factory):

@@ -1,370 +0,0 @@
"""
This file contains generic tests to ensure that nothing breaks if we modify the push_dataset_to_hub API.
Also, this file contains backward compatibility tests. Because they are slow and require downloading the
raw datasets, we skip them for now in our CI.

Example to run backward compatibility tests locally:
```
python -m pytest --run-skipped tests/test_push_dataset_to_hub.py::test_push_dataset_to_hub_pusht_backward_compatibility
```
"""

from pathlib import Path

import numpy as np
import pytest
import torch

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.push_dataset_to_hub.utils import save_images_concurrently
from lerobot.common.datasets.video_utils import encode_video_frames
from lerobot.scripts.push_dataset_to_hub import push_dataset_to_hub
from tests.utils import require_package_arg


def _mock_download_raw_pusht(raw_dir, num_frames=4, num_episodes=3):
    import zarr

    raw_dir.mkdir(parents=True, exist_ok=True)
    zarr_path = raw_dir / "pusht_cchi_v7_replay.zarr"
    store = zarr.DirectoryStore(zarr_path)
    zarr_data = zarr.group(store=store)

    zarr_data.create_dataset(
        "data/action", shape=(num_frames, 1), chunks=(num_frames, 1), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "data/img",
        shape=(num_frames, 96, 96, 3),
        chunks=(num_frames, 96, 96, 3),
        dtype=np.uint8,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/n_contacts", shape=(num_frames, 2), chunks=(num_frames, 2), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "data/state", shape=(num_frames, 5), chunks=(num_frames, 5), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "data/keypoint", shape=(num_frames, 9, 2), chunks=(num_frames, 9, 2), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "meta/episode_ends", shape=(num_episodes,), chunks=(num_episodes,), dtype=np.int32, overwrite=True
    )

    zarr_data["data/action"][:] = np.random.randn(num_frames, 1)
    zarr_data["data/img"][:] = np.random.randint(0, 255, size=(num_frames, 96, 96, 3), dtype=np.uint8)
    zarr_data["data/n_contacts"][:] = np.random.randn(num_frames, 2)
    zarr_data["data/state"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/keypoint"][:] = np.random.randn(num_frames, 9, 2)
    zarr_data["meta/episode_ends"][:] = np.array([1, 3, 4])

    store.close()
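

# Illustration only (not part of this commit): "meta/episode_ends" stores episode
# boundaries as cumulative frame counts, so [1, 3, 4] encodes episodes of 1, 2 and 1
# frame(s) over num_frames=4.
import numpy as np

episode_ends = np.array([1, 3, 4])
episode_lengths = np.diff(np.concatenate([[0], episode_ends]))
assert episode_lengths.tolist() == [1, 2, 1]
assert episode_ends[-1] == 4  # num_frames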


def _mock_download_raw_umi(raw_dir, num_frames=4, num_episodes=3):
    import zarr

    raw_dir.mkdir(parents=True, exist_ok=True)
    zarr_path = raw_dir / "cup_in_the_wild.zarr"
    store = zarr.DirectoryStore(zarr_path)
    zarr_data = zarr.group(store=store)

    zarr_data.create_dataset(
        "data/camera0_rgb",
        shape=(num_frames, 96, 96, 3),
        chunks=(num_frames, 96, 96, 3),
        dtype=np.uint8,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/robot0_demo_end_pose",
        shape=(num_frames, 5),
        chunks=(num_frames, 5),
        dtype=np.float32,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/robot0_demo_start_pose",
        shape=(num_frames, 5),
        chunks=(num_frames, 5),
        dtype=np.float32,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/robot0_eef_pos", shape=(num_frames, 5), chunks=(num_frames, 5), dtype=np.float32, overwrite=True
    )
    zarr_data.create_dataset(
        "data/robot0_eef_rot_axis_angle",
        shape=(num_frames, 5),
        chunks=(num_frames, 5),
        dtype=np.float32,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "data/robot0_gripper_width",
        shape=(num_frames, 5),
        chunks=(num_frames, 5),
        dtype=np.float32,
        overwrite=True,
    )
    zarr_data.create_dataset(
        "meta/episode_ends", shape=(num_episodes,), chunks=(num_episodes,), dtype=np.int32, overwrite=True
    )

    zarr_data["data/camera0_rgb"][:] = np.random.randint(0, 255, size=(num_frames, 96, 96, 3), dtype=np.uint8)
    zarr_data["data/robot0_demo_end_pose"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/robot0_demo_start_pose"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/robot0_eef_pos"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/robot0_eef_rot_axis_angle"][:] = np.random.randn(num_frames, 5)
    zarr_data["data/robot0_gripper_width"][:] = np.random.randn(num_frames, 5)
    zarr_data["meta/episode_ends"][:] = np.array([1, 3, 4])

    store.close()


def _mock_download_raw_xarm(raw_dir, num_frames=4):
    import pickle

    dataset_dict = {
        "observations": {
            "rgb": np.random.randint(0, 255, size=(num_frames, 3, 84, 84), dtype=np.uint8),
            "state": np.random.randn(num_frames, 4),
        },
        "actions": np.random.randn(num_frames, 3),
        "rewards": np.random.randn(num_frames),
        "masks": np.random.randn(num_frames),
        "dones": np.array([False, True, True, True]),
    }

    raw_dir.mkdir(parents=True, exist_ok=True)
    pkl_path = raw_dir / "buffer.pkl"
    with open(pkl_path, "wb") as f:
        pickle.dump(dataset_dict, f)


def _mock_download_raw_aloha(raw_dir, num_frames=6, num_episodes=3):
    import h5py

    for ep_idx in range(num_episodes):
        raw_dir.mkdir(parents=True, exist_ok=True)
        path_h5 = raw_dir / f"episode_{ep_idx}.hdf5"
        with h5py.File(str(path_h5), "w") as f:
            f.create_dataset("action", data=np.random.randn(num_frames // num_episodes, 14))
            f.create_dataset("observations/qpos", data=np.random.randn(num_frames // num_episodes, 14))
            f.create_dataset("observations/qvel", data=np.random.randn(num_frames // num_episodes, 14))
            f.create_dataset(
                "observations/images/top",
                data=np.random.randint(
                    0, 255, size=(num_frames // num_episodes, 480, 640, 3), dtype=np.uint8
                ),
            )


def _mock_download_raw_dora(raw_dir, num_frames=6, num_episodes=3, fps=30):
    from datetime import datetime, timedelta, timezone

    import pandas

    def write_parquet(key, timestamps, values):
        data = {
            "timestamp_utc": timestamps,
            key: values,
        }
        df = pandas.DataFrame(data)
        raw_dir.mkdir(parents=True, exist_ok=True)
        df.to_parquet(raw_dir / f"{key}.parquet", engine="pyarrow")

    episode_indices = [None, None, -1, None, None, -1, None, None, -1]
    episode_indices_mapping = [0, 0, 0, 1, 1, 1, 2, 2, 2]
    frame_indices = [0, 1, -1, 0, 1, -1, 0, 1, -1]

    cam_key = "observation.images.cam_high"
    timestamps = []
    actions = []
    states = []
    frames = []
    # `+ num_episodes` accounts for the buffer frames associated with episode_index=-1
    for i, frame_idx in enumerate(frame_indices):
        t_utc = datetime.now(timezone.utc) + timedelta(seconds=i / fps)
        action = np.random.randn(21).tolist()
        state = np.random.randn(21).tolist()
        ep_idx = episode_indices_mapping[i]
        frame = [{"path": f"videos/{cam_key}_episode_{ep_idx:06d}.mp4", "timestamp": frame_idx / fps}]
        timestamps.append(t_utc)
        actions.append(action)
        states.append(state)
        frames.append(frame)

    write_parquet(cam_key, timestamps, frames)
    write_parquet("observation.state", timestamps, states)
    write_parquet("action", timestamps, actions)
    write_parquet("episode_index", timestamps, episode_indices)

    # write fake mp4 file for each episode
    for ep_idx in range(num_episodes):
        imgs_array = np.random.randint(0, 255, size=(num_frames // num_episodes, 480, 640, 3), dtype=np.uint8)

        tmp_imgs_dir = raw_dir / "tmp_images"
        save_images_concurrently(imgs_array, tmp_imgs_dir)

        fname = f"{cam_key}_episode_{ep_idx:06d}.mp4"
        video_path = raw_dir / "videos" / fname
        encode_video_frames(tmp_imgs_dir, video_path, fps, vcodec="libx264")
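

# Illustration only (not part of this commit): in the mocked Dora recording above,
# every third row is a buffer frame tagged episode_index=-1, so 9 rows cover
# num_frames=6 real frames across 3 episodes.
episode_indices = [None, None, -1, None, None, -1, None, None, -1]
real_rows = [i for i, ep in enumerate(episode_indices) if ep != -1]
assert len(real_rows) == 6  # num_frames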


def _mock_download_raw(raw_dir, repo_id):
    if "wrist_gripper" in repo_id:
        _mock_download_raw_dora(raw_dir)
    elif "aloha" in repo_id:
        _mock_download_raw_aloha(raw_dir)
    elif "pusht" in repo_id:
        _mock_download_raw_pusht(raw_dir)
    elif "xarm" in repo_id:
        _mock_download_raw_xarm(raw_dir)
    elif "umi" in repo_id:
        _mock_download_raw_umi(raw_dir)
    else:
        raise ValueError(repo_id)


@pytest.mark.skip("push_dataset_to_hub is deprecated")
def test_push_dataset_to_hub_invalid_repo_id(tmpdir):
    with pytest.raises(ValueError):
        push_dataset_to_hub(Path(tmpdir), "raw_format", "invalid_repo_id")


@pytest.mark.skip("push_dataset_to_hub is deprecated")
def test_push_dataset_to_hub_out_dir_force_override_false(tmpdir):
    tmpdir = Path(tmpdir)
    out_dir = tmpdir / "out"
    raw_dir = tmpdir / "raw"
    # mkdir to skip download
    raw_dir.mkdir(parents=True, exist_ok=True)
    with pytest.raises(ValueError):
        push_dataset_to_hub(
            raw_dir=raw_dir,
            raw_format="some_format",
            repo_id="user/dataset",
            local_dir=out_dir,
            force_override=False,
        )


@pytest.mark.skip("push_dataset_to_hub is deprecated")
@pytest.mark.parametrize(
    "required_packages, raw_format, repo_id, make_test_data",
    [
        (["gym_pusht"], "pusht_zarr", "lerobot/pusht", False),
        (["gym_pusht"], "pusht_zarr", "lerobot/pusht", True),
        (None, "xarm_pkl", "lerobot/xarm_lift_medium", False),
        (None, "aloha_hdf5", "lerobot/aloha_sim_insertion_scripted", False),
        (["imagecodecs"], "umi_zarr", "lerobot/umi_cup_in_the_wild", False),
        (None, "dora_parquet", "cadene/wrist_gripper", False),
    ],
)
@require_package_arg
def test_push_dataset_to_hub_format(required_packages, tmpdir, raw_format, repo_id, make_test_data):
    num_episodes = 3
    tmpdir = Path(tmpdir)

    raw_dir = tmpdir / f"{repo_id}_raw"
    _mock_download_raw(raw_dir, repo_id)

    local_dir = tmpdir / repo_id

    lerobot_dataset = push_dataset_to_hub(
        raw_dir=raw_dir,
        raw_format=raw_format,
        repo_id=repo_id,
        push_to_hub=False,
        local_dir=local_dir,
        force_override=False,
        cache_dir=tmpdir / "cache",
        tests_data_dir=tmpdir / "tests/data" if make_test_data else None,
        encoding={"vcodec": "libx264"},
    )

    # minimal generic tests on the local directory containing LeRobotDataset
    assert (local_dir / "meta_data" / "info.json").exists()
    assert (local_dir / "meta_data" / "stats.safetensors").exists()
    assert (local_dir / "meta_data" / "episode_data_index.safetensors").exists()
    for i in range(num_episodes):
        for cam_key in lerobot_dataset.camera_keys:
            assert (local_dir / "videos" / f"{cam_key}_episode_{i:06d}.mp4").exists()
    assert (local_dir / "train" / "dataset_info.json").exists()
    assert (local_dir / "train" / "state.json").exists()
    assert len(list((local_dir / "train").glob("*.arrow"))) > 0

    # minimal generic tests on the item
    item = lerobot_dataset[0]
    assert "index" in item
    assert "episode_index" in item
    assert "timestamp" in item
    for cam_key in lerobot_dataset.camera_keys:
        assert cam_key in item

    if make_test_data:
        # Check that only the first episode is selected.
        test_dataset = LeRobotDataset(repo_id=repo_id, root=tmpdir / "tests/data")
        num_frames = sum(
            i == lerobot_dataset.hf_dataset["episode_index"][0]
            for i in lerobot_dataset.hf_dataset["episode_index"]
        ).item()
        assert (
            test_dataset.hf_dataset["episode_index"]
            == lerobot_dataset.hf_dataset["episode_index"][:num_frames]
        )
        for k in ["from", "to"]:
            assert torch.equal(test_dataset.episode_data_index[k], lerobot_dataset.episode_data_index[k][:1])


@pytest.mark.skip("push_dataset_to_hub is deprecated")
@pytest.mark.parametrize(
    "raw_format, repo_id",
    [
        # TODO(rcadene): add raw dataset test artifacts
        ("pusht_zarr", "lerobot/pusht"),
        ("xarm_pkl", "lerobot/xarm_lift_medium"),
        ("aloha_hdf5", "lerobot/aloha_sim_insertion_scripted"),
        ("umi_zarr", "lerobot/umi_cup_in_the_wild"),
        ("dora_parquet", "cadene/wrist_gripper"),
    ],
)
def test_push_dataset_to_hub_pusht_backward_compatibility(tmpdir, raw_format, repo_id):
    _, dataset_id = repo_id.split("/")

    tmpdir = Path(tmpdir)
    raw_dir = tmpdir / f"{dataset_id}_raw"
    local_dir = tmpdir / repo_id

    push_dataset_to_hub(
        raw_dir=raw_dir,
        raw_format=raw_format,
        repo_id=repo_id,
        push_to_hub=False,
        local_dir=local_dir,
        force_override=False,
        cache_dir=tmpdir / "cache",
        episodes=[0],
    )

    ds_actual = LeRobotDataset(repo_id, root=tmpdir)
    ds_reference = LeRobotDataset(repo_id)

    assert len(ds_reference.hf_dataset) == len(ds_actual.hf_dataset)

    def check_same_items(item1, item2):
        assert item1.keys() == item2.keys(), "Keys mismatch"

        for key in item1:
            if isinstance(item1[key], torch.Tensor) and isinstance(item2[key], torch.Tensor):
                assert torch.equal(item1[key], item2[key]), f"Mismatch found in key: {key}"
            else:
                assert item1[key] == item2[key], f"Mismatch found in key: {key}"

    for i in range(len(ds_reference.hf_dataset)):
        item_reference = ds_reference.hf_dataset[i]
        item_actual = ds_actual.hf_dataset[i]
        check_same_items(item_reference, item_actual)
@@ -23,8 +23,6 @@ pytest -sx 'tests/test_robots.py::test_robot[aloha-True]'
```
"""

from pathlib import Path

import pytest
import torch

@@ -35,7 +33,7 @@ from tests.utils import TEST_ROBOT_TYPES, mock_calibration_dir, require_robot

@pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES)
@require_robot
def test_robot(tmpdir, request, robot_type, mock):
def test_robot(tmp_path, request, robot_type, mock):
    # TODO(rcadene): measure fps in nightly?
    # TODO(rcadene): test logs
    # TODO(rcadene): add compatibility with other robots
@@ -50,8 +48,7 @@ def test_robot(tmpdir, request, robot_type, mock):
    request.getfixturevalue("patch_builtins_input")

    # Create an empty calibration directory to trigger manual calibration
    tmpdir = Path(tmpdir)
    calibration_dir = tmpdir / robot_type
    calibration_dir = tmp_path / robot_type
    mock_calibration_dir(calibration_dir)
    robot_kwargs["calibration_dir"] = calibration_dir