Loads episode_data_index and stats during dataset __init__ (#85)
Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com>
Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
This commit is contained in:
@@ -1,9 +1,13 @@
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from datasets import load_dataset, load_from_disk
|
||||
|
||||
from lerobot.common.datasets.utils import load_previous_and_future_frames
|
||||
from lerobot.common.datasets.utils import (
|
||||
load_episode_data_index,
|
||||
load_hf_dataset,
|
||||
load_previous_and_future_frames,
|
||||
load_stats,
|
||||
)
|
||||
|
||||
|
||||
class AlohaDataset(torch.utils.data.Dataset):
|
||||
@@ -27,7 +31,7 @@ class AlohaDataset(torch.utils.data.Dataset):
|
||||
def __init__(
|
||||
self,
|
||||
dataset_id: str,
|
||||
version: str | None = "v1.0",
|
||||
version: str | None = "v1.1",
|
||||
root: Path | None = None,
|
||||
split: str = "train",
|
||||
transform: callable = None,
|
||||
@@ -40,13 +44,10 @@ class AlohaDataset(torch.utils.data.Dataset):
|
||||
self.split = split
|
||||
self.transform = transform
|
||||
self.delta_timestamps = delta_timestamps
|
||||
if self.root is not None:
|
||||
self.hf_dataset = load_from_disk(Path(self.root) / self.dataset_id / self.split)
|
||||
else:
|
||||
self.hf_dataset = load_dataset(
|
||||
f"lerobot/{self.dataset_id}", revision=self.version, split=self.split
|
||||
)
|
||||
self.hf_dataset = self.hf_dataset.with_format("torch")
|
||||
# load data from hub or locally when root is provided
|
||||
self.hf_dataset = load_hf_dataset(dataset_id, version, root, split)
|
||||
self.episode_data_index = load_episode_data_index(dataset_id, version, root)
|
||||
self.stats = load_stats(dataset_id, version, root)
|
||||
|
||||
@property
|
||||
def num_samples(self) -> int:
|
||||
@@ -54,7 +55,7 @@ class AlohaDataset(torch.utils.data.Dataset):
|
||||
|
||||
@property
|
||||
def num_episodes(self) -> int:
|
||||
return len(self.hf_dataset.unique("episode_id"))
|
||||
return len(self.hf_dataset.unique("episode_index"))
|
||||
|
||||
def __len__(self):
|
||||
return self.num_samples
|
||||
@@ -66,19 +67,11 @@ class AlohaDataset(torch.utils.data.Dataset):
|
||||
item = load_previous_and_future_frames(
|
||||
item,
|
||||
self.hf_dataset,
|
||||
self.episode_data_index,
|
||||
self.delta_timestamps,
|
||||
tol=1 / self.fps - 1e-4, # 1e-4 to account for possible numerical error
|
||||
)
|
||||
|
||||
# convert images from channel last (PIL) to channel first (pytorch)
|
||||
for key in self.image_keys:
|
||||
if item[key].ndim == 3:
|
||||
item[key] = item[key].permute((2, 0, 1)) # h w c -> c h w
|
||||
elif item[key].ndim == 4:
|
||||
item[key] = item[key].permute((0, 3, 1, 2)) # t h w c -> t c h w
|
||||
else:
|
||||
raise ValueError(item[key].ndim)
|
||||
|
||||
if self.transform is not None:
|
||||
item = self.transform(item)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user