Loads episode_data_index and stats during dataset __init__ (#85)
Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com>
Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
This commit is contained in:
@@ -1,9 +1,13 @@
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from datasets import load_dataset, load_from_disk
|
||||
|
||||
from lerobot.common.datasets.utils import load_previous_and_future_frames
|
||||
from lerobot.common.datasets.utils import (
|
||||
load_episode_data_index,
|
||||
load_hf_dataset,
|
||||
load_previous_and_future_frames,
|
||||
load_stats,
|
||||
)
|
||||
|
||||
|
||||
class PushtDataset(torch.utils.data.Dataset):
|
||||
@@ -25,7 +29,7 @@ class PushtDataset(torch.utils.data.Dataset):
|
||||
def __init__(
|
||||
self,
|
||||
dataset_id: str = "pusht",
|
||||
version: str | None = "v1.0",
|
||||
version: str | None = "v1.1",
|
||||
root: Path | None = None,
|
||||
split: str = "train",
|
||||
transform: callable = None,
|
||||
@@ -38,13 +42,10 @@ class PushtDataset(torch.utils.data.Dataset):
|
||||
self.split = split
|
||||
self.transform = transform
|
||||
self.delta_timestamps = delta_timestamps
|
||||
if self.root is not None:
|
||||
self.hf_dataset = load_from_disk(Path(self.root) / self.dataset_id / self.split)
|
||||
else:
|
||||
self.hf_dataset = load_dataset(
|
||||
f"lerobot/{self.dataset_id}", revision=self.version, split=self.split
|
||||
)
|
||||
self.hf_dataset = self.hf_dataset.with_format("torch")
|
||||
# load data from the hub, or from local disk when root is provided
|
||||
self.hf_dataset = load_hf_dataset(dataset_id, version, root, split)
|
||||
self.episode_data_index = load_episode_data_index(dataset_id, version, root)
|
||||
self.stats = load_stats(dataset_id, version, root)
|
||||
|
||||
@property
|
||||
def num_samples(self) -> int:
|
||||
@@ -52,7 +53,7 @@ class PushtDataset(torch.utils.data.Dataset):
|
||||
|
||||
@property
|
||||
def num_episodes(self) -> int:
|
||||
return len(self.hf_dataset.unique("episode_id"))
|
||||
return len(self.episode_data_index["from"])
|
||||
|
||||
def __len__(self):
|
||||
return self.num_samples
|
||||
@@ -64,19 +65,11 @@ class PushtDataset(torch.utils.data.Dataset):
|
||||
item = load_previous_and_future_frames(
|
||||
item,
|
||||
self.hf_dataset,
|
||||
self.episode_data_index,
|
||||
self.delta_timestamps,
|
||||
tol=1 / self.fps - 1e-4, # 1e-4 to account for possible numerical error
|
||||
)
|
||||
|
||||
# convert images from channel last (PIL) to channel first (pytorch)
|
||||
for key in self.image_keys:
|
||||
if item[key].ndim == 3:
|
||||
item[key] = item[key].permute((2, 0, 1)) # h w c -> c h w
|
||||
elif item[key].ndim == 4:
|
||||
item[key] = item[key].permute((0, 3, 1, 2)) # t h w c -> t c h w
|
||||
else:
|
||||
raise ValueError(item[key].ndim)
|
||||
|
||||
if self.transform is not None:
|
||||
item = self.transform(item)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user