Release cleanup (#132)
Co-authored-by: Kashif Rasul <kashif.rasul@gmail.com>
Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
Co-authored-by: Cadene <re.cadene@gmail.com>
@@ -85,13 +85,6 @@ available_datasets = list(
     itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets)
 )
 
-# TODO(rcadene, aliberts, alexander-soare): Add real-world env with a gym API
-available_datasets_without_env = ["lerobot/umi_cup_in_the_wild"]
-
-available_datasets = list(
-    itertools.chain(*available_datasets_per_env.values(), available_datasets_without_env)
-)
-
 available_policies = [
     "act",
     "diffusion",

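This hunk drops a duplicated `available_datasets` definition; the surviving one flattens the per-environment dataset lists together with the real-world list. A minimal sketch of the `itertools.chain(*...)` idiom it relies on, with hypothetical registry contents:

```python
# Sketch with hypothetical registry contents, not the module's actual lists.
import itertools

available_datasets_per_env = {
    "aloha": ["lerobot/aloha_sim_insertion_human"],
    "pusht": ["lerobot/pusht"],
}
available_real_world_datasets = ["lerobot/umi_cup_in_the_wild"]

# chain(*dict.values(), extra) flattens every per-env list plus the extra list
available_datasets = list(
    itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets)
)
print(available_datasets)
# ['lerobot/aloha_sim_insertion_human', 'lerobot/pusht', 'lerobot/umi_cup_in_the_wild']
```
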
@@ -37,16 +37,16 @@ How to decode videos?
 ## Variables
 
 **Image content**
-We don't expect the same optimal settings for a dataset of images from a simulation, or from real-world in an appartment, or in a factory, or outdoor, etc. Hence, we run this bechmark on two datasets: `pusht` (simulation) and `umi` (real-world outdoor).
+We don't expect the same optimal settings for a dataset of images from a simulation, or from real-world in an appartment, or in a factory, or outdoor, etc. Hence, we run this benchmark on two datasets: `pusht` (simulation) and `umi` (real-world outdoor).
 
 **Requested timestamps**
-In this benchmark, we focus on the loading time of random access, so we are not interested about sequentially loading all frames of a video like in a movie. However, the number of consecutive timestamps requested and their spacing can greatly affect the `load_time_factor`. In fact, it is expected to get faster loading time by decoding a large number of consecutive frames from a video, than to load the same data from individual images. To reflect our robotics use case, we consider a few settings:
+In this benchmark, we focus on the loading time of random access, so we are not interested in sequentially loading all frames of a video like in a movie. However, the number of consecutive timestamps requested and their spacing can greatly affect the `load_time_factor`. In fact, it is expected to get faster loading time by decoding a large number of consecutive frames from a video, than to load the same data from individual images. To reflect our robotics use case, we consider a few settings:
 - `single_frame`: 1 frame,
 - `2_frames`: 2 consecutive frames (e.g. `[t, t + 1 / fps]`),
 - `2_frames_4_space`: 2 consecutive frames with 4 frames of spacing (e.g `[t, t + 4 / fps]`),
 
 **Data augmentations**
-We might revisit this benchmark and find better settings if we train our policies with various data augmentations to make them more robusts (e.g. robust to color changes, compression, etc.).
+We might revisit this benchmark and find better settings if we train our policies with various data augmentations to make them more robust (e.g. robust to color changes, compression, etc.).
 
 
 ## Results

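The "requested timestamps" settings above map directly to lists of decode times. A small sketch (a plain helper for illustration, not the benchmark's code) turning a setting name into the `[t, t + k / fps]` lists described in the bullets:

```python
# Illustrative helper: convert a benchmark setting into concrete timestamps
# to request from the video decoder, given a reference time t.
def requested_timestamps(t: float, fps: float, setting: str) -> list[float]:
    if setting == "single_frame":
        return [t]
    if setting == "2_frames":
        return [t, t + 1 / fps]
    if setting == "2_frames_4_space":
        return [t, t + 4 / fps]
    raise ValueError(f"unknown setting: {setting}")

print(requested_timestamps(t=1.0, fps=10, setting="2_frames_4_space"))  # [1.0, 1.4]
```
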
@@ -47,6 +47,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
 
     @property
     def fps(self) -> int:
+        """Frames per second used during data collection."""
         return self.info["fps"]
 
     @property

@@ -61,15 +62,22 @@ class LeRobotDataset(torch.utils.data.Dataset):
         return self.hf_dataset.features
 
     @property
-    def image_keys(self) -> list[str]:
-        image_keys = []
+    def camera_keys(self) -> list[str]:
+        """Keys to access image and video stream from cameras."""
+        keys = []
         for key, feats in self.hf_dataset.features.items():
-            if isinstance(feats, datasets.Image):
-                image_keys.append(key)
-        return image_keys + self.video_frame_keys
+            if isinstance(feats, (datasets.Image, VideoFrame)):
+                keys.append(key)
+        return keys
 
     @property
-    def video_frame_keys(self):
+    def video_frame_keys(self) -> list[str]:
+        """Keys to access video frames that requires to be decoded into images.
+
+        Note: It is empty if the dataset contains images only,
+        or equal to `self.cameras` if the dataset contains videos only,
+        or can even be a subset of `self.cameras` in a case of a mixed image/video dataset.
+        """
         video_frame_keys = []
         for key, feats in self.hf_dataset.features.items():
             if isinstance(feats, VideoFrame):

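After this rename, `camera_keys` covers every camera stream (image or video features), while `video_frame_keys` is the subset that must be decoded from .mp4. A usage sketch, assuming the real `lerobot/pusht` dataset is reachable on the Hub; the printed values are illustrative:

```python
# Usage sketch: relationship between the two properties on a dataset.
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

dataset = LeRobotDataset("lerobot/pusht")
print(dataset.camera_keys)       # e.g. ['observation.image']
print(dataset.video_frame_keys)  # empty for an image-only dataset
```
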
@@ -78,10 +86,12 @@ class LeRobotDataset(torch.utils.data.Dataset):
 
     @property
     def num_samples(self) -> int:
+        """Number of samples/frames."""
        return len(self.hf_dataset)
 
     @property
     def num_episodes(self) -> int:
+        """Number of episodes."""
         return len(self.hf_dataset.unique("episode_index"))
 
     @property

@@ -121,6 +131,22 @@ class LeRobotDataset(torch.utils.data.Dataset):
 
         return item
 
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}(\n"
+            f"  Repository ID: '{self.repo_id}',\n"
+            f"  Version: '{self.version}',\n"
+            f"  Split: '{self.split}',\n"
+            f"  Number of Samples: {self.num_samples},\n"
+            f"  Number of Episodes: {self.num_episodes},\n"
+            f"  Type: {'video (.mp4)' if self.video else 'image (.png)'},\n"
+            f"  Recorded Frames per Second: {self.fps},\n"
+            f"  Camera Keys: {self.camera_keys},\n"
+            f"  Video Frame Keys: {self.video_frame_keys if self.video else 'N/A'},\n"
+            f"  Transformations: {self.transform},\n"
+            f")"
+        )
+
     @classmethod
     def from_preloaded(
         cls,

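For illustration only, roughly what the new `__repr__` produces when printing a dataset instance; the field values below are mocked, not captured output:

```python
# Mock output, values illustrative; see the f-strings in the hunk above.
print(dataset)
# LeRobotDataset(
#   Repository ID: 'lerobot/pusht',
#   Split: 'train',
#   Number of Samples: 25650,
#   Number of Episodes: 206,
#   ...
# )
```
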
@@ -142,12 +142,12 @@ def load_from_raw(raw_dir, out_dir, fps, video, debug):
 def to_hf_dataset(data_dict, video) -> Dataset:
     features = {}
 
-    image_keys = [key for key in data_dict if "observation.images." in key]
-    for image_key in image_keys:
+    keys = [key for key in data_dict if "observation.images." in key]
+    for key in keys:
         if video:
-            features[image_key] = VideoFrame()
+            features[key] = VideoFrame()
         else:
-            features[image_key] = Image()
+            features[key] = Image()
 
     features["observation.state"] = Sequence(
         length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)

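The loop above picks the HF `datasets` feature type per camera key: the custom `VideoFrame` for frames decoded from .mp4, the standard `Image` otherwise. A self-contained sketch of the same pattern; the `VideoFrame` import path is an assumption based on lerobot's video utils:

```python
# Sketch of the pattern; data_dict stands in for the conversion script's
# intermediate dict mapping keys to tensors/frames.
from datasets import Image

from lerobot.common.datasets.video_utils import VideoFrame  # assumed import path

def camera_features(data_dict: dict, video: bool) -> dict:
    features = {}
    for key in (k for k in data_dict if "observation.images." in k):
        # videos get the lazily-decoded VideoFrame feature, image datasets
        # use the standard datasets.Image feature
        features[key] = VideoFrame() if video else Image()
    return features
```
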
@@ -25,7 +25,7 @@ training:
   eval_freq: ???
   save_freq: ???
   log_freq: 250
-  save_model: false
+  save_model: true
 
 eval:
   n_episodes: 1

@@ -583,17 +583,18 @@ if __name__ == "__main__":
         pretrained_policy_path = Path(
             snapshot_download(args.pretrained_policy_name_or_path, revision=args.revision)
         )
-    except HFValidationError:
-        logging.warning(
-            "The provided pretrained_policy_name_or_path is not a valid Hugging Face Hub repo ID. "
-            "Treating it as a local directory."
-        )
-    except RepositoryNotFoundError:
-        logging.warning(
-            "The provided pretrained_policy_name_or_path was not found on the Hugging Face Hub. Treating "
-            "it as a local directory."
-        )
-    pretrained_policy_path = Path(args.pretrained_policy_name_or_path)
+    except (HFValidationError, RepositoryNotFoundError) as e:
+        if isinstance(e, HFValidationError):
+            error_message = (
+                "The provided pretrained_policy_name_or_path is not a valid Hugging Face Hub repo ID."
+            )
+        else:
+            error_message = (
+                "The provided pretrained_policy_name_or_path was not found on the Hugging Face Hub."
+            )
+
+        logging.warning(f"{error_message} Treating it as a local directory.")
+        pretrained_policy_path = Path(args.pretrained_policy_name_or_path)
 
     if not pretrained_policy_path.is_dir() or not pretrained_policy_path.exists():
         raise ValueError(
             "The provided pretrained_policy_name_or_path is not a valid/existing Hugging Face Hub "

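The refactor collapses two near-identical handlers into one except clause, with `isinstance` picking the message. A standalone sketch of the same shape, using stand-in exception classes and a stub downloader rather than the real Hub errors:

```python
# Stand-in exceptions and a stub downloader; only the control-flow shape
# mirrors the hunk above.
import logging
from pathlib import Path

class InvalidRepoID(Exception): ...
class RepoNotFound(Exception): ...

def fetch_from_hub(name: str) -> str:
    raise RepoNotFound(name)  # stub standing in for snapshot_download

def resolve_policy_path(name: str) -> Path:
    try:
        return Path(fetch_from_hub(name))
    except (InvalidRepoID, RepoNotFound) as e:
        if isinstance(e, InvalidRepoID):
            error_message = "The provided name is not a valid repo ID."
        else:
            error_message = "The provided name was not found on the hub."
        logging.warning(f"{error_message} Treating it as a local directory.")
        return Path(name)

print(resolve_policy_path("outputs/train/my_policy"))  # falls back to local path
```
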
@@ -60,7 +60,7 @@ import torch
 from huggingface_hub import HfApi
 from safetensors.torch import save_file
 
-from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
 from lerobot.common.datasets.push_dataset_to_hub._download_raw import download_raw
 from lerobot.common.datasets.push_dataset_to_hub.compute_stats import compute_stats
 from lerobot.common.datasets.utils import flatten_dict

@@ -252,7 +252,7 @@ def main():
     parser.add_argument(
         "--revision",
         type=str,
-        default="v1.2",
+        default=CODEBASE_VERSION,
         help="Codebase version used to generate the dataset.",
     )
     parser.add_argument(

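Replacing the hard-coded `"v1.2"` default with `CODEBASE_VERSION` means a version bump now happens in one place. A sketch of the idea, with a hypothetical constant value:

```python
# Sketch: the CLI default tracks the module-level constant, so bumping the
# codebase version automatically updates the flag. Value is illustrative.
import argparse

CODEBASE_VERSION = "v1.3"  # single source of truth

parser = argparse.ArgumentParser()
parser.add_argument(
    "--revision",
    type=str,
    default=CODEBASE_VERSION,
    help="Codebase version used to generate the dataset.",
)
print(parser.parse_args([]).revision)  # v1.3
```
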
@@ -8,7 +8,6 @@ import hydra
 import torch
 from datasets import concatenate_datasets
 from datasets.utils import disable_progress_bars, enable_progress_bars
-from diffusers.optimization import get_scheduler
 
 from lerobot.common.datasets.factory import make_dataset
 from lerobot.common.datasets.utils import cycle

@@ -55,6 +54,8 @@ def make_optimizer_and_scheduler(cfg, policy):
             cfg.training.adam_weight_decay,
         )
         assert cfg.training.online_steps == 0, "Diffusion Policy does not handle online training."
+        from diffusers.optimization import get_scheduler
+
         lr_scheduler = get_scheduler(
             cfg.training.lr_scheduler,
             optimizer=optimizer,

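Together with the previous hunk, the `diffusers` import moves from module scope into the diffusion branch, so the dependency is only needed when that policy is used. A minimal sketch of the deferred-import pattern; the scheduler name and step counts are illustrative:

```python
# Deferred import: the optional dependency is imported only on the code path
# that needs it. Scheduler name and step counts are illustrative.
def make_scheduler(policy_name: str, optimizer):
    if policy_name == "diffusion":
        from diffusers.optimization import get_scheduler  # imported lazily

        return get_scheduler(
            "cosine", optimizer=optimizer, num_warmup_steps=500, num_training_steps=50_000
        )
    return None
```
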
@@ -336,7 +337,7 @@ def train(cfg: dict, out_dir=None, job_name=None):
     logging.info(f"{num_total_params=} ({format_big_number(num_total_params)})")
 
     # Note: this helper will be used in offline and online training loops.
-    def _maybe_eval_and_maybe_save(step):
+    def evaluate_and_checkpoint_if_needed(step):
         if step % cfg.training.eval_freq == 0:
             logging.info(f"Eval policy at step {step}")
             eval_info = eval_policy(

@@ -392,9 +393,9 @@ def train(cfg: dict, out_dir=None, job_name=None):
         if step % cfg.training.log_freq == 0:
             log_train_info(logger, train_info, step, cfg, offline_dataset, is_offline)
 
-        # Note: _maybe_eval_and_maybe_save happens **after** the `step`th training update has completed, so we pass in
-        # step + 1.
-        _maybe_eval_and_maybe_save(step + 1)
+        # Note: evaluate_and_checkpoint_if_needed happens **after** the `step`th training update has completed,
+        # so we pass in step + 1.
+        evaluate_and_checkpoint_if_needed(step + 1)
 
         step += 1
 

@@ -460,9 +461,9 @@ def train(cfg: dict, out_dir=None, job_name=None):
             if step % cfg.training.log_freq == 0:
                 log_train_info(logger, train_info, step, cfg, online_dataset, is_offline)
 
-            # Note: _maybe_eval_and_maybe_save happens **after** the `step`th training update has completed, so we pass
-            # in step + 1.
-            _maybe_eval_and_maybe_save(step + 1)
+            # Note: evaluate_and_checkpoint_if_needed happens **after** the `step`th training update has completed,
+            # so we pass in step + 1.
+            evaluate_and_checkpoint_if_needed(step + 1)
 
             step += 1
             online_step += 1

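Both call sites pass `step + 1` because the check runs after the `step`th update finishes. A tiny sketch of the off-by-one this avoids, with an illustrative `eval_freq`:

```python
# With eval_freq = 5, passing step + 1 makes the first eval land after 5
# completed updates rather than after 4 (loop steps are 0-indexed).
eval_freq = 5  # illustrative value

def evaluate_and_checkpoint_if_needed(step: int) -> bool:
    return step % eval_freq == 0

completed = [s + 1 for s in range(12) if evaluate_and_checkpoint_if_needed(s + 1)]
print(completed)  # [5, 10] -> evals fire after 5 and 10 completed updates
```
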
@@ -32,7 +32,7 @@ local$ rerun lerobot_pusht_episode_0.rrd
 ```
 
 - Visualize data stored on a distant machine through streaming:
-(You need to forward the websocket port to the distant machine, with
+  (You need to forward the websocket port to the distant machine, with
 `ssh -L 9087:localhost:9087 username@remote-host`)
 ```
 distant$ python lerobot/scripts/visualize_dataset.py \

@@ -131,7 +131,7 @@ def visualize_dataset(
         rr.set_time_seconds("timestamp", batch["timestamp"][i].item())
 
         # display each camera image
-        for key in dataset.image_keys:
+        for key in dataset.camera_keys:
             # TODO(rcadene): add `.compress()`? is it lossless?
             rr.log(key, rr.Image(to_hwc_uint8_numpy(batch[key][i])))
 
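The visualization script now iterates over the renamed `camera_keys`. A sketch of the same logging loop with stand-in data; `rr.init`, `rr.log`, and `rr.Image` are real rerun SDK calls, while the key list and the frame array are dummies:

```python
# Sketch with dummy data: log one image per camera key to rerun.
import numpy as np
import rerun as rr

rr.init("lerobot_example", spawn=False)
frame = np.zeros((96, 96, 3), dtype=np.uint8)  # HWC uint8, like to_hwc_uint8_numpy's output
for key in ["observation.image"]:  # stands in for dataset.camera_keys
    rr.log(key, rr.Image(frame))
```
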