Compare commits

..

3 Commits

Author SHA1 Message Date
Quentin Gallouédec
783a40c9d4 pretrained config for act 2024-04-25 16:06:57 +02:00
Remi
659c69a1c0 Refactor datasets into LeRobotDataset (#91)
Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
2024-04-25 12:23:12 +02:00
Remi
e760e4cd63 Move normalization to policy for act and diffusion (#90)
Co-authored-by: Alexander Soare <alexander.soare159@gmail.com>
2024-04-25 11:47:38 +02:00
14 changed files with 189 additions and 180 deletions

1
.gitignore vendored
View File

@@ -6,7 +6,6 @@ data
outputs
.vscode
rl
.DS_Store
# HPC
nautilus/*.yaml

View File

@@ -153,7 +153,7 @@ See `python lerobot/scripts/eval.py --help` for more instructions.
### Train your own policy
Checkout [examples](./examples) to see how tou can start training a model on a dataset, which will be automatically downloaded if needed.
Check out [examples](./examples) to see how you can start training a model on a dataset, which will be automatically downloaded if needed.
In general, you can use our training script to easily train any policy on any environment:
```bash
@@ -165,7 +165,7 @@ policy=act \
hydra.run.dir=outputs/train/aloha_act
```
After training, you may want to revisit model evaluation to change the evaluation settings. In fact, during training every checkpoints are already evaluated but on a low number of episodes for efficiency. Checkout [example](./examples) to evaluate any model checkpoint on more episodes to increase statistical significance.
After training, you may want to revisit model evaluation to change the evaluation settings. In fact, during training every checkpoint is already evaluated but on a low number of episodes for efficiency. Check out [example](./examples) to evaluate any model checkpoint on more episodes to increase statistical significance.
## Contribute

View File

@@ -1,21 +1,17 @@
from dataclasses import dataclass, field
from transformers.configuration_utils import PretrainedConfig
@dataclass
class ActionChunkingTransformerConfig:
class ActionChunkingTransformerConfig(PretrainedConfig):
"""Configuration class for the Action Chunking Transformers policy.
Defaults are configured for training on bimanual Aloha tasks like "insertion" or "transfer".
The parameters you will most likely need to change are the ones which depend on the environment / sensors.
Those are: `state_dim`, `action_dim` and `camera_names`.
Those are: `input_shapes` and `output_shapes`.
Args:
state_dim: Dimensionality of the observation state space (excluding images).
action_dim: Dimensionality of the action space.
n_obs_steps: Number of environment steps worth of observations to pass to the policy (takes the
current step and additional steps going back).
camera_names: The (unique) set of names for the cameras.
chunk_size: The size of the action prediction "chunks" in units of environment steps.
n_action_steps: The number of action steps to run in the environment for one invocation of the policy.
This should be no greater than the chunk size. For example, if the chunk size is 100, you may
@@ -58,43 +54,41 @@ class ActionChunkingTransformerConfig:
dropout: Dropout to use in the transformer layers (see code for details).
kl_weight: The weight to use for the KL-divergence component of the loss if the variational objective
is enabled. Loss is then calculated as: `reconstruction_loss + kl_weight * kld_loss`.
"""
# Environment.
# TODO(rcadene, alexander-soare): remove these as they are defined in input_shapes, output_shapes
state_dim: int = 14
action_dim: int = 14
Example:
# Inputs / output structure.
```python
>>> from lerobot import ActionChunkingTransformerConfig
>>> # Initializing an ACT style configuration
>>> configuration = ActionChunkingTransformerConfig()
>>> # Initializing a model (with random weights) from the ACT style configuration
>>> model = ActionChunkingTransformerPolicy(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
# Input / output structure.
n_obs_steps: int = 1
camera_names: tuple[str] = ("top",)
chunk_size: int = 100
n_action_steps: int = 100
input_shapes: dict[str, list[str]] = field(
default_factory=lambda: {
"observation.images.top": [3, 480, 640],
"observation.state": [14],
}
)
output_shapes: dict[str, list[str]] = field(
default_factory=lambda: {
"action": [14],
}
)
input_shapes: dict[str, list[str]] = {
"observation.images.top": [3, 480, 640],
"observation.state": [14],
}
output_shapes: dict[str, list[str]] = {"action": [14]}
# Normalization / Unnormalization
normalize_input_modes: dict[str, str] = field(
default_factory=lambda: {
"observation.image": "mean_std",
"observation.state": "mean_std",
}
)
unnormalize_output_modes: dict[str, str] = field(
default_factory=lambda: {
"action": "mean_std",
}
)
normalize_input_modes: dict[str, str] = {
"observation.image": "mean_std",
"observation.state": "mean_std",
}
unnormalize_output_modes: dict[str, str] = {"action": "mean_std"}
# Architecture.
# Vision backbone.
@@ -147,7 +141,10 @@ class ActionChunkingTransformerConfig:
raise ValueError(
f"Multiple observation steps not handled yet. Got `nobs_steps={self.n_obs_steps}`"
)
if self.camera_names != ["top"]:
raise ValueError(f"For now, `camera_names` can only be ['top']. Got {self.camera_names}.")
if len(set(self.camera_names)) != len(self.camera_names):
raise ValueError(f"`camera_names` should not have any repeated entries. Got {self.camera_names}.")
# Check that there is only one image.
# TODO(alexander-soare): generalize this to multiple images.
if (
sum(k.startswith("observation.images.") for k in self.input_shapes) != 1
or "observation.images.top" not in self.input_shapes
):
raise ValueError('For now, only "observation.images.top" is accepted for an image input.')

View File

@@ -5,6 +5,7 @@ The majority of changes here involve removing unused code, unifying naming, and
"""
import math
import time
from collections import deque
from itertools import chain
from typing import Callable
@@ -80,9 +81,13 @@ class ActionChunkingTransformerPolicy(nn.Module):
self.vae_encoder = _TransformerEncoder(cfg)
self.vae_encoder_cls_embed = nn.Embedding(1, cfg.d_model)
# Projection layer for joint-space configuration to hidden dimension.
self.vae_encoder_robot_state_input_proj = nn.Linear(cfg.state_dim, cfg.d_model)
self.vae_encoder_robot_state_input_proj = nn.Linear(
cfg.input_shapes["observation.state"][0], cfg.d_model
)
# Projection layer for action (joint-space target) to hidden dimension.
self.vae_encoder_action_input_proj = nn.Linear(cfg.state_dim, cfg.d_model)
self.vae_encoder_action_input_proj = nn.Linear(
cfg.input_shapes["observation.state"][0], cfg.d_model
)
self.latent_dim = cfg.latent_dim
# Projection layer from the VAE encoder's output to the latent distribution's parameter space.
self.vae_encoder_latent_output_proj = nn.Linear(cfg.d_model, self.latent_dim * 2)
@@ -110,7 +115,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
# Transformer encoder input projections. The tokens will be structured like
# [latent, robot_state, image_feature_map_pixels].
self.encoder_robot_state_input_proj = nn.Linear(cfg.state_dim, cfg.d_model)
self.encoder_robot_state_input_proj = nn.Linear(cfg.input_shapes["observation.state"][0], cfg.d_model)
self.encoder_latent_input_proj = nn.Linear(self.latent_dim, cfg.d_model)
self.encoder_img_feat_input_proj = nn.Conv2d(
backbone_model.fc.in_features, cfg.d_model, kernel_size=1
@@ -124,9 +129,28 @@ class ActionChunkingTransformerPolicy(nn.Module):
self.decoder_pos_embed = nn.Embedding(cfg.chunk_size, cfg.d_model)
# Final action regression head on the output of the transformer's decoder.
self.action_head = nn.Linear(cfg.d_model, cfg.action_dim)
self.action_head = nn.Linear(cfg.d_model, cfg.output_shapes["action"][0])
self._reset_parameters()
self._create_optimizer()
def _create_optimizer(self):
optimizer_params_dicts = [
{
"params": [
p for n, p in self.named_parameters() if not n.startswith("backbone") and p.requires_grad
]
},
{
"params": [
p for n, p in self.named_parameters() if n.startswith("backbone") and p.requires_grad
],
"lr": self.cfg.lr_backbone,
},
]
self.optimizer = torch.optim.AdamW(
optimizer_params_dicts, lr=self.cfg.lr, weight_decay=self.cfg.weight_decay
)
def _reset_parameters(self):
"""Xavier-uniform initialization of the transformer parameters as in the original code."""
@@ -186,6 +210,33 @@ class ActionChunkingTransformerPolicy(nn.Module):
return loss_dict
def update(self, batch, **_) -> dict:
"""Run the model in train mode, compute the loss, and do an optimization step."""
start_time = time.time()
self.train()
batch = self.normalize_inputs(batch)
loss_dict = self.forward(batch)
# TODO(rcadene): self.unnormalize_outputs(out_dict)
loss = loss_dict["loss"]
loss.backward()
grad_norm = torch.nn.utils.clip_grad_norm_(
self.parameters(), self.cfg.grad_clip_norm, error_if_nonfinite=False
)
self.optimizer.step()
self.optimizer.zero_grad()
info = {
"loss": loss.item(),
"grad_norm": float(grad_norm),
"lr": self.cfg.lr,
"update_s": time.time() - start_time,
}
return info
def _stack_images(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
"""Stacks all the images in a batch and puts them in a new key: "observation.images".
@@ -196,17 +247,9 @@ class ActionChunkingTransformerPolicy(nn.Module):
"observation.images.{name}": (B, C, H, W) tensor of images.
}
"""
# Check that there is only one image.
# TODO(alexander-soare): generalize this to multiple images.
provided_cameras = {k.rsplit(".", 1)[-1] for k in batch if k.startswith("observation.images.")}
if len(missing := set(self.cfg.camera_names).difference(provided_cameras)) > 0:
raise ValueError(
f"The following camera images are missing from the provided batch: {missing}. Check the "
"configuration parameter: `camera_names`."
)
# Stack images in the order dictated by the camera names.
# Stack images in the order dictated by input_shapes.
batch["observation.images"] = torch.stack(
[batch[f"observation.images.{name}"] for name in self.cfg.camera_names],
[batch[k] for k in self.cfg.input_shapes if k.startswith("observation.images.")],
dim=-4,
)
@@ -274,7 +317,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
all_cam_features = []
all_cam_pos_embeds = []
images = batch["observation.images"]
for cam_index in range(len(self.cfg.camera_names)):
for cam_index in range(images.shape[-4]):
cam_features = self.backbone(images[:, cam_index])["feature_map"]
cam_pos_embed = self.encoder_cam_feat_pos_embed(cam_features).to(dtype=cam_features.dtype)
cam_features = self.encoder_img_feat_input_proj(cam_features) # (B, C, h, w)

View File

@@ -8,12 +8,9 @@ class DiffusionConfig:
Defaults are configured for training with PushT providing proprioceptive and single camera observations.
The parameters you will most likely need to change are the ones which depend on the environment / sensors.
Those are: `state_dim`, `action_dim` and `image_size`.
Those are: `input_shapes` and `output_shapes`.
Args:
state_dim: Dimensionality of the observation state space (excluding images).
action_dim: Dimensionality of the action space.
image_size: (H, W) size of the input images.
n_obs_steps: Number of environment steps worth of observations to pass to the policy (takes the
current step and additional steps going back).
horizon: Diffusion model action prediction size as detailed in `DiffusionPolicy.select_action`.
@@ -68,13 +65,6 @@ class DiffusionConfig:
spaced). If not provided, this defaults to be the same as `num_train_timesteps`.
"""
# Environment.
# Inherit these from the environment config.
# TODO(rcadene, alexander-soare): remove these as they are defined in input_shapes, output_shapes
state_dim: int = 2
action_dim: int = 2
image_size: tuple[int, int] = (96, 96)
# Inputs / output structure.
n_obs_steps: int = 2
horizon: int = 16
@@ -155,10 +145,14 @@ class DiffusionConfig:
raise ValueError(
f"`vision_backbone` must be one of the ResNet variants. Got {self.vision_backbone}."
)
if self.crop_shape[0] > self.image_size[0] or self.crop_shape[1] > self.image_size[1]:
if (
self.crop_shape[0] > self.input_shapes["observation.image"][1]
or self.crop_shape[1] > self.input_shapes["observation.image"][2]
):
raise ValueError(
f"`crop_shape` should fit within `image_size`. Got {self.crop_shape} for `crop_shape` and "
f"{self.image_size} for `image_size`."
f'`crop_shape` should fit within `input_shapes["observation.image"]`. Got {self.crop_shape} '
f'for `crop_shape` and {self.input_shapes["observation.image"]} for '
'`input_shapes["observation.image"]`.'
)
supported_prediction_types = ["epsilon", "sample"]
if self.prediction_type not in supported_prediction_types:

View File

@@ -11,6 +11,7 @@ TODO(alexander-soare):
import copy
import logging
import math
import time
from collections import deque
from typing import Callable
@@ -18,6 +19,7 @@ import einops
import torch
import torch.nn.functional as F # noqa: N812
import torchvision
from diffusers.optimization import get_scheduler
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from robomimic.models.base_nets import SpatialSoftmax
from torch import Tensor, nn
@@ -69,7 +71,25 @@ class DiffusionPolicy(nn.Module):
self.ema_diffusion = copy.deepcopy(self.diffusion)
self.ema = _EMA(cfg, model=self.ema_diffusion)
# TODO(alexander-soare): Move optimizer out of policy.
self.optimizer = torch.optim.Adam(
self.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay
)
# TODO(alexander-soare): Move LR scheduler out of policy.
# TODO(rcadene): modify lr scheduler so that it doesn't depend on epochs but steps
self.global_step = 0
# configure lr scheduler
self.lr_scheduler = get_scheduler(
cfg.lr_scheduler,
optimizer=self.optimizer,
num_warmup_steps=cfg.lr_warmup_steps,
num_training_steps=lr_scheduler_num_training_steps,
# pytorch assumes stepping LRScheduler every epoch
# however huggingface diffusers steps it every batch
last_epoch=self.global_step - 1,
)
def reset(self):
"""
@@ -135,6 +155,41 @@ class DiffusionPolicy(nn.Module):
loss = self.diffusion.compute_loss(batch)
return {"loss": loss}
def update(self, batch: dict[str, Tensor], **_) -> dict:
"""Run the model in train mode, compute the loss, and do an optimization step."""
start_time = time.time()
self.diffusion.train()
batch = self.normalize_inputs(batch)
loss = self.forward(batch)["loss"]
loss.backward()
# TODO(rcadene): self.unnormalize_outputs(out_dict)
grad_norm = torch.nn.utils.clip_grad_norm_(
self.diffusion.parameters(),
self.cfg.grad_clip_norm,
error_if_nonfinite=False,
)
self.optimizer.step()
self.optimizer.zero_grad()
self.lr_scheduler.step()
if self.ema is not None:
self.ema.step(self.diffusion)
info = {
"loss": loss.item(),
"grad_norm": float(grad_norm),
"lr": self.lr_scheduler.get_last_lr()[0],
"update_s": time.time() - start_time,
}
return info
def save(self, fp):
torch.save(self.state_dict(), fp)
@@ -156,7 +211,8 @@ class _DiffusionUnetImagePolicy(nn.Module):
self.rgb_encoder = _RgbEncoder(cfg)
self.unet = _ConditionalUnet1D(
cfg, global_cond_dim=(cfg.action_dim + self.rgb_encoder.feature_dim) * cfg.n_obs_steps
cfg,
global_cond_dim=(cfg.output_shapes["action"][0] + self.rgb_encoder.feature_dim) * cfg.n_obs_steps,
)
self.noise_scheduler = DDPMScheduler(
@@ -184,7 +240,7 @@ class _DiffusionUnetImagePolicy(nn.Module):
# Sample prior.
sample = torch.randn(
size=(batch_size, self.cfg.horizon, self.cfg.action_dim),
size=(batch_size, self.cfg.horizon, self.cfg.output_shapes["action"][0]),
dtype=dtype,
device=device,
generator=generator,
@@ -227,7 +283,7 @@ class _DiffusionUnetImagePolicy(nn.Module):
sample = self.conditional_sample(batch_size, global_cond=global_cond)
# `horizon` steps worth of actions (from the first observation).
actions = sample[..., : self.cfg.action_dim]
actions = sample[..., : self.cfg.output_shapes["action"][0]]
# Extract `n_action_steps` steps worth of actions (from the current observation).
start = n_obs_steps - 1
end = start + self.cfg.n_action_steps
@@ -337,7 +393,9 @@ class _RgbEncoder(nn.Module):
# Set up pooling and final layers.
# Use a dry run to get the feature map shape.
with torch.inference_mode():
feat_map_shape = tuple(self.backbone(torch.zeros(size=(1, 3, *cfg.image_size))).shape[1:])
feat_map_shape = tuple(
self.backbone(torch.zeros(size=(1, *cfg.input_shapes["observation.image"]))).shape[1:]
)
self.pool = SpatialSoftmax(feat_map_shape, num_kp=cfg.spatial_softmax_num_keypoints)
self.feature_dim = cfg.spatial_softmax_num_keypoints * 2
self.out = nn.Linear(cfg.spatial_softmax_num_keypoints * 2, self.feature_dim)
@@ -454,7 +512,7 @@ class _ConditionalUnet1D(nn.Module):
# In channels / out channels for each downsampling block in the Unet's encoder. For the decoder, we
# just reverse these.
in_out = [(cfg.action_dim, cfg.down_dims[0])] + list(
in_out = [(cfg.output_shapes["action"][0], cfg.down_dims[0])] + list(
zip(cfg.down_dims[:-1], cfg.down_dims[1:], strict=True)
)
@@ -505,7 +563,7 @@ class _ConditionalUnet1D(nn.Module):
self.final_conv = nn.Sequential(
_Conv1dBlock(cfg.down_dims[0], cfg.down_dims[0], kernel_size=cfg.kernel_size),
nn.Conv1d(cfg.down_dims[0], cfg.action_dim, 1),
nn.Conv1d(cfg.down_dims[0], cfg.output_shapes["action"][0], 1),
)
def forward(self, x: Tensor, timestep: Tensor | int, global_cond=None) -> Tensor:

View File

@@ -21,7 +21,5 @@ env:
image_size: [3, 480, 640]
episode_length: 400
fps: ${fps}
policy:
state_dim: 14
action_dim: 14

View File

@@ -21,7 +21,5 @@ env:
image_size: 96
episode_length: 300
fps: ${fps}
policy:
state_dim: 2
action_dim: 2

View File

@@ -20,7 +20,5 @@ env:
image_size: 84
episode_length: 25
fps: ${fps}
policy:
state_dim: 4
action_dim: 4

View File

@@ -23,23 +23,17 @@ policy:
pretrained_model_path:
# Environment.
# Inherit these from the environment config.
state_dim: ???
action_dim: ???
# Inputs / output structure.
# Input / output structure.
n_obs_steps: ${n_obs_steps}
camera_names: [top] # [top, front_close, left_pillar, right_pillar]
chunk_size: 100 # chunk_size
n_action_steps: 100
input_shapes:
# TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
observation.images.top: [3, 480, 640]
observation.state: ["${policy.state_dim}"]
observation.state: ["${env.state_dim}"]
output_shapes:
action: ["${policy.action_dim}"]
action: ["${env.action_dim}"]
# Normalization / Unnormalization
normalize_input_modes:

View File

@@ -37,15 +37,7 @@ policy:
pretrained_model_path:
# Environment.
# Inherit these from the environment config.
state_dim: ???
action_dim: ???
image_size:
- ${env.image_size} # height
- ${env.image_size} # width
# Inputs / output structure.
# Input / output structure.
n_obs_steps: ${n_obs_steps}
horizon: ${horizon}
n_action_steps: ${n_action_steps}
@@ -53,9 +45,9 @@ policy:
input_shapes:
# TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
observation.image: [3, 96, 96]
observation.state: ["${policy.state_dim}"]
observation.state: ["${env.state_dim}"]
output_shapes:
action: ["${policy.action_dim}"]
action: ["${env.action_dim}"]
# Normalization / Unnormalization
normalize_input_modes:

View File

@@ -16,8 +16,8 @@ policy:
frame_stack: 1
num_channels: 32
img_size: ${env.image_size}
state_dim: ???
action_dim: ???
state_dim: ${env.action_dim}
action_dim: ${env.action_dim}
# planning
mpc: true

View File

@@ -1,5 +1,4 @@
import logging
import time
from copy import deepcopy
from pathlib import Path
@@ -8,7 +7,6 @@ import hydra
import torch
from datasets import concatenate_datasets
from datasets.utils import disable_progress_bars, enable_progress_bars
from diffusers.optimization import get_scheduler
from lerobot.common.datasets.factory import make_dataset
from lerobot.common.datasets.utils import cycle
@@ -24,45 +22,6 @@ from lerobot.common.utils.utils import (
from lerobot.scripts.eval import eval_policy
def update_policy(policy, batch, optimizer, grad_clip_norm, lr_scheduler=None):
start_time = time.time()
model = policy.diffusion if hasattr(policy, "diffusion") else policy # TODO: hacky, remove this line
model.train()
batch = policy.normalize_inputs(batch)
output_dict = policy.forward(batch)
# TODO(rcadene): policy.unnormalize_outputs(out_dict)
loss = output_dict["loss"]
loss.backward()
# Diffusion
model = policy.diffusion if hasattr(policy, "diffusion") else policy # TODO: hacky, remove this line
grad_norm = torch.nn.utils.clip_grad_norm_(
model.parameters(),
grad_clip_norm,
error_if_nonfinite=False,
)
optimizer.step()
optimizer.zero_grad()
if lr_scheduler is not None:
lr_scheduler.step()
if hasattr(policy, "ema") and policy.ema is not None:
policy.ema.step(model)
info = {
"loss": loss.item(),
"grad_norm": float(grad_norm),
"lr": optimizer.param_groups[0]['lr'],
"update_s": time.time() - start_time,
}
return info
@hydra.main(version_base=None, config_name="default", config_path="../configs")
def train_cli(cfg: dict):
train(
@@ -275,35 +234,6 @@ def train(cfg: dict, out_dir=None, job_name=None):
logging.info("make_policy")
policy = make_policy(cfg, dataset_stats=offline_dataset.stats)
# Create optimizer and scheduler
# Temporary hack to move optimizer out of policy
if cfg.policy.name == "act":
optimizer_params_dicts = [
{"params": [p for n, p in policy.named_parameters() if not n.startswith("backbone") and p.requires_grad]},
{
"params": [p for n, p in policy.named_parameters() if n.startswith("backbone") and p.requires_grad],
"lr": cfg.lr_backbone,
},
]
optimizer = torch.optim.AdamW(optimizer_params_dicts, lr=cfg.lr, weight_decay=cfg.weight_decay)
lr_scheduler = None
elif cfg.policy.name == "diffusion":
optimizer = torch.optim.Adam(
policy.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay
)
# TODO(rcadene): modify lr scheduler so that it doesn't depend on epochs but steps
global_step = 0
# configure lr scheduler
lr_scheduler = get_scheduler(
cfg.lr_scheduler,
optimizer=optimizer,
num_warmup_steps=cfg.lr_warmup_steps,
num_training_steps=cfg.offline_steps,
# pytorch assumes stepping LRScheduler every epoch
# however huggingface diffusers steps it every batch
last_epoch=global_step - 1,
)
num_learnable_params = sum(p.numel() for p in policy.parameters() if p.requires_grad)
num_total_params = sum(p.numel() for p in policy.parameters())
@@ -363,7 +293,7 @@ def train(cfg: dict, out_dir=None, job_name=None):
for key in batch:
batch[key] = batch[key].to(cfg.device, non_blocking=True)
train_info = update_policy(policy, batch, optimizer, cfg.grad_clip_norm, lr_scheduler)
train_info = policy.update(batch, step=step)
# TODO(rcadene): is it ok if step_t=0 = 0 and not 1 as previously done?
if step % cfg.log_freq == 0:
@@ -386,7 +316,9 @@ def train(cfg: dict, out_dir=None, job_name=None):
# create dataloader for online training
concat_dataset = torch.utils.data.ConcatDataset([offline_dataset, online_dataset])
weights = [1.0] * len(concat_dataset)
sampler = torch.utils.data.WeightedRandomSampler(weights, num_samples=len(concat_dataset), replacement=True)
sampler = torch.utils.data.WeightedRandomSampler(
weights, num_samples=len(concat_dataset), replacement=True
)
dataloader = torch.utils.data.DataLoader(
concat_dataset,
num_workers=4,

View File

@@ -121,7 +121,13 @@ def test_policy(env_name, policy_name, extra_overrides):
],
)
def test_normalize(insert_temporal_dim):
# TODO(rcadene, alexander-soare): test with real data and assert results of normalization/unnormalization
"""
Test that normalize/unnormalize can run without exceptions when properly set up, and that they raise
an exception when the forward pass is called without the stats having been provided.
TODO(rcadene, alexander-soare): This should also test that the normalization / unnormalization works as
expected.
"""
input_shapes = {
"observation.image": [3, 96, 96],
@@ -193,7 +199,7 @@ def test_normalize(insert_temporal_dim):
new_normalize.load_state_dict(normalize.state_dict())
new_normalize(input_batch)
# test wihtout stats
# test without stats
unnormalize = Unnormalize(output_shapes, unnormalize_output_modes, stats=None)
with pytest.raises(AssertionError):
unnormalize(output_batch)