Compare commits
3 Commits
qgallouede
...
qgallouede
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
783a40c9d4 | ||
|
|
659c69a1c0 | ||
|
|
e760e4cd63 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,7 +6,6 @@ data
|
||||
outputs
|
||||
.vscode
|
||||
rl
|
||||
.DS_Store
|
||||
|
||||
# HPC
|
||||
nautilus/*.yaml
|
||||
|
||||
@@ -153,7 +153,7 @@ See `python lerobot/scripts/eval.py --help` for more instructions.
|
||||
|
||||
### Train your own policy
|
||||
|
||||
Checkout [examples](./examples) to see how tou can start training a model on a dataset, which will be automatically downloaded if needed.
|
||||
Check out [examples](./examples) to see how you can start training a model on a dataset, which will be automatically downloaded if needed.
|
||||
|
||||
In general, you can use our training script to easily train any policy on any environment:
|
||||
```bash
|
||||
@@ -165,7 +165,7 @@ policy=act \
|
||||
hydra.run.dir=outputs/train/aloha_act
|
||||
```
|
||||
|
||||
After training, you may want to revisit model evaluation to change the evaluation settings. In fact, during training every checkpoints are already evaluated but on a low number of episodes for efficiency. Checkout [example](./examples) to evaluate any model checkpoint on more episodes to increase statistical significance.
|
||||
After training, you may want to revisit model evaluation to change the evaluation settings. In fact, during training every checkpoint is already evaluated but on a low number of episodes for efficiency. Check out [example](./examples) to evaluate any model checkpoint on more episodes to increase statistical significance.
|
||||
|
||||
## Contribute
|
||||
|
||||
|
||||
@@ -1,21 +1,17 @@
|
||||
from dataclasses import dataclass, field
|
||||
from transformers.configuration_utils import PretrainedConfig
|
||||
|
||||
|
||||
@dataclass
|
||||
class ActionChunkingTransformerConfig:
|
||||
class ActionChunkingTransformerConfig(PretrainedConfig):
|
||||
"""Configuration class for the Action Chunking Transformers policy.
|
||||
|
||||
Defaults are configured for training on bimanual Aloha tasks like "insertion" or "transfer".
|
||||
|
||||
The parameters you will most likely need to change are the ones which depend on the environment / sensors.
|
||||
Those are: `state_dim`, `action_dim` and `camera_names`.
|
||||
Those are: `input_shapes` and 'output_shapes`.
|
||||
|
||||
Args:
|
||||
state_dim: Dimensionality of the observation state space (excluding images).
|
||||
action_dim: Dimensionality of the action space.
|
||||
n_obs_steps: Number of environment steps worth of observations to pass to the policy (takes the
|
||||
current step and additional steps going back).
|
||||
camera_names: The (unique) set of names for the cameras.
|
||||
chunk_size: The size of the action prediction "chunks" in units of environment steps.
|
||||
n_action_steps: The number of action steps to run in the environment for one invocation of the policy.
|
||||
This should be no greater than the chunk size. For example, if the chunk size size 100, you may
|
||||
@@ -58,43 +54,41 @@ class ActionChunkingTransformerConfig:
|
||||
dropout: Dropout to use in the transformer layers (see code for details).
|
||||
kl_weight: The weight to use for the KL-divergence component of the loss if the variational objective
|
||||
is enabled. Loss is then calculated as: `reconstruction_loss + kl_weight * kld_loss`.
|
||||
"""
|
||||
|
||||
# Environment.
|
||||
# TODO(rcadene, alexander-soare): remove these as they are defined in input_shapes, output_shapes
|
||||
state_dim: int = 14
|
||||
action_dim: int = 14
|
||||
Example:
|
||||
|
||||
# Inputs / output structure.
|
||||
```python
|
||||
>>> from lerobot import ActionChunkingTransformerConfig
|
||||
|
||||
>>> # Initializing an ACT style configuration
|
||||
>>> configuration = ActionChunkingTransformerConfig()
|
||||
|
||||
>>> # Initializing a model (with random weights) from the ACT style configuration
|
||||
>>> model = ActionChunkingTransformerPolicy(configuration)
|
||||
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
# Input / output structure.
|
||||
n_obs_steps: int = 1
|
||||
camera_names: tuple[str] = ("top",)
|
||||
chunk_size: int = 100
|
||||
n_action_steps: int = 100
|
||||
|
||||
input_shapes: dict[str, list[str]] = field(
|
||||
default_factory=lambda: {
|
||||
"observation.images.top": [3, 480, 640],
|
||||
"observation.state": [14],
|
||||
}
|
||||
)
|
||||
output_shapes: dict[str, list[str]] = field(
|
||||
default_factory=lambda: {
|
||||
"action": [14],
|
||||
}
|
||||
)
|
||||
input_shapes: dict[str, list[str]] = {
|
||||
"observation.images.top": [3, 480, 640],
|
||||
"observation.state": [14],
|
||||
}
|
||||
|
||||
output_shapes: dict[str, list[str]] = {"action": [14]}
|
||||
|
||||
# Normalization / Unnormalization
|
||||
normalize_input_modes: dict[str, str] = field(
|
||||
default_factory=lambda: {
|
||||
"observation.image": "mean_std",
|
||||
"observation.state": "mean_std",
|
||||
}
|
||||
)
|
||||
unnormalize_output_modes: dict[str, str] = field(
|
||||
default_factory=lambda: {
|
||||
"action": "mean_std",
|
||||
}
|
||||
)
|
||||
normalize_input_modes: dict[str, str] = {
|
||||
"observation.image": "mean_std",
|
||||
"observation.state": "mean_std",
|
||||
}
|
||||
|
||||
unnormalize_output_modes: dict[str, str] = {"action": "mean_std"}
|
||||
|
||||
# Architecture.
|
||||
# Vision backbone.
|
||||
@@ -147,7 +141,10 @@ class ActionChunkingTransformerConfig:
|
||||
raise ValueError(
|
||||
f"Multiple observation steps not handled yet. Got `nobs_steps={self.n_obs_steps}`"
|
||||
)
|
||||
if self.camera_names != ["top"]:
|
||||
raise ValueError(f"For now, `camera_names` can only be ['top']. Got {self.camera_names}.")
|
||||
if len(set(self.camera_names)) != len(self.camera_names):
|
||||
raise ValueError(f"`camera_names` should not have any repeated entries. Got {self.camera_names}.")
|
||||
# Check that there is only one image.
|
||||
# TODO(alexander-soare): generalize this to multiple images.
|
||||
if (
|
||||
sum(k.startswith("observation.images.") for k in self.input_shapes) != 1
|
||||
or "observation.images.top" not in self.input_shapes
|
||||
):
|
||||
raise ValueError('For now, only "observation.images.top" is accepted for an image input.')
|
||||
|
||||
@@ -5,6 +5,7 @@ The majority of changes here involve removing unused code, unifying naming, and
|
||||
"""
|
||||
|
||||
import math
|
||||
import time
|
||||
from collections import deque
|
||||
from itertools import chain
|
||||
from typing import Callable
|
||||
@@ -80,9 +81,13 @@ class ActionChunkingTransformerPolicy(nn.Module):
|
||||
self.vae_encoder = _TransformerEncoder(cfg)
|
||||
self.vae_encoder_cls_embed = nn.Embedding(1, cfg.d_model)
|
||||
# Projection layer for joint-space configuration to hidden dimension.
|
||||
self.vae_encoder_robot_state_input_proj = nn.Linear(cfg.state_dim, cfg.d_model)
|
||||
self.vae_encoder_robot_state_input_proj = nn.Linear(
|
||||
cfg.input_shapes["observation.state"][0], cfg.d_model
|
||||
)
|
||||
# Projection layer for action (joint-space target) to hidden dimension.
|
||||
self.vae_encoder_action_input_proj = nn.Linear(cfg.state_dim, cfg.d_model)
|
||||
self.vae_encoder_action_input_proj = nn.Linear(
|
||||
cfg.input_shapes["observation.state"][0], cfg.d_model
|
||||
)
|
||||
self.latent_dim = cfg.latent_dim
|
||||
# Projection layer from the VAE encoder's output to the latent distribution's parameter space.
|
||||
self.vae_encoder_latent_output_proj = nn.Linear(cfg.d_model, self.latent_dim * 2)
|
||||
@@ -110,7 +115,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
|
||||
|
||||
# Transformer encoder input projections. The tokens will be structured like
|
||||
# [latent, robot_state, image_feature_map_pixels].
|
||||
self.encoder_robot_state_input_proj = nn.Linear(cfg.state_dim, cfg.d_model)
|
||||
self.encoder_robot_state_input_proj = nn.Linear(cfg.input_shapes["observation.state"][0], cfg.d_model)
|
||||
self.encoder_latent_input_proj = nn.Linear(self.latent_dim, cfg.d_model)
|
||||
self.encoder_img_feat_input_proj = nn.Conv2d(
|
||||
backbone_model.fc.in_features, cfg.d_model, kernel_size=1
|
||||
@@ -124,9 +129,28 @@ class ActionChunkingTransformerPolicy(nn.Module):
|
||||
self.decoder_pos_embed = nn.Embedding(cfg.chunk_size, cfg.d_model)
|
||||
|
||||
# Final action regression head on the output of the transformer's decoder.
|
||||
self.action_head = nn.Linear(cfg.d_model, cfg.action_dim)
|
||||
self.action_head = nn.Linear(cfg.d_model, cfg.output_shapes["action"][0])
|
||||
|
||||
self._reset_parameters()
|
||||
self._create_optimizer()
|
||||
|
||||
def _create_optimizer(self):
|
||||
optimizer_params_dicts = [
|
||||
{
|
||||
"params": [
|
||||
p for n, p in self.named_parameters() if not n.startswith("backbone") and p.requires_grad
|
||||
]
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
p for n, p in self.named_parameters() if n.startswith("backbone") and p.requires_grad
|
||||
],
|
||||
"lr": self.cfg.lr_backbone,
|
||||
},
|
||||
]
|
||||
self.optimizer = torch.optim.AdamW(
|
||||
optimizer_params_dicts, lr=self.cfg.lr, weight_decay=self.cfg.weight_decay
|
||||
)
|
||||
|
||||
def _reset_parameters(self):
|
||||
"""Xavier-uniform initialization of the transformer parameters as in the original code."""
|
||||
@@ -186,6 +210,33 @@ class ActionChunkingTransformerPolicy(nn.Module):
|
||||
|
||||
return loss_dict
|
||||
|
||||
def update(self, batch, **_) -> dict:
|
||||
"""Run the model in train mode, compute the loss, and do an optimization step."""
|
||||
start_time = time.time()
|
||||
self.train()
|
||||
|
||||
batch = self.normalize_inputs(batch)
|
||||
|
||||
loss_dict = self.forward(batch)
|
||||
# TODO(rcadene): self.unnormalize_outputs(out_dict)
|
||||
loss = loss_dict["loss"]
|
||||
loss.backward()
|
||||
|
||||
grad_norm = torch.nn.utils.clip_grad_norm_(
|
||||
self.parameters(), self.cfg.grad_clip_norm, error_if_nonfinite=False
|
||||
)
|
||||
|
||||
self.optimizer.step()
|
||||
self.optimizer.zero_grad()
|
||||
|
||||
info = {
|
||||
"loss": loss.item(),
|
||||
"grad_norm": float(grad_norm),
|
||||
"lr": self.cfg.lr,
|
||||
"update_s": time.time() - start_time,
|
||||
}
|
||||
|
||||
return info
|
||||
|
||||
def _stack_images(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
|
||||
"""Stacks all the images in a batch and puts them in a new key: "observation.images".
|
||||
@@ -196,17 +247,9 @@ class ActionChunkingTransformerPolicy(nn.Module):
|
||||
"observation.images.{name}": (B, C, H, W) tensor of images.
|
||||
}
|
||||
"""
|
||||
# Check that there is only one image.
|
||||
# TODO(alexander-soare): generalize this to multiple images.
|
||||
provided_cameras = {k.rsplit(".", 1)[-1] for k in batch if k.startswith("observation.images.")}
|
||||
if len(missing := set(self.cfg.camera_names).difference(provided_cameras)) > 0:
|
||||
raise ValueError(
|
||||
f"The following camera images are missing from the provided batch: {missing}. Check the "
|
||||
"configuration parameter: `camera_names`."
|
||||
)
|
||||
# Stack images in the order dictated by the camera names.
|
||||
# Stack images in the order dictated by input_shapes.
|
||||
batch["observation.images"] = torch.stack(
|
||||
[batch[f"observation.images.{name}"] for name in self.cfg.camera_names],
|
||||
[batch[k] for k in self.cfg.input_shapes if k.startswith("observation.images.")],
|
||||
dim=-4,
|
||||
)
|
||||
|
||||
@@ -274,7 +317,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
|
||||
all_cam_features = []
|
||||
all_cam_pos_embeds = []
|
||||
images = batch["observation.images"]
|
||||
for cam_index in range(len(self.cfg.camera_names)):
|
||||
for cam_index in range(images.shape[-4]):
|
||||
cam_features = self.backbone(images[:, cam_index])["feature_map"]
|
||||
cam_pos_embed = self.encoder_cam_feat_pos_embed(cam_features).to(dtype=cam_features.dtype)
|
||||
cam_features = self.encoder_img_feat_input_proj(cam_features) # (B, C, h, w)
|
||||
|
||||
@@ -8,12 +8,9 @@ class DiffusionConfig:
|
||||
Defaults are configured for training with PushT providing proprioceptive and single camera observations.
|
||||
|
||||
The parameters you will most likely need to change are the ones which depend on the environment / sensors.
|
||||
Those are: `state_dim`, `action_dim` and `image_size`.
|
||||
Those are: `input_shapes` and `output_shapes`.
|
||||
|
||||
Args:
|
||||
state_dim: Dimensionality of the observation state space (excluding images).
|
||||
action_dim: Dimensionality of the action space.
|
||||
image_size: (H, W) size of the input images.
|
||||
n_obs_steps: Number of environment steps worth of observations to pass to the policy (takes the
|
||||
current step and additional steps going back).
|
||||
horizon: Diffusion model action prediction size as detailed in `DiffusionPolicy.select_action`.
|
||||
@@ -68,13 +65,6 @@ class DiffusionConfig:
|
||||
spaced). If not provided, this defaults to be the same as `num_train_timesteps`.
|
||||
"""
|
||||
|
||||
# Environment.
|
||||
# Inherit these from the environment config.
|
||||
# TODO(rcadene, alexander-soare): remove these as they are defined in input_shapes, output_shapes
|
||||
state_dim: int = 2
|
||||
action_dim: int = 2
|
||||
image_size: tuple[int, int] = (96, 96)
|
||||
|
||||
# Inputs / output structure.
|
||||
n_obs_steps: int = 2
|
||||
horizon: int = 16
|
||||
@@ -155,10 +145,14 @@ class DiffusionConfig:
|
||||
raise ValueError(
|
||||
f"`vision_backbone` must be one of the ResNet variants. Got {self.vision_backbone}."
|
||||
)
|
||||
if self.crop_shape[0] > self.image_size[0] or self.crop_shape[1] > self.image_size[1]:
|
||||
if (
|
||||
self.crop_shape[0] > self.input_shapes["observation.image"][1]
|
||||
or self.crop_shape[1] > self.input_shapes["observation.image"][2]
|
||||
):
|
||||
raise ValueError(
|
||||
f"`crop_shape` should fit within `image_size`. Got {self.crop_shape} for `crop_shape` and "
|
||||
f"{self.image_size} for `image_size`."
|
||||
f'`crop_shape` should fit within `input_shapes["observation.image"]`. Got {self.crop_shape} '
|
||||
f'for `crop_shape` and {self.input_shapes["observation.image"]} for '
|
||||
'`input_shapes["observation.image"]`.'
|
||||
)
|
||||
supported_prediction_types = ["epsilon", "sample"]
|
||||
if self.prediction_type not in supported_prediction_types:
|
||||
|
||||
@@ -11,6 +11,7 @@ TODO(alexander-soare):
|
||||
import copy
|
||||
import logging
|
||||
import math
|
||||
import time
|
||||
from collections import deque
|
||||
from typing import Callable
|
||||
|
||||
@@ -18,6 +19,7 @@ import einops
|
||||
import torch
|
||||
import torch.nn.functional as F # noqa: N812
|
||||
import torchvision
|
||||
from diffusers.optimization import get_scheduler
|
||||
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
|
||||
from robomimic.models.base_nets import SpatialSoftmax
|
||||
from torch import Tensor, nn
|
||||
@@ -69,7 +71,25 @@ class DiffusionPolicy(nn.Module):
|
||||
self.ema_diffusion = copy.deepcopy(self.diffusion)
|
||||
self.ema = _EMA(cfg, model=self.ema_diffusion)
|
||||
|
||||
# TODO(alexander-soare): Move optimizer out of policy.
|
||||
self.optimizer = torch.optim.Adam(
|
||||
self.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay
|
||||
)
|
||||
|
||||
# TODO(alexander-soare): Move LR scheduler out of policy.
|
||||
# TODO(rcadene): modify lr scheduler so that it doesn't depend on epochs but steps
|
||||
self.global_step = 0
|
||||
|
||||
# configure lr scheduler
|
||||
self.lr_scheduler = get_scheduler(
|
||||
cfg.lr_scheduler,
|
||||
optimizer=self.optimizer,
|
||||
num_warmup_steps=cfg.lr_warmup_steps,
|
||||
num_training_steps=lr_scheduler_num_training_steps,
|
||||
# pytorch assumes stepping LRScheduler every epoch
|
||||
# however huggingface diffusers steps it every batch
|
||||
last_epoch=self.global_step - 1,
|
||||
)
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
@@ -135,6 +155,41 @@ class DiffusionPolicy(nn.Module):
|
||||
loss = self.diffusion.compute_loss(batch)
|
||||
return {"loss": loss}
|
||||
|
||||
def update(self, batch: dict[str, Tensor], **_) -> dict:
|
||||
"""Run the model in train mode, compute the loss, and do an optimization step."""
|
||||
start_time = time.time()
|
||||
|
||||
self.diffusion.train()
|
||||
|
||||
batch = self.normalize_inputs(batch)
|
||||
|
||||
loss = self.forward(batch)["loss"]
|
||||
loss.backward()
|
||||
|
||||
# TODO(rcadene): self.unnormalize_outputs(out_dict)
|
||||
|
||||
grad_norm = torch.nn.utils.clip_grad_norm_(
|
||||
self.diffusion.parameters(),
|
||||
self.cfg.grad_clip_norm,
|
||||
error_if_nonfinite=False,
|
||||
)
|
||||
|
||||
self.optimizer.step()
|
||||
self.optimizer.zero_grad()
|
||||
self.lr_scheduler.step()
|
||||
|
||||
if self.ema is not None:
|
||||
self.ema.step(self.diffusion)
|
||||
|
||||
info = {
|
||||
"loss": loss.item(),
|
||||
"grad_norm": float(grad_norm),
|
||||
"lr": self.lr_scheduler.get_last_lr()[0],
|
||||
"update_s": time.time() - start_time,
|
||||
}
|
||||
|
||||
return info
|
||||
|
||||
def save(self, fp):
|
||||
torch.save(self.state_dict(), fp)
|
||||
|
||||
@@ -156,7 +211,8 @@ class _DiffusionUnetImagePolicy(nn.Module):
|
||||
|
||||
self.rgb_encoder = _RgbEncoder(cfg)
|
||||
self.unet = _ConditionalUnet1D(
|
||||
cfg, global_cond_dim=(cfg.action_dim + self.rgb_encoder.feature_dim) * cfg.n_obs_steps
|
||||
cfg,
|
||||
global_cond_dim=(cfg.output_shapes["action"][0] + self.rgb_encoder.feature_dim) * cfg.n_obs_steps,
|
||||
)
|
||||
|
||||
self.noise_scheduler = DDPMScheduler(
|
||||
@@ -184,7 +240,7 @@ class _DiffusionUnetImagePolicy(nn.Module):
|
||||
|
||||
# Sample prior.
|
||||
sample = torch.randn(
|
||||
size=(batch_size, self.cfg.horizon, self.cfg.action_dim),
|
||||
size=(batch_size, self.cfg.horizon, self.cfg.output_shapes["action"][0]),
|
||||
dtype=dtype,
|
||||
device=device,
|
||||
generator=generator,
|
||||
@@ -227,7 +283,7 @@ class _DiffusionUnetImagePolicy(nn.Module):
|
||||
sample = self.conditional_sample(batch_size, global_cond=global_cond)
|
||||
|
||||
# `horizon` steps worth of actions (from the first observation).
|
||||
actions = sample[..., : self.cfg.action_dim]
|
||||
actions = sample[..., : self.cfg.output_shapes["action"][0]]
|
||||
# Extract `n_action_steps` steps worth of actions (from the current observation).
|
||||
start = n_obs_steps - 1
|
||||
end = start + self.cfg.n_action_steps
|
||||
@@ -337,7 +393,9 @@ class _RgbEncoder(nn.Module):
|
||||
# Set up pooling and final layers.
|
||||
# Use a dry run to get the feature map shape.
|
||||
with torch.inference_mode():
|
||||
feat_map_shape = tuple(self.backbone(torch.zeros(size=(1, 3, *cfg.image_size))).shape[1:])
|
||||
feat_map_shape = tuple(
|
||||
self.backbone(torch.zeros(size=(1, *cfg.input_shapes["observation.image"]))).shape[1:]
|
||||
)
|
||||
self.pool = SpatialSoftmax(feat_map_shape, num_kp=cfg.spatial_softmax_num_keypoints)
|
||||
self.feature_dim = cfg.spatial_softmax_num_keypoints * 2
|
||||
self.out = nn.Linear(cfg.spatial_softmax_num_keypoints * 2, self.feature_dim)
|
||||
@@ -454,7 +512,7 @@ class _ConditionalUnet1D(nn.Module):
|
||||
|
||||
# In channels / out channels for each downsampling block in the Unet's encoder. For the decoder, we
|
||||
# just reverse these.
|
||||
in_out = [(cfg.action_dim, cfg.down_dims[0])] + list(
|
||||
in_out = [(cfg.output_shapes["action"][0], cfg.down_dims[0])] + list(
|
||||
zip(cfg.down_dims[:-1], cfg.down_dims[1:], strict=True)
|
||||
)
|
||||
|
||||
@@ -505,7 +563,7 @@ class _ConditionalUnet1D(nn.Module):
|
||||
|
||||
self.final_conv = nn.Sequential(
|
||||
_Conv1dBlock(cfg.down_dims[0], cfg.down_dims[0], kernel_size=cfg.kernel_size),
|
||||
nn.Conv1d(cfg.down_dims[0], cfg.action_dim, 1),
|
||||
nn.Conv1d(cfg.down_dims[0], cfg.output_shapes["action"][0], 1),
|
||||
)
|
||||
|
||||
def forward(self, x: Tensor, timestep: Tensor | int, global_cond=None) -> Tensor:
|
||||
|
||||
2
lerobot/configs/env/aloha.yaml
vendored
2
lerobot/configs/env/aloha.yaml
vendored
@@ -21,7 +21,5 @@ env:
|
||||
image_size: [3, 480, 640]
|
||||
episode_length: 400
|
||||
fps: ${fps}
|
||||
|
||||
policy:
|
||||
state_dim: 14
|
||||
action_dim: 14
|
||||
|
||||
2
lerobot/configs/env/pusht.yaml
vendored
2
lerobot/configs/env/pusht.yaml
vendored
@@ -21,7 +21,5 @@ env:
|
||||
image_size: 96
|
||||
episode_length: 300
|
||||
fps: ${fps}
|
||||
|
||||
policy:
|
||||
state_dim: 2
|
||||
action_dim: 2
|
||||
|
||||
2
lerobot/configs/env/xarm.yaml
vendored
2
lerobot/configs/env/xarm.yaml
vendored
@@ -20,7 +20,5 @@ env:
|
||||
image_size: 84
|
||||
episode_length: 25
|
||||
fps: ${fps}
|
||||
|
||||
policy:
|
||||
state_dim: 4
|
||||
action_dim: 4
|
||||
|
||||
@@ -23,23 +23,17 @@ policy:
|
||||
|
||||
pretrained_model_path:
|
||||
|
||||
# Environment.
|
||||
# Inherit these from the environment config.
|
||||
state_dim: ???
|
||||
action_dim: ???
|
||||
|
||||
# Inputs / output structure.
|
||||
# Input / output structure.
|
||||
n_obs_steps: ${n_obs_steps}
|
||||
camera_names: [top] # [top, front_close, left_pillar, right_pillar]
|
||||
chunk_size: 100 # chunk_size
|
||||
n_action_steps: 100
|
||||
|
||||
input_shapes:
|
||||
# TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
|
||||
observation.images.top: [3, 480, 640]
|
||||
observation.state: ["${policy.state_dim}"]
|
||||
observation.state: ["${env.state_dim}"]
|
||||
output_shapes:
|
||||
action: ["${policy.action_dim}"]
|
||||
action: ["${env.action_dim}"]
|
||||
|
||||
# Normalization / Unnormalization
|
||||
normalize_input_modes:
|
||||
|
||||
@@ -37,15 +37,7 @@ policy:
|
||||
|
||||
pretrained_model_path:
|
||||
|
||||
# Environment.
|
||||
# Inherit these from the environment config.
|
||||
state_dim: ???
|
||||
action_dim: ???
|
||||
image_size:
|
||||
- ${env.image_size} # height
|
||||
- ${env.image_size} # width
|
||||
|
||||
# Inputs / output structure.
|
||||
# Input / output structure.
|
||||
n_obs_steps: ${n_obs_steps}
|
||||
horizon: ${horizon}
|
||||
n_action_steps: ${n_action_steps}
|
||||
@@ -53,9 +45,9 @@ policy:
|
||||
input_shapes:
|
||||
# TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
|
||||
observation.image: [3, 96, 96]
|
||||
observation.state: ["${policy.state_dim}"]
|
||||
observation.state: ["${env.state_dim}"]
|
||||
output_shapes:
|
||||
action: ["${policy.action_dim}"]
|
||||
action: ["${env.action_dim}"]
|
||||
|
||||
# Normalization / Unnormalization
|
||||
normalize_input_modes:
|
||||
|
||||
@@ -16,8 +16,8 @@ policy:
|
||||
frame_stack: 1
|
||||
num_channels: 32
|
||||
img_size: ${env.image_size}
|
||||
state_dim: ???
|
||||
action_dim: ???
|
||||
state_dim: ${env.action_dim}
|
||||
action_dim: ${env.action_dim}
|
||||
|
||||
# planning
|
||||
mpc: true
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import logging
|
||||
import time
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
@@ -8,7 +7,6 @@ import hydra
|
||||
import torch
|
||||
from datasets import concatenate_datasets
|
||||
from datasets.utils import disable_progress_bars, enable_progress_bars
|
||||
from diffusers.optimization import get_scheduler
|
||||
|
||||
from lerobot.common.datasets.factory import make_dataset
|
||||
from lerobot.common.datasets.utils import cycle
|
||||
@@ -24,45 +22,6 @@ from lerobot.common.utils.utils import (
|
||||
from lerobot.scripts.eval import eval_policy
|
||||
|
||||
|
||||
def update_policy(policy, batch, optimizer, grad_clip_norm, lr_scheduler=None):
|
||||
start_time = time.time()
|
||||
|
||||
model = policy.diffusion if hasattr(policy, "diffusion") else policy # TODO: hacky, remove this line
|
||||
model.train()
|
||||
|
||||
batch = policy.normalize_inputs(batch)
|
||||
|
||||
output_dict = policy.forward(batch)
|
||||
# TODO(rcadene): policy.unnormalize_outputs(out_dict)
|
||||
loss = output_dict["loss"]
|
||||
loss.backward()
|
||||
|
||||
# Diffusion
|
||||
model = policy.diffusion if hasattr(policy, "diffusion") else policy # TODO: hacky, remove this line
|
||||
grad_norm = torch.nn.utils.clip_grad_norm_(
|
||||
model.parameters(),
|
||||
grad_clip_norm,
|
||||
error_if_nonfinite=False,
|
||||
)
|
||||
|
||||
optimizer.step()
|
||||
optimizer.zero_grad()
|
||||
if lr_scheduler is not None:
|
||||
lr_scheduler.step()
|
||||
|
||||
if hasattr(policy, "ema") and policy.ema is not None:
|
||||
policy.ema.step(model)
|
||||
|
||||
info = {
|
||||
"loss": loss.item(),
|
||||
"grad_norm": float(grad_norm),
|
||||
"lr": optimizer.param_groups[0]['lr'],
|
||||
"update_s": time.time() - start_time,
|
||||
}
|
||||
|
||||
return info
|
||||
|
||||
|
||||
@hydra.main(version_base=None, config_name="default", config_path="../configs")
|
||||
def train_cli(cfg: dict):
|
||||
train(
|
||||
@@ -275,35 +234,6 @@ def train(cfg: dict, out_dir=None, job_name=None):
|
||||
logging.info("make_policy")
|
||||
policy = make_policy(cfg, dataset_stats=offline_dataset.stats)
|
||||
|
||||
# Create optimizer and scheduler
|
||||
# Temporary hack to move optimizer out of policy
|
||||
if cfg.policy.name == "act":
|
||||
optimizer_params_dicts = [
|
||||
{"params": [p for n, p in policy.named_parameters() if not n.startswith("backbone") and p.requires_grad]},
|
||||
{
|
||||
"params": [p for n, p in policy.named_parameters() if n.startswith("backbone") and p.requires_grad],
|
||||
"lr": cfg.lr_backbone,
|
||||
},
|
||||
]
|
||||
optimizer = torch.optim.AdamW(optimizer_params_dicts, lr=cfg.lr, weight_decay=cfg.weight_decay)
|
||||
lr_scheduler = None
|
||||
elif cfg.policy.name == "diffusion":
|
||||
optimizer = torch.optim.Adam(
|
||||
policy.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay
|
||||
)
|
||||
# TODO(rcadene): modify lr scheduler so that it doesn't depend on epochs but steps
|
||||
global_step = 0
|
||||
# configure lr scheduler
|
||||
lr_scheduler = get_scheduler(
|
||||
cfg.lr_scheduler,
|
||||
optimizer=optimizer,
|
||||
num_warmup_steps=cfg.lr_warmup_steps,
|
||||
num_training_steps=cfg.offline_steps,
|
||||
# pytorch assumes stepping LRScheduler every epoch
|
||||
# however huggingface diffusers steps it every batch
|
||||
last_epoch=global_step - 1,
|
||||
)
|
||||
|
||||
num_learnable_params = sum(p.numel() for p in policy.parameters() if p.requires_grad)
|
||||
num_total_params = sum(p.numel() for p in policy.parameters())
|
||||
|
||||
@@ -363,7 +293,7 @@ def train(cfg: dict, out_dir=None, job_name=None):
|
||||
for key in batch:
|
||||
batch[key] = batch[key].to(cfg.device, non_blocking=True)
|
||||
|
||||
train_info = update_policy(policy, batch, optimizer, cfg.grad_clip_norm, lr_scheduler)
|
||||
train_info = policy.update(batch, step=step)
|
||||
|
||||
# TODO(rcadene): is it ok if step_t=0 = 0 and not 1 as previously done?
|
||||
if step % cfg.log_freq == 0:
|
||||
@@ -386,7 +316,9 @@ def train(cfg: dict, out_dir=None, job_name=None):
|
||||
# create dataloader for online training
|
||||
concat_dataset = torch.utils.data.ConcatDataset([offline_dataset, online_dataset])
|
||||
weights = [1.0] * len(concat_dataset)
|
||||
sampler = torch.utils.data.WeightedRandomSampler(weights, num_samples=len(concat_dataset), replacement=True)
|
||||
sampler = torch.utils.data.WeightedRandomSampler(
|
||||
weights, num_samples=len(concat_dataset), replacement=True
|
||||
)
|
||||
dataloader = torch.utils.data.DataLoader(
|
||||
concat_dataset,
|
||||
num_workers=4,
|
||||
|
||||
@@ -121,7 +121,13 @@ def test_policy(env_name, policy_name, extra_overrides):
|
||||
],
|
||||
)
|
||||
def test_normalize(insert_temporal_dim):
|
||||
# TODO(rcadene, alexander-soare): test with real data and assert results of normalization/unnormalization
|
||||
"""
|
||||
Test that normalize/unnormalize can run without exceptions when properly set up, and that they raise
|
||||
an exception when the forward pass is called without the stats having been provided.
|
||||
|
||||
TODO(rcadene, alexander-soare): This should also test that the normalization / unnormalization works as
|
||||
expected.
|
||||
"""
|
||||
|
||||
input_shapes = {
|
||||
"observation.image": [3, 96, 96],
|
||||
@@ -193,7 +199,7 @@ def test_normalize(insert_temporal_dim):
|
||||
new_normalize.load_state_dict(normalize.state_dict())
|
||||
new_normalize(input_batch)
|
||||
|
||||
# test wihtout stats
|
||||
# test without stats
|
||||
unnormalize = Unnormalize(output_shapes, unnormalize_output_modes, stats=None)
|
||||
with pytest.raises(AssertionError):
|
||||
unnormalize(output_batch)
|
||||
|
||||
Reference in New Issue
Block a user