forked from tangger/lerobot
Use PyTorchModelHubMixin to save models as safetensors (#125)
Co-authored-by: Remi <re.cadene@gmail.com>
This commit is contained in:
@@ -2,9 +2,12 @@ import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
|
||||
from omegaconf import OmegaConf
|
||||
from termcolor import colored
|
||||
|
||||
from lerobot.common.policies.policy_protocol import Policy
|
||||
|
||||
|
||||
def log_output_dir(out_dir):
|
||||
logging.info(colored("Output dir:", "yellow", attrs=["bold"]) + f" {out_dir}")
|
||||
@@ -27,7 +30,7 @@ class Logger:
|
||||
self._log_dir = Path(log_dir)
|
||||
self._log_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._job_name = job_name
|
||||
self._model_dir = self._log_dir / "models"
|
||||
self._model_dir = self._log_dir / "checkpoints"
|
||||
self._buffer_dir = self._log_dir / "buffers"
|
||||
self._save_model = cfg.training.save_model
|
||||
self._disable_wandb_artifact = cfg.wandb.disable_artifact
|
||||
@@ -67,18 +70,20 @@ class Logger:
|
||||
logging.info(f"Track this run --> {colored(wandb.run.get_url(), 'yellow', attrs=['bold'])}")
|
||||
self._wandb = wandb
|
||||
|
||||
def save_model(self, policy, identifier):
|
||||
def save_model(self, policy: Policy, identifier):
|
||||
if self._save_model:
|
||||
self._model_dir.mkdir(parents=True, exist_ok=True)
|
||||
fp = self._model_dir / f"{str(identifier)}.pt"
|
||||
policy.save(fp)
|
||||
save_dir = self._model_dir / str(identifier)
|
||||
policy.save_pretrained(save_dir)
|
||||
# Also save the full Hydra config for the env configuration.
|
||||
OmegaConf.save(self._cfg, save_dir / "config.yaml")
|
||||
if self._wandb and not self._disable_wandb_artifact:
|
||||
# note wandb artifact does not accept ":" in its name
|
||||
artifact = self._wandb.Artifact(
|
||||
self._group.replace(":", "_") + "-" + str(self._seed) + "-" + str(identifier),
|
||||
type="model",
|
||||
)
|
||||
artifact.add_file(fp)
|
||||
artifact.add_file(save_dir / SAFETENSORS_SINGLE_FILE)
|
||||
self._wandb.log_artifact(artifact)
|
||||
|
||||
def save_buffer(self, buffer, identifier):
|
||||
|
||||
@@ -38,7 +38,7 @@ class ACTConfig:
|
||||
replace_final_stride_with_dilation: Whether to replace the ResNet's final 2x2 stride with a dilated
|
||||
convolution.
|
||||
pre_norm: Whether to use "pre-norm" in the transformer blocks.
|
||||
d_model: The transformer blocks' main hidden dimension.
|
||||
dim_model: The transformer blocks' main hidden dimension.
|
||||
n_heads: The number of heads to use in the transformer blocks' multi-head attention.
|
||||
dim_feedforward: The dimension to expand the transformer's hidden dimension to in the feed-forward
|
||||
layers.
|
||||
@@ -94,7 +94,7 @@ class ACTConfig:
|
||||
replace_final_stride_with_dilation: int = False
|
||||
# Transformer layers.
|
||||
pre_norm: bool = False
|
||||
d_model: int = 512
|
||||
dim_model: int = 512
|
||||
n_heads: int = 8
|
||||
dim_feedforward: int = 3200
|
||||
feedforward_activation: str = "relu"
|
||||
|
||||
@@ -14,6 +14,7 @@ import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F # noqa: N812
|
||||
import torchvision
|
||||
from huggingface_hub import PyTorchModelHubMixin
|
||||
from torch import Tensor, nn
|
||||
from torchvision.models._utils import IntermediateLayerGetter
|
||||
from torchvision.ops.misc import FrozenBatchNorm2d
|
||||
@@ -22,7 +23,7 @@ from lerobot.common.policies.act.configuration_act import ACTConfig
|
||||
from lerobot.common.policies.normalize import Normalize, Unnormalize
|
||||
|
||||
|
||||
class ACTPolicy(nn.Module):
|
||||
class ACTPolicy(nn.Module, PyTorchModelHubMixin):
|
||||
"""
|
||||
Action Chunking Transformer Policy as per Learning Fine-Grained Bimanual Manipulation with Low-Cost
|
||||
Hardware (paper: https://arxiv.org/abs/2304.13705, code: https://github.com/tonyzhaozh/act)
|
||||
@@ -30,27 +31,31 @@ class ACTPolicy(nn.Module):
|
||||
|
||||
name = "act"
|
||||
|
||||
def __init__(self, cfg: ACTConfig | None = None, dataset_stats=None):
|
||||
def __init__(self, config: ACTConfig | None = None, dataset_stats=None):
|
||||
"""
|
||||
Args:
|
||||
cfg: Policy configuration class instance or None, in which case the default instantiation of the
|
||||
configuration class is used.
|
||||
config: Policy configuration class instance or None, in which case the default instantiation of
|
||||
the configuration class is used.
|
||||
"""
|
||||
super().__init__()
|
||||
if cfg is None:
|
||||
cfg = ACTConfig()
|
||||
self.cfg = cfg
|
||||
self.normalize_inputs = Normalize(cfg.input_shapes, cfg.input_normalization_modes, dataset_stats)
|
||||
self.normalize_targets = Normalize(cfg.output_shapes, cfg.output_normalization_modes, dataset_stats)
|
||||
self.unnormalize_outputs = Unnormalize(
|
||||
cfg.output_shapes, cfg.output_normalization_modes, dataset_stats
|
||||
if config is None:
|
||||
config = ACTConfig()
|
||||
self.config = config
|
||||
self.normalize_inputs = Normalize(
|
||||
config.input_shapes, config.input_normalization_modes, dataset_stats
|
||||
)
|
||||
self.model = ACT(cfg)
|
||||
self.normalize_targets = Normalize(
|
||||
config.output_shapes, config.output_normalization_modes, dataset_stats
|
||||
)
|
||||
self.unnormalize_outputs = Unnormalize(
|
||||
config.output_shapes, config.output_normalization_modes, dataset_stats
|
||||
)
|
||||
self.model = ACT(config)
|
||||
|
||||
def reset(self):
|
||||
"""This should be called whenever the environment is reset."""
|
||||
if self.cfg.n_action_steps is not None:
|
||||
self._action_queue = deque([], maxlen=self.cfg.n_action_steps)
|
||||
if self.config.n_action_steps is not None:
|
||||
self._action_queue = deque([], maxlen=self.config.n_action_steps)
|
||||
|
||||
@torch.no_grad
|
||||
def select_action(self, batch: dict[str, Tensor], **_) -> Tensor:
|
||||
@@ -68,7 +73,7 @@ class ACTPolicy(nn.Module):
|
||||
if len(self._action_queue) == 0:
|
||||
# `self.model.forward` returns a (batch_size, n_action_steps, action_dim) tensor, but the queue
|
||||
# effectively has shape (n_action_steps, batch_size, *), hence the transpose.
|
||||
actions = self.model(batch)[0][: self.cfg.n_action_steps]
|
||||
actions = self.model(batch)[0][: self.config.n_action_steps]
|
||||
|
||||
# TODO(rcadene): make _forward return output dictionary?
|
||||
actions = self.unnormalize_outputs({"action": actions})["action"]
|
||||
@@ -88,7 +93,7 @@ class ACTPolicy(nn.Module):
|
||||
).mean()
|
||||
|
||||
loss_dict = {"l1_loss": l1_loss}
|
||||
if self.cfg.use_vae:
|
||||
if self.config.use_vae:
|
||||
# Calculate Dₖₗ(latent_pdf || standard_normal). Note: After computing the KL-divergence for
|
||||
# each dimension independently, we sum over the latent dimension to get the total
|
||||
# KL-divergence per batch element, then take the mean over the batch.
|
||||
@@ -97,7 +102,7 @@ class ACTPolicy(nn.Module):
|
||||
(-0.5 * (1 + log_sigma_x2_hat - mu_hat.pow(2) - (log_sigma_x2_hat).exp())).sum(-1).mean()
|
||||
)
|
||||
loss_dict["kld_loss"] = mean_kld
|
||||
loss_dict["loss"] = l1_loss + mean_kld * self.cfg.kl_weight
|
||||
loss_dict["loss"] = l1_loss + mean_kld * self.config.kl_weight
|
||||
else:
|
||||
loss_dict["loss"] = l1_loss
|
||||
|
||||
@@ -114,17 +119,10 @@ class ACTPolicy(nn.Module):
|
||||
"""
|
||||
# Stack images in the order dictated by input_shapes.
|
||||
batch["observation.images"] = torch.stack(
|
||||
[batch[k] for k in self.cfg.input_shapes if k.startswith("observation.images.")],
|
||||
[batch[k] for k in self.config.input_shapes if k.startswith("observation.images.")],
|
||||
dim=-4,
|
||||
)
|
||||
|
||||
def save(self, fp):
|
||||
torch.save(self.state_dict(), fp)
|
||||
|
||||
def load(self, fp):
|
||||
d = torch.load(fp)
|
||||
self.load_state_dict(d)
|
||||
|
||||
|
||||
class ACT(nn.Module):
|
||||
"""Action Chunking Transformer: The underlying neural network for ACTPolicy.
|
||||
@@ -161,36 +159,36 @@ class ACT(nn.Module):
|
||||
└───────────────────────┘
|
||||
"""
|
||||
|
||||
def __init__(self, cfg: ACTConfig):
|
||||
def __init__(self, config: ACTConfig):
|
||||
super().__init__()
|
||||
self.cfg = cfg
|
||||
self.config = config
|
||||
# BERT style VAE encoder with input [cls, *joint_space_configuration, *action_sequence].
|
||||
# The cls token forms parameters of the latent's distribution (like this [*means, *log_variances]).
|
||||
if self.cfg.use_vae:
|
||||
self.vae_encoder = ACTEncoder(cfg)
|
||||
self.vae_encoder_cls_embed = nn.Embedding(1, cfg.d_model)
|
||||
if self.config.use_vae:
|
||||
self.vae_encoder = ACTEncoder(config)
|
||||
self.vae_encoder_cls_embed = nn.Embedding(1, config.dim_model)
|
||||
# Projection layer for joint-space configuration to hidden dimension.
|
||||
self.vae_encoder_robot_state_input_proj = nn.Linear(
|
||||
cfg.input_shapes["observation.state"][0], cfg.d_model
|
||||
config.input_shapes["observation.state"][0], config.dim_model
|
||||
)
|
||||
# Projection layer for action (joint-space target) to hidden dimension.
|
||||
self.vae_encoder_action_input_proj = nn.Linear(
|
||||
cfg.input_shapes["observation.state"][0], cfg.d_model
|
||||
config.input_shapes["observation.state"][0], config.dim_model
|
||||
)
|
||||
self.latent_dim = cfg.latent_dim
|
||||
self.latent_dim = config.latent_dim
|
||||
# Projection layer from the VAE encoder's output to the latent distribution's parameter space.
|
||||
self.vae_encoder_latent_output_proj = nn.Linear(cfg.d_model, self.latent_dim * 2)
|
||||
self.vae_encoder_latent_output_proj = nn.Linear(config.dim_model, self.latent_dim * 2)
|
||||
# Fixed sinusoidal positional embedding for the whole input to the VAE encoder. Unsqueeze for batch
|
||||
# dimension.
|
||||
self.register_buffer(
|
||||
"vae_encoder_pos_enc",
|
||||
create_sinusoidal_position_embedding(1 + 1 + cfg.chunk_size, cfg.d_model).unsqueeze(0),
|
||||
create_sinusoidal_pos_embedding(1 + 1 + config.chunk_size, config.dim_model).unsqueeze(0),
|
||||
)
|
||||
|
||||
# Backbone for image feature extraction.
|
||||
backbone_model = getattr(torchvision.models, cfg.vision_backbone)(
|
||||
replace_stride_with_dilation=[False, False, cfg.replace_final_stride_with_dilation],
|
||||
weights=cfg.pretrained_backbone_weights,
|
||||
backbone_model = getattr(torchvision.models, config.vision_backbone)(
|
||||
replace_stride_with_dilation=[False, False, config.replace_final_stride_with_dilation],
|
||||
weights=config.pretrained_backbone_weights,
|
||||
norm_layer=FrozenBatchNorm2d,
|
||||
)
|
||||
# Note: The assumption here is that we are using a ResNet model (and hence layer4 is the final feature
|
||||
@@ -199,26 +197,28 @@ class ACT(nn.Module):
|
||||
self.backbone = IntermediateLayerGetter(backbone_model, return_layers={"layer4": "feature_map"})
|
||||
|
||||
# Transformer (acts as VAE decoder when training with the variational objective).
|
||||
self.encoder = ACTEncoder(cfg)
|
||||
self.decoder = ACTDecoder(cfg)
|
||||
self.encoder = ACTEncoder(config)
|
||||
self.decoder = ACTDecoder(config)
|
||||
|
||||
# Transformer encoder input projections. The tokens will be structured like
|
||||
# [latent, robot_state, image_feature_map_pixels].
|
||||
self.encoder_robot_state_input_proj = nn.Linear(cfg.input_shapes["observation.state"][0], cfg.d_model)
|
||||
self.encoder_latent_input_proj = nn.Linear(self.latent_dim, cfg.d_model)
|
||||
self.encoder_robot_state_input_proj = nn.Linear(
|
||||
config.input_shapes["observation.state"][0], config.dim_model
|
||||
)
|
||||
self.encoder_latent_input_proj = nn.Linear(self.latent_dim, config.dim_model)
|
||||
self.encoder_img_feat_input_proj = nn.Conv2d(
|
||||
backbone_model.fc.in_features, cfg.d_model, kernel_size=1
|
||||
backbone_model.fc.in_features, config.dim_model, kernel_size=1
|
||||
)
|
||||
# Transformer encoder positional embeddings.
|
||||
self.encoder_robot_and_latent_pos_embed = nn.Embedding(2, cfg.d_model)
|
||||
self.encoder_cam_feat_pos_embed = ACTSinusoidalPositionEmbedding2d(cfg.d_model // 2)
|
||||
self.encoder_robot_and_latent_pos_embed = nn.Embedding(2, config.dim_model)
|
||||
self.encoder_cam_feat_pos_embed = ACTSinusoidalPositionEmbedding2d(config.dim_model // 2)
|
||||
|
||||
# Transformer decoder.
|
||||
# Learnable positional embedding for the transformer's decoder (in the style of DETR object queries).
|
||||
self.decoder_pos_embed = nn.Embedding(cfg.chunk_size, cfg.d_model)
|
||||
self.decoder_pos_embed = nn.Embedding(config.chunk_size, config.dim_model)
|
||||
|
||||
# Final action regression head on the output of the transformer's decoder.
|
||||
self.action_head = nn.Linear(cfg.d_model, cfg.output_shapes["action"][0])
|
||||
self.action_head = nn.Linear(config.dim_model, config.output_shapes["action"][0])
|
||||
|
||||
self._reset_parameters()
|
||||
|
||||
@@ -244,7 +244,7 @@ class ACT(nn.Module):
|
||||
Tuple containing the latent PDF's parameters (mean, log(σ²)) both as (B, L) tensors where L is the
|
||||
latent dimension.
|
||||
"""
|
||||
if self.cfg.use_vae and self.training:
|
||||
if self.config.use_vae and self.training:
|
||||
assert (
|
||||
"action" in batch
|
||||
), "actions must be provided when using the variational objective in training mode."
|
||||
@@ -252,7 +252,7 @@ class ACT(nn.Module):
|
||||
batch_size = batch["observation.state"].shape[0]
|
||||
|
||||
# Prepare the latent for input to the transformer encoder.
|
||||
if self.cfg.use_vae and "action" in batch:
|
||||
if self.config.use_vae and "action" in batch:
|
||||
# Prepare the input to the VAE encoder: [cls, *joint_space_configuration, *action_sequence].
|
||||
cls_embed = einops.repeat(
|
||||
self.vae_encoder_cls_embed.weight, "1 d -> b 1 d", b=batch_size
|
||||
@@ -322,7 +322,7 @@ class ACT(nn.Module):
|
||||
# Forward pass through the transformer modules.
|
||||
encoder_out = self.encoder(encoder_in, pos_embed=pos_embed)
|
||||
decoder_in = torch.zeros(
|
||||
(self.cfg.chunk_size, batch_size, self.cfg.d_model),
|
||||
(self.config.chunk_size, batch_size, self.config.dim_model),
|
||||
dtype=pos_embed.dtype,
|
||||
device=pos_embed.device,
|
||||
)
|
||||
@@ -344,10 +344,10 @@ class ACT(nn.Module):
|
||||
class ACTEncoder(nn.Module):
|
||||
"""Convenience module for running multiple encoder layers, maybe followed by normalization."""
|
||||
|
||||
def __init__(self, cfg: ACTConfig):
|
||||
def __init__(self, config: ACTConfig):
|
||||
super().__init__()
|
||||
self.layers = nn.ModuleList([ACTEncoderLayer(cfg) for _ in range(cfg.n_encoder_layers)])
|
||||
self.norm = nn.LayerNorm(cfg.d_model) if cfg.pre_norm else nn.Identity()
|
||||
self.layers = nn.ModuleList([ACTEncoderLayer(config) for _ in range(config.n_encoder_layers)])
|
||||
self.norm = nn.LayerNorm(config.dim_model) if config.pre_norm else nn.Identity()
|
||||
|
||||
def forward(self, x: Tensor, pos_embed: Tensor | None = None) -> Tensor:
|
||||
for layer in self.layers:
|
||||
@@ -357,22 +357,22 @@ class ACTEncoder(nn.Module):
|
||||
|
||||
|
||||
class ACTEncoderLayer(nn.Module):
|
||||
def __init__(self, cfg: ACTConfig):
|
||||
def __init__(self, config: ACTConfig):
|
||||
super().__init__()
|
||||
self.self_attn = nn.MultiheadAttention(cfg.d_model, cfg.n_heads, dropout=cfg.dropout)
|
||||
self.self_attn = nn.MultiheadAttention(config.dim_model, config.n_heads, dropout=config.dropout)
|
||||
|
||||
# Feed forward layers.
|
||||
self.linear1 = nn.Linear(cfg.d_model, cfg.dim_feedforward)
|
||||
self.dropout = nn.Dropout(cfg.dropout)
|
||||
self.linear2 = nn.Linear(cfg.dim_feedforward, cfg.d_model)
|
||||
self.linear1 = nn.Linear(config.dim_model, config.dim_feedforward)
|
||||
self.dropout = nn.Dropout(config.dropout)
|
||||
self.linear2 = nn.Linear(config.dim_feedforward, config.dim_model)
|
||||
|
||||
self.norm1 = nn.LayerNorm(cfg.d_model)
|
||||
self.norm2 = nn.LayerNorm(cfg.d_model)
|
||||
self.dropout1 = nn.Dropout(cfg.dropout)
|
||||
self.dropout2 = nn.Dropout(cfg.dropout)
|
||||
self.norm1 = nn.LayerNorm(config.dim_model)
|
||||
self.norm2 = nn.LayerNorm(config.dim_model)
|
||||
self.dropout1 = nn.Dropout(config.dropout)
|
||||
self.dropout2 = nn.Dropout(config.dropout)
|
||||
|
||||
self.activation = get_activation_fn(cfg.feedforward_activation)
|
||||
self.pre_norm = cfg.pre_norm
|
||||
self.activation = get_activation_fn(config.feedforward_activation)
|
||||
self.pre_norm = config.pre_norm
|
||||
|
||||
def forward(self, x, pos_embed: Tensor | None = None) -> Tensor:
|
||||
skip = x
|
||||
@@ -395,11 +395,11 @@ class ACTEncoderLayer(nn.Module):
|
||||
|
||||
|
||||
class ACTDecoder(nn.Module):
|
||||
def __init__(self, cfg: ACTConfig):
|
||||
def __init__(self, config: ACTConfig):
|
||||
"""Convenience module for running multiple decoder layers followed by normalization."""
|
||||
super().__init__()
|
||||
self.layers = nn.ModuleList([ACTDecoderLayer(cfg) for _ in range(cfg.n_decoder_layers)])
|
||||
self.norm = nn.LayerNorm(cfg.d_model)
|
||||
self.layers = nn.ModuleList([ACTDecoderLayer(config) for _ in range(config.n_decoder_layers)])
|
||||
self.norm = nn.LayerNorm(config.dim_model)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
@@ -418,25 +418,25 @@ class ACTDecoder(nn.Module):
|
||||
|
||||
|
||||
class ACTDecoderLayer(nn.Module):
|
||||
def __init__(self, cfg: ACTConfig):
|
||||
def __init__(self, config: ACTConfig):
|
||||
super().__init__()
|
||||
self.self_attn = nn.MultiheadAttention(cfg.d_model, cfg.n_heads, dropout=cfg.dropout)
|
||||
self.multihead_attn = nn.MultiheadAttention(cfg.d_model, cfg.n_heads, dropout=cfg.dropout)
|
||||
self.self_attn = nn.MultiheadAttention(config.dim_model, config.n_heads, dropout=config.dropout)
|
||||
self.multihead_attn = nn.MultiheadAttention(config.dim_model, config.n_heads, dropout=config.dropout)
|
||||
|
||||
# Feed forward layers.
|
||||
self.linear1 = nn.Linear(cfg.d_model, cfg.dim_feedforward)
|
||||
self.dropout = nn.Dropout(cfg.dropout)
|
||||
self.linear2 = nn.Linear(cfg.dim_feedforward, cfg.d_model)
|
||||
self.linear1 = nn.Linear(config.dim_model, config.dim_feedforward)
|
||||
self.dropout = nn.Dropout(config.dropout)
|
||||
self.linear2 = nn.Linear(config.dim_feedforward, config.dim_model)
|
||||
|
||||
self.norm1 = nn.LayerNorm(cfg.d_model)
|
||||
self.norm2 = nn.LayerNorm(cfg.d_model)
|
||||
self.norm3 = nn.LayerNorm(cfg.d_model)
|
||||
self.dropout1 = nn.Dropout(cfg.dropout)
|
||||
self.dropout2 = nn.Dropout(cfg.dropout)
|
||||
self.dropout3 = nn.Dropout(cfg.dropout)
|
||||
self.norm1 = nn.LayerNorm(config.dim_model)
|
||||
self.norm2 = nn.LayerNorm(config.dim_model)
|
||||
self.norm3 = nn.LayerNorm(config.dim_model)
|
||||
self.dropout1 = nn.Dropout(config.dropout)
|
||||
self.dropout2 = nn.Dropout(config.dropout)
|
||||
self.dropout3 = nn.Dropout(config.dropout)
|
||||
|
||||
self.activation = get_activation_fn(cfg.feedforward_activation)
|
||||
self.pre_norm = cfg.pre_norm
|
||||
self.activation = get_activation_fn(config.feedforward_activation)
|
||||
self.pre_norm = config.pre_norm
|
||||
|
||||
def maybe_add_pos_embed(self, tensor: Tensor, pos_embed: Tensor | None) -> Tensor:
|
||||
return tensor if pos_embed is None else tensor + pos_embed
|
||||
@@ -489,7 +489,7 @@ class ACTDecoderLayer(nn.Module):
|
||||
return x
|
||||
|
||||
|
||||
def create_sinusoidal_position_embedding(num_positions: int, dimension: int) -> Tensor:
|
||||
def create_sinusoidal_pos_embedding(num_positions: int, dimension: int) -> Tensor:
|
||||
"""1D sinusoidal positional embeddings as in Attention is All You Need.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -9,7 +9,6 @@ TODO(alexander-soare):
|
||||
"""
|
||||
|
||||
import copy
|
||||
import logging
|
||||
import math
|
||||
from collections import deque
|
||||
from typing import Callable
|
||||
@@ -19,6 +18,7 @@ import torch
|
||||
import torch.nn.functional as F # noqa: N812
|
||||
import torchvision
|
||||
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
|
||||
from huggingface_hub import PyTorchModelHubMixin
|
||||
from robomimic.models.base_nets import SpatialSoftmax
|
||||
from torch import Tensor, nn
|
||||
from torch.nn.modules.batchnorm import _BatchNorm
|
||||
@@ -32,7 +32,7 @@ from lerobot.common.policies.utils import (
|
||||
)
|
||||
|
||||
|
||||
class DiffusionPolicy(nn.Module):
|
||||
class DiffusionPolicy(nn.Module, PyTorchModelHubMixin):
|
||||
"""
|
||||
Diffusion Policy as per "Diffusion Policy: Visuomotor Policy Learning via Action Diffusion"
|
||||
(paper: https://arxiv.org/abs/2303.04137, code: https://github.com/real-stanford/diffusion_policy).
|
||||
@@ -41,45 +41,50 @@ class DiffusionPolicy(nn.Module):
|
||||
name = "diffusion"
|
||||
|
||||
def __init__(
|
||||
self, cfg: DiffusionConfig | None = None, lr_scheduler_num_training_steps: int = 0, dataset_stats=None
|
||||
self,
|
||||
config: DiffusionConfig | None = None,
|
||||
dataset_stats=None,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
cfg: Policy configuration class instance or None, in which case the default instantiation of the
|
||||
configuration class is used.
|
||||
config: Policy configuration class instance or None, in which case the default instantiation of
|
||||
the configuration class is used.
|
||||
"""
|
||||
super().__init__()
|
||||
# TODO(alexander-soare): LR scheduler will be removed.
|
||||
assert lr_scheduler_num_training_steps > 0
|
||||
if cfg is None:
|
||||
cfg = DiffusionConfig()
|
||||
self.cfg = cfg
|
||||
self.normalize_inputs = Normalize(cfg.input_shapes, cfg.input_normalization_modes, dataset_stats)
|
||||
self.normalize_targets = Normalize(cfg.output_shapes, cfg.output_normalization_modes, dataset_stats)
|
||||
if config is None:
|
||||
config = DiffusionConfig()
|
||||
self.config = config
|
||||
self.normalize_inputs = Normalize(
|
||||
config.input_shapes, config.input_normalization_modes, dataset_stats
|
||||
)
|
||||
self.normalize_targets = Normalize(
|
||||
config.output_shapes, config.output_normalization_modes, dataset_stats
|
||||
)
|
||||
self.unnormalize_outputs = Unnormalize(
|
||||
cfg.output_shapes, cfg.output_normalization_modes, dataset_stats
|
||||
config.output_shapes, config.output_normalization_modes, dataset_stats
|
||||
)
|
||||
|
||||
# queues are populated during rollout of the policy, they contain the n latest observations and actions
|
||||
self._queues = None
|
||||
|
||||
self.diffusion = DiffusionModel(cfg)
|
||||
self.diffusion = DiffusionModel(config)
|
||||
|
||||
# TODO(alexander-soare): This should probably be managed outside of the policy class.
|
||||
self.ema_diffusion = None
|
||||
self.ema = None
|
||||
if self.cfg.use_ema:
|
||||
if self.config.use_ema:
|
||||
self.ema_diffusion = copy.deepcopy(self.diffusion)
|
||||
self.ema = DiffusionEMA(cfg, model=self.ema_diffusion)
|
||||
self.ema = DiffusionEMA(config, model=self.ema_diffusion)
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Clear observation and action queues. Should be called on `env.reset()`
|
||||
"""
|
||||
self._queues = {
|
||||
"observation.image": deque(maxlen=self.cfg.n_obs_steps),
|
||||
"observation.state": deque(maxlen=self.cfg.n_obs_steps),
|
||||
"action": deque(maxlen=self.cfg.n_action_steps),
|
||||
"observation.image": deque(maxlen=self.config.n_obs_steps),
|
||||
"observation.state": deque(maxlen=self.config.n_obs_steps),
|
||||
"action": deque(maxlen=self.config.n_action_steps),
|
||||
}
|
||||
|
||||
@torch.no_grad
|
||||
@@ -138,46 +143,34 @@ class DiffusionPolicy(nn.Module):
|
||||
loss = self.diffusion.compute_loss(batch)
|
||||
return {"loss": loss}
|
||||
|
||||
def save(self, fp):
|
||||
torch.save(self.state_dict(), fp)
|
||||
|
||||
def load(self, fp):
|
||||
d = torch.load(fp)
|
||||
missing_keys, unexpected_keys = self.load_state_dict(d, strict=False)
|
||||
if len(missing_keys) > 0:
|
||||
assert all(k.startswith("ema_diffusion.") for k in missing_keys)
|
||||
logging.warning(
|
||||
"DiffusionPolicy.load expected ema parameters in loaded state dict but none were found."
|
||||
)
|
||||
assert len(unexpected_keys) == 0
|
||||
|
||||
|
||||
class DiffusionModel(nn.Module):
|
||||
def __init__(self, cfg: DiffusionConfig):
|
||||
def __init__(self, config: DiffusionConfig):
|
||||
super().__init__()
|
||||
self.cfg = cfg
|
||||
self.config = config
|
||||
|
||||
self.rgb_encoder = DiffusionRgbEncoder(cfg)
|
||||
self.rgb_encoder = DiffusionRgbEncoder(config)
|
||||
self.unet = DiffusionConditionalUnet1d(
|
||||
cfg,
|
||||
global_cond_dim=(cfg.output_shapes["action"][0] + self.rgb_encoder.feature_dim) * cfg.n_obs_steps,
|
||||
config,
|
||||
global_cond_dim=(config.output_shapes["action"][0] + self.rgb_encoder.feature_dim)
|
||||
* config.n_obs_steps,
|
||||
)
|
||||
|
||||
self.noise_scheduler = DDPMScheduler(
|
||||
num_train_timesteps=cfg.num_train_timesteps,
|
||||
beta_start=cfg.beta_start,
|
||||
beta_end=cfg.beta_end,
|
||||
beta_schedule=cfg.beta_schedule,
|
||||
num_train_timesteps=config.num_train_timesteps,
|
||||
beta_start=config.beta_start,
|
||||
beta_end=config.beta_end,
|
||||
beta_schedule=config.beta_schedule,
|
||||
variance_type="fixed_small",
|
||||
clip_sample=cfg.clip_sample,
|
||||
clip_sample_range=cfg.clip_sample_range,
|
||||
prediction_type=cfg.prediction_type,
|
||||
clip_sample=config.clip_sample,
|
||||
clip_sample_range=config.clip_sample_range,
|
||||
prediction_type=config.prediction_type,
|
||||
)
|
||||
|
||||
if cfg.num_inference_steps is None:
|
||||
if config.num_inference_steps is None:
|
||||
self.num_inference_steps = self.noise_scheduler.config.num_train_timesteps
|
||||
else:
|
||||
self.num_inference_steps = cfg.num_inference_steps
|
||||
self.num_inference_steps = config.num_inference_steps
|
||||
|
||||
# ========= inference ============
|
||||
def conditional_sample(
|
||||
@@ -188,7 +181,7 @@ class DiffusionModel(nn.Module):
|
||||
|
||||
# Sample prior.
|
||||
sample = torch.randn(
|
||||
size=(batch_size, self.cfg.horizon, self.cfg.output_shapes["action"][0]),
|
||||
size=(batch_size, self.config.horizon, self.config.output_shapes["action"][0]),
|
||||
dtype=dtype,
|
||||
device=device,
|
||||
generator=generator,
|
||||
@@ -218,7 +211,7 @@ class DiffusionModel(nn.Module):
|
||||
"""
|
||||
assert set(batch).issuperset({"observation.state", "observation.image"})
|
||||
batch_size, n_obs_steps = batch["observation.state"].shape[:2]
|
||||
assert n_obs_steps == self.cfg.n_obs_steps
|
||||
assert n_obs_steps == self.config.n_obs_steps
|
||||
|
||||
# Extract image feature (first combine batch and sequence dims).
|
||||
img_features = self.rgb_encoder(einops.rearrange(batch["observation.image"], "b n ... -> (b n) ..."))
|
||||
@@ -231,10 +224,10 @@ class DiffusionModel(nn.Module):
|
||||
sample = self.conditional_sample(batch_size, global_cond=global_cond)
|
||||
|
||||
# `horizon` steps worth of actions (from the first observation).
|
||||
actions = sample[..., : self.cfg.output_shapes["action"][0]]
|
||||
actions = sample[..., : self.config.output_shapes["action"][0]]
|
||||
# Extract `n_action_steps` steps worth of actions (from the current observation).
|
||||
start = n_obs_steps - 1
|
||||
end = start + self.cfg.n_action_steps
|
||||
end = start + self.config.n_action_steps
|
||||
actions = actions[:, start:end]
|
||||
|
||||
return actions
|
||||
@@ -253,8 +246,8 @@ class DiffusionModel(nn.Module):
|
||||
assert set(batch).issuperset({"observation.state", "observation.image", "action", "action_is_pad"})
|
||||
batch_size, n_obs_steps = batch["observation.state"].shape[:2]
|
||||
horizon = batch["action"].shape[1]
|
||||
assert horizon == self.cfg.horizon
|
||||
assert n_obs_steps == self.cfg.n_obs_steps
|
||||
assert horizon == self.config.horizon
|
||||
assert n_obs_steps == self.config.n_obs_steps
|
||||
|
||||
# Extract image feature (first combine batch and sequence dims).
|
||||
img_features = self.rgb_encoder(einops.rearrange(batch["observation.image"], "b n ... -> (b n) ..."))
|
||||
@@ -283,12 +276,12 @@ class DiffusionModel(nn.Module):
|
||||
|
||||
# Compute the loss.
|
||||
# The target is either the original trajectory, or the noise.
|
||||
if self.cfg.prediction_type == "epsilon":
|
||||
if self.config.prediction_type == "epsilon":
|
||||
target = eps
|
||||
elif self.cfg.prediction_type == "sample":
|
||||
elif self.config.prediction_type == "sample":
|
||||
target = batch["action"]
|
||||
else:
|
||||
raise ValueError(f"Unsupported prediction type {self.cfg.prediction_type}")
|
||||
raise ValueError(f"Unsupported prediction type {self.config.prediction_type}")
|
||||
|
||||
loss = F.mse_loss(pred, target, reduction="none")
|
||||
|
||||
@@ -306,29 +299,29 @@ class DiffusionRgbEncoder(nn.Module):
|
||||
Includes the ability to normalize and crop the image first.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg: DiffusionConfig):
|
||||
def __init__(self, config: DiffusionConfig):
|
||||
super().__init__()
|
||||
# Set up optional preprocessing.
|
||||
if cfg.crop_shape is not None:
|
||||
if config.crop_shape is not None:
|
||||
self.do_crop = True
|
||||
# Always use center crop for eval
|
||||
self.center_crop = torchvision.transforms.CenterCrop(cfg.crop_shape)
|
||||
if cfg.crop_is_random:
|
||||
self.maybe_random_crop = torchvision.transforms.RandomCrop(cfg.crop_shape)
|
||||
self.center_crop = torchvision.transforms.CenterCrop(config.crop_shape)
|
||||
if config.crop_is_random:
|
||||
self.maybe_random_crop = torchvision.transforms.RandomCrop(config.crop_shape)
|
||||
else:
|
||||
self.maybe_random_crop = self.center_crop
|
||||
else:
|
||||
self.do_crop = False
|
||||
|
||||
# Set up backbone.
|
||||
backbone_model = getattr(torchvision.models, cfg.vision_backbone)(
|
||||
weights=cfg.pretrained_backbone_weights
|
||||
backbone_model = getattr(torchvision.models, config.vision_backbone)(
|
||||
weights=config.pretrained_backbone_weights
|
||||
)
|
||||
# Note: This assumes that the layer4 feature map is children()[-3]
|
||||
# TODO(alexander-soare): Use a safer alternative.
|
||||
self.backbone = nn.Sequential(*(list(backbone_model.children())[:-2]))
|
||||
if cfg.use_group_norm:
|
||||
if cfg.pretrained_backbone_weights:
|
||||
if config.use_group_norm:
|
||||
if config.pretrained_backbone_weights:
|
||||
raise ValueError(
|
||||
"You can't replace BatchNorm in a pretrained model without ruining the weights!"
|
||||
)
|
||||
@@ -342,11 +335,11 @@ class DiffusionRgbEncoder(nn.Module):
|
||||
# Use a dry run to get the feature map shape.
|
||||
with torch.inference_mode():
|
||||
feat_map_shape = tuple(
|
||||
self.backbone(torch.zeros(size=(1, *cfg.input_shapes["observation.image"]))).shape[1:]
|
||||
self.backbone(torch.zeros(size=(1, *config.input_shapes["observation.image"]))).shape[1:]
|
||||
)
|
||||
self.pool = SpatialSoftmax(feat_map_shape, num_kp=cfg.spatial_softmax_num_keypoints)
|
||||
self.feature_dim = cfg.spatial_softmax_num_keypoints * 2
|
||||
self.out = nn.Linear(cfg.spatial_softmax_num_keypoints * 2, self.feature_dim)
|
||||
self.pool = SpatialSoftmax(feat_map_shape, num_kp=config.spatial_softmax_num_keypoints)
|
||||
self.feature_dim = config.spatial_softmax_num_keypoints * 2
|
||||
self.out = nn.Linear(config.spatial_softmax_num_keypoints * 2, self.feature_dim)
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x: Tensor) -> Tensor:
|
||||
@@ -442,34 +435,34 @@ class DiffusionConditionalUnet1d(nn.Module):
|
||||
Note: this removes local conditioning as compared to the original diffusion policy code.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg: DiffusionConfig, global_cond_dim: int):
|
||||
def __init__(self, config: DiffusionConfig, global_cond_dim: int):
|
||||
super().__init__()
|
||||
|
||||
self.cfg = cfg
|
||||
self.config = config
|
||||
|
||||
# Encoder for the diffusion timestep.
|
||||
self.diffusion_step_encoder = nn.Sequential(
|
||||
DiffusionSinusoidalPosEmb(cfg.diffusion_step_embed_dim),
|
||||
nn.Linear(cfg.diffusion_step_embed_dim, cfg.diffusion_step_embed_dim * 4),
|
||||
DiffusionSinusoidalPosEmb(config.diffusion_step_embed_dim),
|
||||
nn.Linear(config.diffusion_step_embed_dim, config.diffusion_step_embed_dim * 4),
|
||||
nn.Mish(),
|
||||
nn.Linear(cfg.diffusion_step_embed_dim * 4, cfg.diffusion_step_embed_dim),
|
||||
nn.Linear(config.diffusion_step_embed_dim * 4, config.diffusion_step_embed_dim),
|
||||
)
|
||||
|
||||
# The FiLM conditioning dimension.
|
||||
cond_dim = cfg.diffusion_step_embed_dim + global_cond_dim
|
||||
cond_dim = config.diffusion_step_embed_dim + global_cond_dim
|
||||
|
||||
# In channels / out channels for each downsampling block in the Unet's encoder. For the decoder, we
|
||||
# just reverse these.
|
||||
in_out = [(cfg.output_shapes["action"][0], cfg.down_dims[0])] + list(
|
||||
zip(cfg.down_dims[:-1], cfg.down_dims[1:], strict=True)
|
||||
in_out = [(config.output_shapes["action"][0], config.down_dims[0])] + list(
|
||||
zip(config.down_dims[:-1], config.down_dims[1:], strict=True)
|
||||
)
|
||||
|
||||
# Unet encoder.
|
||||
common_res_block_kwargs = {
|
||||
"cond_dim": cond_dim,
|
||||
"kernel_size": cfg.kernel_size,
|
||||
"n_groups": cfg.n_groups,
|
||||
"use_film_scale_modulation": cfg.use_film_scale_modulation,
|
||||
"kernel_size": config.kernel_size,
|
||||
"n_groups": config.n_groups,
|
||||
"use_film_scale_modulation": config.use_film_scale_modulation,
|
||||
}
|
||||
self.down_modules = nn.ModuleList([])
|
||||
for ind, (dim_in, dim_out) in enumerate(in_out):
|
||||
@@ -489,10 +482,10 @@ class DiffusionConditionalUnet1d(nn.Module):
|
||||
self.mid_modules = nn.ModuleList(
|
||||
[
|
||||
DiffusionConditionalResidualBlock1d(
|
||||
cfg.down_dims[-1], cfg.down_dims[-1], **common_res_block_kwargs
|
||||
config.down_dims[-1], config.down_dims[-1], **common_res_block_kwargs
|
||||
),
|
||||
DiffusionConditionalResidualBlock1d(
|
||||
cfg.down_dims[-1], cfg.down_dims[-1], **common_res_block_kwargs
|
||||
config.down_dims[-1], config.down_dims[-1], **common_res_block_kwargs
|
||||
),
|
||||
]
|
||||
)
|
||||
@@ -514,8 +507,8 @@ class DiffusionConditionalUnet1d(nn.Module):
|
||||
)
|
||||
|
||||
self.final_conv = nn.Sequential(
|
||||
DiffusionConv1dBlock(cfg.down_dims[0], cfg.down_dims[0], kernel_size=cfg.kernel_size),
|
||||
nn.Conv1d(cfg.down_dims[0], cfg.output_shapes["action"][0], 1),
|
||||
DiffusionConv1dBlock(config.down_dims[0], config.down_dims[0], kernel_size=config.kernel_size),
|
||||
nn.Conv1d(config.down_dims[0], config.output_shapes["action"][0], 1),
|
||||
)
|
||||
|
||||
def forward(self, x: Tensor, timestep: Tensor | int, global_cond=None) -> Tensor:
|
||||
@@ -626,13 +619,13 @@ class DiffusionEMA:
|
||||
Exponential Moving Average of models weights
|
||||
"""
|
||||
|
||||
def __init__(self, cfg: DiffusionConfig, model: nn.Module):
|
||||
def __init__(self, config: DiffusionConfig, model: nn.Module):
|
||||
"""
|
||||
@crowsonkb's notes on EMA Warmup:
|
||||
If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models you plan
|
||||
to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps),
|
||||
gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999
|
||||
at 215.4k steps).
|
||||
If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models
|
||||
you plan to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999
|
||||
at 1M steps), gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999
|
||||
at 10K steps, 0.9999 at 215.4k steps).
|
||||
Args:
|
||||
inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1.
|
||||
power (float): Exponential factor of EMA warmup. Default: 2/3.
|
||||
@@ -643,11 +636,11 @@ class DiffusionEMA:
|
||||
self.averaged_model.eval()
|
||||
self.averaged_model.requires_grad_(False)
|
||||
|
||||
self.update_after_step = cfg.ema_update_after_step
|
||||
self.inv_gamma = cfg.ema_inv_gamma
|
||||
self.power = cfg.ema_power
|
||||
self.min_alpha = cfg.ema_min_alpha
|
||||
self.max_alpha = cfg.ema_max_alpha
|
||||
self.update_after_step = config.ema_update_after_step
|
||||
self.inv_gamma = config.ema_inv_gamma
|
||||
self.power = config.ema_power
|
||||
self.min_alpha = config.ema_min_alpha
|
||||
self.max_alpha = config.ema_max_alpha
|
||||
|
||||
self.alpha = 0.0
|
||||
self.optimization_step = 0
|
||||
|
||||
@@ -2,6 +2,7 @@ import inspect
|
||||
|
||||
from omegaconf import DictConfig, OmegaConf
|
||||
|
||||
from lerobot.common.policies.policy_protocol import Policy
|
||||
from lerobot.common.utils.utils import get_safe_torch_device
|
||||
|
||||
|
||||
@@ -20,42 +21,49 @@ def _policy_cfg_from_hydra_cfg(policy_cfg_class, hydra_cfg):
|
||||
return policy_cfg
|
||||
|
||||
|
||||
def make_policy(hydra_cfg: DictConfig, dataset_stats=None):
|
||||
if hydra_cfg.policy.name == "tdmpc":
|
||||
from lerobot.common.policies.tdmpc.policy import TDMPCPolicy
|
||||
|
||||
policy = TDMPCPolicy(
|
||||
hydra_cfg.policy,
|
||||
n_obs_steps=hydra_cfg.n_obs_steps,
|
||||
n_action_steps=hydra_cfg.n_action_steps,
|
||||
device=hydra_cfg.device,
|
||||
)
|
||||
elif hydra_cfg.policy.name == "diffusion":
|
||||
def get_policy_and_config_classes(name: str) -> tuple[Policy, object]:
|
||||
"""Get the policy's class and config class given a name (matching the policy class' `name` attribute)."""
|
||||
if name == "tdmpc":
|
||||
raise NotImplementedError("Coming soon!")
|
||||
elif name == "diffusion":
|
||||
from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
|
||||
from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
|
||||
policy_cfg = _policy_cfg_from_hydra_cfg(DiffusionConfig, hydra_cfg)
|
||||
policy = DiffusionPolicy(policy_cfg, hydra_cfg.training.offline_steps, dataset_stats)
|
||||
policy.to(get_safe_torch_device(hydra_cfg.device))
|
||||
elif hydra_cfg.policy.name == "act":
|
||||
return DiffusionPolicy, DiffusionConfig
|
||||
elif name == "act":
|
||||
from lerobot.common.policies.act.configuration_act import ACTConfig
|
||||
from lerobot.common.policies.act.modeling_act import ACTPolicy
|
||||
|
||||
policy_cfg = _policy_cfg_from_hydra_cfg(ACTConfig, hydra_cfg)
|
||||
policy = ACTPolicy(policy_cfg, dataset_stats)
|
||||
return ACTPolicy, ACTConfig
|
||||
else:
|
||||
raise NotImplementedError(f"Policy with name {name} is not implemented.")
|
||||
|
||||
|
||||
def make_policy(
|
||||
hydra_cfg: DictConfig, pretrained_policy_name_or_path: str | None = None, dataset_stats=None
|
||||
) -> Policy:
|
||||
"""Make an instance of a policy class.
|
||||
|
||||
Args:
|
||||
hydra_cfg: A parsed Hydra configuration (see scripts). If `pretrained_policy_name_or_path` is
|
||||
provided, only `hydra_cfg.policy.name` is used while everything else is ignored.
|
||||
pretrained_policy_name_or_path: Either the repo ID of a model hosted on the Hub or a path to a
|
||||
directory containing weights saved using `Policy.save_pretrained`. Note that providing this
|
||||
argument overrides everything in `hydra_cfg.policy` apart from `hydra_cfg.policy.name`.
|
||||
dataset_stats: Dataset statistics to use for (un)normalization of inputs/outputs in the policy. Must
|
||||
be provided when initializing a new policy, and must not be provided when loading a pretrained
|
||||
policy. Therefore, this argument is mutually exclusive with `pretrained_policy_name_or_path`.
|
||||
"""
|
||||
if not (pretrained_policy_name_or_path is None) ^ (dataset_stats is None):
|
||||
raise ValueError("Only one of `pretrained_policy_name_or_path` and `dataset_stats` may be provided.")
|
||||
|
||||
policy_cls, policy_cfg_class = get_policy_and_config_classes(hydra_cfg.policy.name)
|
||||
|
||||
if pretrained_policy_name_or_path is None:
|
||||
policy_cfg = _policy_cfg_from_hydra_cfg(policy_cfg_class, hydra_cfg)
|
||||
policy = policy_cls(policy_cfg, dataset_stats)
|
||||
policy.to(get_safe_torch_device(hydra_cfg.device))
|
||||
else:
|
||||
raise ValueError(hydra_cfg.policy.name)
|
||||
|
||||
if hydra_cfg.policy.pretrained_model_path:
|
||||
# TODO(rcadene): hack for old pretrained models from fowm
|
||||
if hydra_cfg.policy.name == "tdmpc" and "fowm" in hydra_cfg.policy.pretrained_model_path:
|
||||
if "offline" in hydra_cfg.policy.pretrained_model_path:
|
||||
policy.step[0] = 25000
|
||||
elif "final" in hydra_cfg.policy.pretrained_model_path:
|
||||
policy.step[0] = 100000
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
policy.load(hydra_cfg.policy.pretrained_model_path)
|
||||
policy = policy_cls.from_pretrained(pretrained_policy_name_or_path)
|
||||
|
||||
return policy
|
||||
|
||||
@@ -57,17 +57,28 @@ def create_stats_buffers(
|
||||
)
|
||||
|
||||
if stats is not None:
|
||||
# Note: The clone is needed to make sure that the logic in save_pretrained doesn't see duplicated
|
||||
# tensors anywhere (for example, when we use the same stats for normalization and
|
||||
# unnormalization). See the logic here
|
||||
# https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L97.
|
||||
if mode == "mean_std":
|
||||
buffer["mean"].data = stats[key]["mean"]
|
||||
buffer["std"].data = stats[key]["std"]
|
||||
buffer["mean"].data = stats[key]["mean"].clone()
|
||||
buffer["std"].data = stats[key]["std"].clone()
|
||||
elif mode == "min_max":
|
||||
buffer["min"].data = stats[key]["min"]
|
||||
buffer["max"].data = stats[key]["max"]
|
||||
buffer["min"].data = stats[key]["min"].clone()
|
||||
buffer["max"].data = stats[key]["max"].clone()
|
||||
|
||||
stats_buffers[key] = buffer
|
||||
return stats_buffers
|
||||
|
||||
|
||||
def _no_stats_error_str(name: str) -> str:
|
||||
return (
|
||||
f"`{name}` is infinity. You should either initialize with `stats` as an argument, or use a "
|
||||
"pretrained model."
|
||||
)
|
||||
|
||||
|
||||
class Normalize(nn.Module):
|
||||
"""Normalizes data (e.g. "observation.image") for more stable and faster convergence during training."""
|
||||
|
||||
@@ -99,7 +110,6 @@ class Normalize(nn.Module):
|
||||
self.shapes = shapes
|
||||
self.modes = modes
|
||||
self.stats = stats
|
||||
# `self.buffer_observation_state["mean"]` contains `torch.tensor(state_dim)`
|
||||
stats_buffers = create_stats_buffers(shapes, modes, stats)
|
||||
for key, buffer in stats_buffers.items():
|
||||
setattr(self, "buffer_" + key.replace(".", "_"), buffer)
|
||||
@@ -113,26 +123,14 @@ class Normalize(nn.Module):
|
||||
if mode == "mean_std":
|
||||
mean = buffer["mean"]
|
||||
std = buffer["std"]
|
||||
assert not torch.isinf(mean).any(), (
|
||||
"`mean` is infinity. You forgot to initialize with `stats` as argument, or called "
|
||||
"`policy.load_state_dict`."
|
||||
)
|
||||
assert not torch.isinf(std).any(), (
|
||||
"`std` is infinity. You forgot to initialize with `stats` as argument, or called "
|
||||
"`policy.load_state_dict`."
|
||||
)
|
||||
assert not torch.isinf(mean).any(), _no_stats_error_str("mean")
|
||||
assert not torch.isinf(std).any(), _no_stats_error_str("std")
|
||||
batch[key] = (batch[key] - mean) / (std + 1e-8)
|
||||
elif mode == "min_max":
|
||||
min = buffer["min"]
|
||||
max = buffer["max"]
|
||||
assert not torch.isinf(min).any(), (
|
||||
"`min` is infinity. You forgot to initialize with `stats` as argument, or called "
|
||||
"`policy.load_state_dict`."
|
||||
)
|
||||
assert not torch.isinf(max).any(), (
|
||||
"`max` is infinity. You forgot to initialize with `stats` as argument, or called "
|
||||
"`policy.load_state_dict`."
|
||||
)
|
||||
assert not torch.isinf(min).any(), _no_stats_error_str("min")
|
||||
assert not torch.isinf(max).any(), _no_stats_error_str("max")
|
||||
# normalize to [0,1]
|
||||
batch[key] = (batch[key] - min) / (max - min)
|
||||
# normalize to [-1, 1]
|
||||
@@ -190,26 +188,14 @@ class Unnormalize(nn.Module):
|
||||
if mode == "mean_std":
|
||||
mean = buffer["mean"]
|
||||
std = buffer["std"]
|
||||
assert not torch.isinf(mean).any(), (
|
||||
"`mean` is infinity. You forgot to initialize with `stats` as argument, or called "
|
||||
"`policy.load_state_dict`."
|
||||
)
|
||||
assert not torch.isinf(std).any(), (
|
||||
"`std` is infinity. You forgot to initialize with `stats` as argument, or called "
|
||||
"`policy.load_state_dict`."
|
||||
)
|
||||
assert not torch.isinf(mean).any(), _no_stats_error_str("mean")
|
||||
assert not torch.isinf(std).any(), _no_stats_error_str("std")
|
||||
batch[key] = batch[key] * std + mean
|
||||
elif mode == "min_max":
|
||||
min = buffer["min"]
|
||||
max = buffer["max"]
|
||||
assert not torch.isinf(min).any(), (
|
||||
"`min` is infinity. You forgot to initialize with `stats` as argument, or called "
|
||||
"`policy.load_state_dict`."
|
||||
)
|
||||
assert not torch.isinf(max).any(), (
|
||||
"`max` is infinity. You forgot to initialize with `stats` as argument, or called "
|
||||
"`policy.load_state_dict`."
|
||||
)
|
||||
assert not torch.isinf(min).any(), _no_stats_error_str("min")
|
||||
assert not torch.isinf(max).any(), _no_stats_error_str("max")
|
||||
batch[key] = (batch[key] + 1) / 2
|
||||
batch[key] = batch[key] * (max - min) + min
|
||||
else:
|
||||
|
||||
@@ -14,7 +14,10 @@ from torch import Tensor
|
||||
|
||||
@runtime_checkable
|
||||
class Policy(Protocol):
|
||||
"""The required interface for implementing a policy."""
|
||||
"""The required interface for implementing a policy.
|
||||
|
||||
We also expect all policies to subclass torch.nn.Module and PyTorchModelHubMixin.
|
||||
"""
|
||||
|
||||
name: str
|
||||
|
||||
|
||||
Reference in New Issue
Block a user