Remove warnings (#111)

- Replace `use_pretrained_backbone` with `pretrained_backbone_weights`
- Bump diffusers' minimum version `0.26.3` -> `0.27.2`
- Add ignore flags in CI's pytest
- Change Box observation spaces in simulation environments
- Set `version_base="1.2"` in Hydra initializations
- Bump einops' minimum version `0.7.0` -> `0.8.0`
This commit is contained in:
Simon Alibert
2024-04-29 00:31:33 +02:00
committed by GitHub
parent 55dc9f7f51
commit 791506dfb8
14 changed files with 52 additions and 36 deletions

View File

@@ -53,7 +53,14 @@ jobs:
poetry install --all-extras poetry install --all-extras
- name: Test with pytest - name: Test with pytest
run: pytest -v --cov=./lerobot --durations=0 tests run: |
pytest tests -v --cov=./lerobot --durations=0 \
-W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
-W ignore::UserWarning:torch.utils.data.dataloader:558 \
-W ignore::UserWarning:gymnasium.utils.env_checker:247 \
&& rm -rf tests/outputs outputs
- name: Test end-to-end - name: Test end-to-end
run: make test-end-to-end run: |
make test-end-to-end \
&& rm -rf outputs

View File

@@ -33,8 +33,8 @@ class ActionChunkingTransformerConfig:
deviation and "min_max" which rescale in a [-1, 1] range. deviation and "min_max" which rescale in a [-1, 1] range.
unnormalize_output_modes: Similar dictionary as `normalize_input_modes`, but to unnormalize in original scale. unnormalize_output_modes: Similar dictionary as `normalize_input_modes`, but to unnormalize in original scale.
vision_backbone: Name of the torchvision resnet backbone to use for encoding images. vision_backbone: Name of the torchvision resnet backbone to use for encoding images.
use_pretrained_backbone: Whether the backbone should be initialized with pretrained weights from pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone.
torchvision. `None` means no pretrained weights.
replace_final_stride_with_dilation: Whether to replace the ResNet's final 2x2 stride with a dilated replace_final_stride_with_dilation: Whether to replace the ResNet's final 2x2 stride with a dilated
convolution. convolution.
pre_norm: Whether to use "pre-norm" in the transformer blocks. pre_norm: Whether to use "pre-norm" in the transformer blocks.
@@ -90,7 +90,7 @@ class ActionChunkingTransformerConfig:
# Architecture. # Architecture.
# Vision backbone. # Vision backbone.
vision_backbone: str = "resnet18" vision_backbone: str = "resnet18"
use_pretrained_backbone: bool = True pretrained_backbone_weights: str | None = "ResNet18_Weights.IMAGENET1K_V1"
replace_final_stride_with_dilation: int = False replace_final_stride_with_dilation: int = False
# Transformer layers. # Transformer layers.
pre_norm: bool = False pre_norm: bool = False

View File

@@ -104,7 +104,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
# Backbone for image feature extraction. # Backbone for image feature extraction.
backbone_model = getattr(torchvision.models, cfg.vision_backbone)( backbone_model = getattr(torchvision.models, cfg.vision_backbone)(
replace_stride_with_dilation=[False, False, cfg.replace_final_stride_with_dilation], replace_stride_with_dilation=[False, False, cfg.replace_final_stride_with_dilation],
pretrained=cfg.use_pretrained_backbone, weights=cfg.pretrained_backbone_weights,
norm_layer=FrozenBatchNorm2d, norm_layer=FrozenBatchNorm2d,
) )
# Note: The assumption here is that we are using a ResNet model (and hence layer4 is the final feature # Note: The assumption here is that we are using a ResNet model (and hence layer4 is the final feature

View File

@@ -35,8 +35,8 @@ class DiffusionConfig:
within the image size. If None, no cropping is done. within the image size. If None, no cropping is done.
crop_is_random: Whether the crop should be random at training time (it's always a center crop in eval crop_is_random: Whether the crop should be random at training time (it's always a center crop in eval
mode). mode).
use_pretrained_backbone: Whether the backbone should be initialized with pretrained weights from pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone.
torchvision. `None` means no pretrained weights.
use_group_norm: Whether to replace batch normalization with group normalization in the backbone. use_group_norm: Whether to replace batch normalization with group normalization in the backbone.
The group sizes are set to be about 16 (to be precise, feature_dim // 16). The group sizes are set to be about 16 (to be precise, feature_dim // 16).
spatial_softmax_num_keypoints: Number of keypoints for SpatialSoftmax. spatial_softmax_num_keypoints: Number of keypoints for SpatialSoftmax.
@@ -96,7 +96,7 @@ class DiffusionConfig:
vision_backbone: str = "resnet18" vision_backbone: str = "resnet18"
crop_shape: tuple[int, int] | None = (84, 84) crop_shape: tuple[int, int] | None = (84, 84)
crop_is_random: bool = True crop_is_random: bool = True
use_pretrained_backbone: bool = False pretrained_backbone_weights: str | None = None
use_group_norm: bool = True use_group_norm: bool = True
spatial_softmax_num_keypoints: int = 32 spatial_softmax_num_keypoints: int = 32
# Unet. # Unet.

View File

@@ -378,13 +378,13 @@ class _RgbEncoder(nn.Module):
# Set up backbone. # Set up backbone.
backbone_model = getattr(torchvision.models, cfg.vision_backbone)( backbone_model = getattr(torchvision.models, cfg.vision_backbone)(
pretrained=cfg.use_pretrained_backbone weights=cfg.pretrained_backbone_weights
) )
# Note: This assumes that the layer4 feature map is children()[-3] # Note: This assumes that the layer4 feature map is children()[-3]
# TODO(alexander-soare): Use a safer alternative. # TODO(alexander-soare): Use a safer alternative.
self.backbone = nn.Sequential(*(list(backbone_model.children())[:-2])) self.backbone = nn.Sequential(*(list(backbone_model.children())[:-2]))
if cfg.use_group_norm: if cfg.use_group_norm:
if cfg.use_pretrained_backbone: if cfg.pretrained_backbone_weights:
raise ValueError( raise ValueError(
"You can't replace BatchNorm in a pretrained model without ruining the weights!" "You can't replace BatchNorm in a pretrained model without ruining the weights!"
) )

View File

@@ -0,0 +1,12 @@
import warnings
import imageio
def write_video(video_path, stacked_frames, fps):
    """Write `stacked_frames` to `video_path` as a video encoded at `fps` frames per second."""
    with warnings.catch_warnings():
        # imageio's ffmpeg backend imports pkg_resources, which emits a
        # DeprecationWarning on import; silence only that specific message.
        warnings.filterwarnings(
            "ignore",
            message="pkg_resources is deprecated as an API",
            category=DeprecationWarning,
        )
        imageio.mimsave(video_path, stacked_frames, fps=fps)

View File

@@ -92,7 +92,8 @@ def init_hydra_config(config_path: str, overrides: list[str] | None = None) -> D
hydra.core.global_hydra.GlobalHydra.instance().clear() hydra.core.global_hydra.GlobalHydra.instance().clear()
# Hydra needs a path relative to this file. # Hydra needs a path relative to this file.
hydra.initialize( hydra.initialize(
str(_relative_path_between(Path(config_path).absolute().parent, Path(__file__).absolute().parent)) str(_relative_path_between(Path(config_path).absolute().parent, Path(__file__).absolute().parent)),
version_base="1.2",
) )
cfg = hydra.compose(Path(config_path).stem, overrides) cfg = hydra.compose(Path(config_path).stem, overrides)
return cfg return cfg

View File

@@ -45,7 +45,7 @@ policy:
# Architecture. # Architecture.
# Vision backbone. # Vision backbone.
vision_backbone: resnet18 vision_backbone: resnet18
use_pretrained_backbone: true pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
replace_final_stride_with_dilation: false replace_final_stride_with_dilation: false
# Transformer layers. # Transformer layers.
pre_norm: false pre_norm: false

View File

@@ -61,7 +61,7 @@ policy:
vision_backbone: resnet18 vision_backbone: resnet18
crop_shape: [84, 84] crop_shape: [84, 84]
crop_is_random: True crop_is_random: True
use_pretrained_backbone: false pretrained_backbone_weights: null
use_group_norm: True use_group_norm: True
spatial_softmax_num_keypoints: 32 spatial_softmax_num_keypoints: 32
# Unet. # Unet.

View File

@@ -38,7 +38,6 @@ from pathlib import Path
import einops import einops
import gymnasium as gym import gymnasium as gym
import imageio
import numpy as np import numpy as np
import torch import torch
from datasets import Dataset, Features, Image, Sequence, Value from datasets import Dataset, Features, Image, Sequence, Value
@@ -51,13 +50,10 @@ from lerobot.common.envs.factory import make_env
from lerobot.common.envs.utils import postprocess_action, preprocess_observation from lerobot.common.envs.utils import postprocess_action, preprocess_observation
from lerobot.common.logger import log_output_dir from lerobot.common.logger import log_output_dir
from lerobot.common.policies.factory import make_policy from lerobot.common.policies.factory import make_policy
from lerobot.common.utils.io_utils import write_video
from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed
def write_video(video_path, stacked_frames, fps):
imageio.mimsave(video_path, stacked_frames, fps=fps)
def eval_policy( def eval_policy(
env: gym.vector.VectorEnv, env: gym.vector.VectorEnv,
policy: torch.nn.Module, policy: torch.nn.Module,

View File

@@ -22,7 +22,7 @@ from lerobot.common.utils.utils import (
from lerobot.scripts.eval import eval_policy from lerobot.scripts.eval import eval_policy
@hydra.main(version_base=None, config_name="default", config_path="../configs") @hydra.main(version_base="1.2", config_name="default", config_path="../configs")
def train_cli(cfg: dict): def train_cli(cfg: dict):
train( train(
cfg, cfg,

View File

@@ -16,7 +16,7 @@ MAX_NUM_STEPS = 1000
FIRST_FRAME = 0 FIRST_FRAME = 0
@hydra.main(version_base=None, config_name="default", config_path="../configs") @hydra.main(version_base="1.2", config_name="default", config_path="../configs")
def visualize_dataset_cli(cfg: dict): def visualize_dataset_cli(cfg: dict):
visualize_dataset(cfg, out_dir=hydra.core.hydra_config.HydraConfig.get().runtime.output_dir) visualize_dataset(cfg, out_dir=hydra.core.hydra_config.HydraConfig.get().runtime.output_dir)

26
poetry.lock generated
View File

@@ -597,13 +597,13 @@ files = [
[[package]] [[package]]
name = "diffusers" name = "diffusers"
version = "0.26.3" version = "0.27.2"
description = "State-of-the-art diffusion in PyTorch and JAX." description = "State-of-the-art diffusion in PyTorch and JAX."
optional = false optional = false
python-versions = ">=3.8.0" python-versions = ">=3.8.0"
files = [ files = [
{file = "diffusers-0.26.3-py3-none-any.whl", hash = "sha256:f8f5710c8f9170e9749f0b104f50fc4a1259f8aff3effed99598409a5ea9b1cd"}, {file = "diffusers-0.27.2-py3-none-any.whl", hash = "sha256:85da5cd1098ab428535d592136973ec0c3f12f78148c94b379cb9f02d2414e75"},
{file = "diffusers-0.26.3.tar.gz", hash = "sha256:e217ea39e85b0bd34fee11f8b39fd00116680b05ff7a70c0b4fdab5351ae4f96"}, {file = "diffusers-0.27.2.tar.gz", hash = "sha256:6cefd7770d7fc1d139614233aa17cdcd639c138d0c3517b8d8bbc8cf573050a0"},
] ]
[package.dependencies] [package.dependencies]
@@ -617,12 +617,12 @@ requests = "*"
safetensors = ">=0.3.1" safetensors = ">=0.3.1"
[package.extras] [package.extras]
dev = ["GitPython (<3.1.19)", "Jinja2", "accelerate (>=0.11.0)", "compel (==0.1.8)", "datasets", "flax (>=0.4.1)", "hf-doc-builder (>=0.3.0)", "invisible-watermark (>=0.2.0)", "isort (>=5.5.4)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "ruff (==0.1.5)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "torch (>=1.4,<2.2.0)", "torchvision (<0.17)", "transformers (>=4.25.1)", "urllib3 (<=2.0.0)"] dev = ["GitPython (<3.1.19)", "Jinja2", "accelerate (>=0.11.0)", "compel (==0.1.8)", "datasets", "flax (>=0.4.1)", "hf-doc-builder (>=0.3.0)", "invisible-watermark (>=0.2.0)", "isort (>=5.5.4)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "ruff (==0.1.5)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "torch (>=1.4)", "torchvision", "transformers (>=4.25.1)", "urllib3 (<=2.0.0)"]
docs = ["hf-doc-builder (>=0.3.0)"] docs = ["hf-doc-builder (>=0.3.0)"]
flax = ["flax (>=0.4.1)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)"] flax = ["flax (>=0.4.1)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)"]
quality = ["hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (==0.1.5)", "urllib3 (<=2.0.0)"] quality = ["hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (==0.1.5)", "urllib3 (<=2.0.0)"]
test = ["GitPython (<3.1.19)", "Jinja2", "compel (==0.1.8)", "datasets", "invisible-watermark (>=0.2.0)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "torchvision (<0.17)", "transformers (>=4.25.1)"] test = ["GitPython (<3.1.19)", "Jinja2", "compel (==0.1.8)", "datasets", "invisible-watermark (>=0.2.0)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "torchvision", "transformers (>=4.25.1)"]
torch = ["accelerate (>=0.11.0)", "torch (>=1.4,<2.2.0)"] torch = ["accelerate (>=0.11.0)", "torch (>=1.4)"]
training = ["Jinja2", "accelerate (>=0.11.0)", "datasets", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "tensorboard"] training = ["Jinja2", "accelerate (>=0.11.0)", "datasets", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "tensorboard"]
[[package]] [[package]]
@@ -779,13 +779,13 @@ files = [
[[package]] [[package]]
name = "einops" name = "einops"
version = "0.7.0" version = "0.8.0"
description = "A new flavour of deep learning operations" description = "A new flavour of deep learning operations"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "einops-0.7.0-py3-none-any.whl", hash = "sha256:0f3096f26b914f465f6ff3c66f5478f9a5e380bb367ffc6493a68143fbbf1fd1"}, {file = "einops-0.8.0-py3-none-any.whl", hash = "sha256:9572fb63046264a862693b0a87088af3bdc8c068fde03de63453cbbde245465f"},
{file = "einops-0.7.0.tar.gz", hash = "sha256:b2b04ad6081a3b227080c9bf5e3ace7160357ff03043cd66cc5b2319eb7031d1"}, {file = "einops-0.8.0.tar.gz", hash = "sha256:63486517fed345712a8385c100cb279108d9d47e6ae59099b07657e983deae85"},
] ]
[[package]] [[package]]
@@ -1121,7 +1121,7 @@ mujoco = "^2.3.7"
type = "git" type = "git"
url = "git@github.com:huggingface/gym-aloha.git" url = "git@github.com:huggingface/gym-aloha.git"
reference = "HEAD" reference = "HEAD"
resolved_reference = "c636f05ba0d1760df94537da84c860be1487e17f" resolved_reference = "12c8171d6708ec6d8547c310fe736d8d494195c5"
[[package]] [[package]]
name = "gym-pusht" name = "gym-pusht"
@@ -1144,7 +1144,7 @@ shapely = "^2.0.3"
type = "git" type = "git"
url = "git@github.com:huggingface/gym-pusht.git" url = "git@github.com:huggingface/gym-pusht.git"
reference = "HEAD" reference = "HEAD"
resolved_reference = "080d4ce4d8d3140b2fd204ed628bda14dc58ff06" resolved_reference = "e0684ff988d223808c0a9dcfaba9dc4991791370"
[[package]] [[package]]
name = "gym-xarm" name = "gym-xarm"
@@ -1164,7 +1164,7 @@ mujoco = "^2.3.7"
type = "git" type = "git"
url = "git@github.com:huggingface/gym-xarm.git" url = "git@github.com:huggingface/gym-xarm.git"
reference = "HEAD" reference = "HEAD"
resolved_reference = "27e65c981f9a8d252eca8f157f83508ba6149db7" resolved_reference = "415811fc34863d349ed113eab77e756726c03525"
[[package]] [[package]]
name = "gymnasium" name = "gymnasium"
@@ -4299,4 +4299,4 @@ xarm = ["gym-xarm"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "8bd1352973c6104e52f50b68f7387d26ced9b07a52e889540b73d132865cda38" content-hash = "0f72eb92ac8817a46f0659b4d72647a6b76f6e4ba762d11b280f8a88e6cd4371"

View File

@@ -34,13 +34,13 @@ wandb = "^0.16.3"
imageio = {extras = ["ffmpeg"], version = "^2.34.0"} imageio = {extras = ["ffmpeg"], version = "^2.34.0"}
gdown = "^5.1.0" gdown = "^5.1.0"
hydra-core = "^1.3.2" hydra-core = "^1.3.2"
einops = "^0.7.0" einops = "^0.8.0"
pymunk = "^6.6.0" pymunk = "^6.6.0"
zarr = "^2.17.0" zarr = "^2.17.0"
numba = "^0.59.0" numba = "^0.59.0"
torch = "^2.2.1" torch = "^2.2.1"
opencv-python = "^4.9.0.80" opencv-python = "^4.9.0.80"
diffusers = "^0.26.3" diffusers = "^0.27.2"
torchvision = "^0.18.0" torchvision = "^0.18.0"
h5py = "^3.10.0" h5py = "^3.10.0"
huggingface-hub = "^0.21.4" huggingface-hub = "^0.21.4"