From 791506dfb8155a83bcdc568d174ecfd9e194a913 Mon Sep 17 00:00:00 2001 From: Simon Alibert <75076266+aliberts@users.noreply.github.com> Date: Mon, 29 Apr 2024 00:31:33 +0200 Subject: [PATCH] Remove warnings (#111) - Replace `use_pretrained_backbone` with `pretrained_backbone_weights` - Bump diffusers' minimum version `0.26.3` -> `0.27.2` - Add ignore flags in CI's pytest - Change Box observation spaces in simulation environments - Set `version_base="1.2"` in Hydra initializations - Bump einops' minimum version `0.7.0` -> `0.8.0` --- .github/workflows/test.yml | 11 ++++++-- .../common/policies/act/configuration_act.py | 6 ++--- lerobot/common/policies/act/modeling_act.py | 2 +- .../diffusion/configuration_diffusion.py | 6 ++--- .../policies/diffusion/modeling_diffusion.py | 4 +-- lerobot/common/utils/io_utils.py | 12 +++++++++ lerobot/common/utils/utils.py | 3 ++- lerobot/configs/policy/act.yaml | 2 +- lerobot/configs/policy/diffusion.yaml | 2 +- lerobot/scripts/eval.py | 6 +---- lerobot/scripts/train.py | 2 +- lerobot/scripts/visualize_dataset.py | 2 +- poetry.lock | 26 +++++++++---------- pyproject.toml | 4 +-- 14 files changed, 52 insertions(+), 36 deletions(-) create mode 100644 lerobot/common/utils/io_utils.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7bc643d15..76e7000db 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -53,7 +53,14 @@ jobs: poetry install --all-extras - name: Test with pytest - run: pytest -v --cov=./lerobot --durations=0 tests + run: | + pytest tests -v --cov=./lerobot --durations=0 \ + -W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \ + -W ignore::UserWarning:torch.utils.data.dataloader:558 \ + -W ignore::UserWarning:gymnasium.utils.env_checker:247 \ + && rm -rf tests/outputs outputs - name: Test end-to-end - run: make test-end-to-end + run: | + make test-end-to-end \ + && rm -rf outputs diff --git a/lerobot/common/policies/act/configuration_act.py 
b/lerobot/common/policies/act/configuration_act.py index c8c85c049..7564e6f70 100644 --- a/lerobot/common/policies/act/configuration_act.py +++ b/lerobot/common/policies/act/configuration_act.py @@ -33,8 +33,8 @@ class ActionChunkingTransformerConfig: deviation and "min_max" which rescale in a [-1, 1] range. unnormalize_output_modes: Similar dictionary as `normalize_input_modes`, but to unormalize in original scale. vision_backbone: Name of the torchvision resnet backbone to use for encoding images. - use_pretrained_backbone: Whether the backbone should be initialized with pretrained weights from - torchvision. + pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone. + `None` means no pretrained weights. replace_final_stride_with_dilation: Whether to replace the ResNet's final 2x2 stride with a dilated convolution. pre_norm: Whether to use "pre-norm" in the transformer blocks. @@ -90,7 +90,7 @@ class ActionChunkingTransformerConfig: # Architecture. # Vision backbone. vision_backbone: str = "resnet18" - use_pretrained_backbone: bool = True + pretrained_backbone_weights: str | None = "ResNet18_Weights.IMAGENET1K_V1" replace_final_stride_with_dilation: int = False # Transformer layers. pre_norm: bool = False diff --git a/lerobot/common/policies/act/modeling_act.py b/lerobot/common/policies/act/modeling_act.py index 4501c6cc4..f0190ed33 100644 --- a/lerobot/common/policies/act/modeling_act.py +++ b/lerobot/common/policies/act/modeling_act.py @@ -104,7 +104,7 @@ class ActionChunkingTransformerPolicy(nn.Module): # Backbone for image feature extraction.
backbone_model = getattr(torchvision.models, cfg.vision_backbone)( replace_stride_with_dilation=[False, False, cfg.replace_final_stride_with_dilation], - pretrained=cfg.use_pretrained_backbone, + weights=cfg.pretrained_backbone_weights, norm_layer=FrozenBatchNorm2d, ) # Note: The assumption here is that we are using a ResNet model (and hence layer4 is the final feature diff --git a/lerobot/common/policies/diffusion/configuration_diffusion.py b/lerobot/common/policies/diffusion/configuration_diffusion.py index a5c739c46..432afa218 100644 --- a/lerobot/common/policies/diffusion/configuration_diffusion.py +++ b/lerobot/common/policies/diffusion/configuration_diffusion.py @@ -35,8 +35,8 @@ class DiffusionConfig: within the image size. If None, no cropping is done. crop_is_random: Whether the crop should be random at training time (it's always a center crop in eval mode). - use_pretrained_backbone: Whether the backbone should be initialized with pretrained weights from - torchvision. + pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone. + `None` means no pretrained weights. use_group_norm: Whether to replace batch normalization with group normalization in the backbone. The group sizes are set to be about 16 (to be precise, feature_dim // 16). spatial_softmax_num_keypoints: Number of keypoints for SpatialSoftmax. @@ -96,7 +96,7 @@ class DiffusionConfig: vision_backbone: str = "resnet18" crop_shape: tuple[int, int] | None = (84, 84) crop_is_random: bool = True - use_pretrained_backbone: bool = False + pretrained_backbone_weights: str | None = None use_group_norm: bool = True spatial_softmax_num_keypoints: int = 32 # Unet.
diff --git a/lerobot/common/policies/diffusion/modeling_diffusion.py b/lerobot/common/policies/diffusion/modeling_diffusion.py index 1dd545d34..9e52ae926 100644 --- a/lerobot/common/policies/diffusion/modeling_diffusion.py +++ b/lerobot/common/policies/diffusion/modeling_diffusion.py @@ -378,13 +378,13 @@ class _RgbEncoder(nn.Module): # Set up backbone. backbone_model = getattr(torchvision.models, cfg.vision_backbone)( - pretrained=cfg.use_pretrained_backbone + weights=cfg.pretrained_backbone_weights ) # Note: This assumes that the layer4 feature map is children()[-3] # TODO(alexander-soare): Use a safer alternative. self.backbone = nn.Sequential(*(list(backbone_model.children())[:-2])) if cfg.use_group_norm: - if cfg.use_pretrained_backbone: + if cfg.pretrained_backbone_weights: raise ValueError( "You can't replace BatchNorm in a pretrained model without ruining the weights!" ) diff --git a/lerobot/common/utils/io_utils.py b/lerobot/common/utils/io_utils.py new file mode 100644 index 000000000..5d727bd74 --- /dev/null +++ b/lerobot/common/utils/io_utils.py @@ -0,0 +1,12 @@ +import warnings + +import imageio + + +def write_video(video_path, stacked_frames, fps): + # Filter out DeprecationWarnings raised from pkg_resources + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", "pkg_resources is deprecated as an API", category=DeprecationWarning + ) + imageio.mimsave(video_path, stacked_frames, fps=fps) diff --git a/lerobot/common/utils/utils.py b/lerobot/common/utils/utils.py index 81b3d986e..9d0ddd986 100644 --- a/lerobot/common/utils/utils.py +++ b/lerobot/common/utils/utils.py @@ -92,7 +92,8 @@ def init_hydra_config(config_path: str, overrides: list[str] | None = None) -> D hydra.core.global_hydra.GlobalHydra.instance().clear() # Hydra needs a path relative to this file. 
hydra.initialize( - str(_relative_path_between(Path(config_path).absolute().parent, Path(__file__).absolute().parent)) + str(_relative_path_between(Path(config_path).absolute().parent, Path(__file__).absolute().parent)), + version_base="1.2", ) cfg = hydra.compose(Path(config_path).stem, overrides) return cfg diff --git a/lerobot/configs/policy/act.yaml b/lerobot/configs/policy/act.yaml index d4ad195c2..c67793e47 100644 --- a/lerobot/configs/policy/act.yaml +++ b/lerobot/configs/policy/act.yaml @@ -45,7 +45,7 @@ policy: # Architecture. # Vision backbone. vision_backbone: resnet18 - use_pretrained_backbone: true + pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1 replace_final_stride_with_dilation: false # Transformer layers. pre_norm: false diff --git a/lerobot/configs/policy/diffusion.yaml b/lerobot/configs/policy/diffusion.yaml index 999d62ea4..f96e21c2d 100644 --- a/lerobot/configs/policy/diffusion.yaml +++ b/lerobot/configs/policy/diffusion.yaml @@ -61,7 +61,7 @@ policy: vision_backbone: resnet18 crop_shape: [84, 84] crop_is_random: True - use_pretrained_backbone: false + pretrained_backbone_weights: null use_group_norm: True spatial_softmax_num_keypoints: 32 # Unet. 
diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py index c66e7ee9f..0c10b7a53 100644 --- a/lerobot/scripts/eval.py +++ b/lerobot/scripts/eval.py @@ -38,7 +38,6 @@ from pathlib import Path import einops import gymnasium as gym -import imageio import numpy as np import torch from datasets import Dataset, Features, Image, Sequence, Value @@ -51,13 +50,10 @@ from lerobot.common.envs.factory import make_env from lerobot.common.envs.utils import postprocess_action, preprocess_observation from lerobot.common.logger import log_output_dir from lerobot.common.policies.factory import make_policy +from lerobot.common.utils.io_utils import write_video from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed -def write_video(video_path, stacked_frames, fps): - imageio.mimsave(video_path, stacked_frames, fps=fps) - - def eval_policy( env: gym.vector.VectorEnv, policy: torch.nn.Module, diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index 0447c84ed..c4c0ea574 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -22,7 +22,7 @@ from lerobot.common.utils.utils import ( from lerobot.scripts.eval import eval_policy -@hydra.main(version_base=None, config_name="default", config_path="../configs") +@hydra.main(version_base="1.2", config_name="default", config_path="../configs") def train_cli(cfg: dict): train( cfg, diff --git a/lerobot/scripts/visualize_dataset.py b/lerobot/scripts/visualize_dataset.py index 3d4d8c536..a5be5e3f9 100644 --- a/lerobot/scripts/visualize_dataset.py +++ b/lerobot/scripts/visualize_dataset.py @@ -16,7 +16,7 @@ MAX_NUM_STEPS = 1000 FIRST_FRAME = 0 -@hydra.main(version_base=None, config_name="default", config_path="../configs") +@hydra.main(version_base="1.2", config_name="default", config_path="../configs") def visualize_dataset_cli(cfg: dict): visualize_dataset(cfg, out_dir=hydra.core.hydra_config.HydraConfig.get().runtime.output_dir) diff --git a/poetry.lock 
b/poetry.lock index b7cb0758b..79c486411 100644 --- a/poetry.lock +++ b/poetry.lock @@ -597,13 +597,13 @@ files = [ [[package]] name = "diffusers" -version = "0.26.3" +version = "0.27.2" description = "State-of-the-art diffusion in PyTorch and JAX." optional = false python-versions = ">=3.8.0" files = [ - {file = "diffusers-0.26.3-py3-none-any.whl", hash = "sha256:f8f5710c8f9170e9749f0b104f50fc4a1259f8aff3effed99598409a5ea9b1cd"}, - {file = "diffusers-0.26.3.tar.gz", hash = "sha256:e217ea39e85b0bd34fee11f8b39fd00116680b05ff7a70c0b4fdab5351ae4f96"}, + {file = "diffusers-0.27.2-py3-none-any.whl", hash = "sha256:85da5cd1098ab428535d592136973ec0c3f12f78148c94b379cb9f02d2414e75"}, + {file = "diffusers-0.27.2.tar.gz", hash = "sha256:6cefd7770d7fc1d139614233aa17cdcd639c138d0c3517b8d8bbc8cf573050a0"}, ] [package.dependencies] @@ -617,12 +617,12 @@ requests = "*" safetensors = ">=0.3.1" [package.extras] -dev = ["GitPython (<3.1.19)", "Jinja2", "accelerate (>=0.11.0)", "compel (==0.1.8)", "datasets", "flax (>=0.4.1)", "hf-doc-builder (>=0.3.0)", "invisible-watermark (>=0.2.0)", "isort (>=5.5.4)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "ruff (==0.1.5)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "torch (>=1.4,<2.2.0)", "torchvision (<0.17)", "transformers (>=4.25.1)", "urllib3 (<=2.0.0)"] +dev = ["GitPython (<3.1.19)", "Jinja2", "accelerate (>=0.11.0)", "compel (==0.1.8)", "datasets", "flax (>=0.4.1)", "hf-doc-builder (>=0.3.0)", "invisible-watermark (>=0.2.0)", "isort (>=5.5.4)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "ruff (==0.1.5)", "safetensors (>=0.3.1)", "scipy", "sentencepiece 
(>=0.1.91,!=0.1.92)", "tensorboard", "torch (>=1.4)", "torchvision", "transformers (>=4.25.1)", "urllib3 (<=2.0.0)"] docs = ["hf-doc-builder (>=0.3.0)"] flax = ["flax (>=0.4.1)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)"] quality = ["hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (==0.1.5)", "urllib3 (<=2.0.0)"] -test = ["GitPython (<3.1.19)", "Jinja2", "compel (==0.1.8)", "datasets", "invisible-watermark (>=0.2.0)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "torchvision (<0.17)", "transformers (>=4.25.1)"] -torch = ["accelerate (>=0.11.0)", "torch (>=1.4,<2.2.0)"] +test = ["GitPython (<3.1.19)", "Jinja2", "compel (==0.1.8)", "datasets", "invisible-watermark (>=0.2.0)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "torchvision", "transformers (>=4.25.1)"] +torch = ["accelerate (>=0.11.0)", "torch (>=1.4)"] training = ["Jinja2", "accelerate (>=0.11.0)", "datasets", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "tensorboard"] [[package]] @@ -779,13 +779,13 @@ files = [ [[package]] name = "einops" -version = "0.7.0" +version = "0.8.0" description = "A new flavour of deep learning operations" optional = false python-versions = ">=3.8" files = [ - {file = "einops-0.7.0-py3-none-any.whl", hash = "sha256:0f3096f26b914f465f6ff3c66f5478f9a5e380bb367ffc6493a68143fbbf1fd1"}, - {file = "einops-0.7.0.tar.gz", hash = "sha256:b2b04ad6081a3b227080c9bf5e3ace7160357ff03043cd66cc5b2319eb7031d1"}, + {file = "einops-0.8.0-py3-none-any.whl", hash = "sha256:9572fb63046264a862693b0a87088af3bdc8c068fde03de63453cbbde245465f"}, + {file = "einops-0.8.0.tar.gz", hash = "sha256:63486517fed345712a8385c100cb279108d9d47e6ae59099b07657e983deae85"}, ] [[package]] @@ -1121,7 +1121,7 @@ mujoco = 
"^2.3.7" type = "git" url = "git@github.com:huggingface/gym-aloha.git" reference = "HEAD" -resolved_reference = "c636f05ba0d1760df94537da84c860be1487e17f" +resolved_reference = "12c8171d6708ec6d8547c310fe736d8d494195c5" [[package]] name = "gym-pusht" @@ -1144,7 +1144,7 @@ shapely = "^2.0.3" type = "git" url = "git@github.com:huggingface/gym-pusht.git" reference = "HEAD" -resolved_reference = "080d4ce4d8d3140b2fd204ed628bda14dc58ff06" +resolved_reference = "e0684ff988d223808c0a9dcfaba9dc4991791370" [[package]] name = "gym-xarm" @@ -1164,7 +1164,7 @@ mujoco = "^2.3.7" type = "git" url = "git@github.com:huggingface/gym-xarm.git" reference = "HEAD" -resolved_reference = "27e65c981f9a8d252eca8f157f83508ba6149db7" +resolved_reference = "415811fc34863d349ed113eab77e756726c03525" [[package]] name = "gymnasium" @@ -4299,4 +4299,4 @@ xarm = ["gym-xarm"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "8bd1352973c6104e52f50b68f7387d26ced9b07a52e889540b73d132865cda38" +content-hash = "0f72eb92ac8817a46f0659b4d72647a6b76f6e4ba762d11b280f8a88e6cd4371" diff --git a/pyproject.toml b/pyproject.toml index 3e9845cf4..107232387 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,13 +34,13 @@ wandb = "^0.16.3" imageio = {extras = ["ffmpeg"], version = "^2.34.0"} gdown = "^5.1.0" hydra-core = "^1.3.2" -einops = "^0.7.0" +einops = "^0.8.0" pymunk = "^6.6.0" zarr = "^2.17.0" numba = "^0.59.0" torch = "^2.2.1" opencv-python = "^4.9.0.80" -diffusers = "^0.26.3" +diffusers = "^0.27.2" torchvision = "^0.18.0" h5py = "^3.10.0" huggingface-hub = "^0.21.4"