From 791506dfb8155a83bcdc568d174ecfd9e194a913 Mon Sep 17 00:00:00 2001
From: Simon Alibert <75076266+aliberts@users.noreply.github.com>
Date: Mon, 29 Apr 2024 00:31:33 +0200
Subject: [PATCH] Remove warnings (#111)

- Replace `use_pretrained_backbone` with `pretrained_backbone_weights`
- Bump diffusers' minimum version `0.26.3` -> `0.27.2`
- Add ignore flags in CI's pytest
- Change Box observation spaces in simulation environments
- Set `version_base="1.2"` in Hydra initializations
- Bump einops' minimum version `0.7.0` -> `0.8.0`
---
 .github/workflows/test.yml                    | 11 ++++++--
 .../common/policies/act/configuration_act.py  |  6 ++---
 lerobot/common/policies/act/modeling_act.py   |  2 +-
 .../diffusion/configuration_diffusion.py      |  6 ++---
 .../policies/diffusion/modeling_diffusion.py  |  4 +--
 lerobot/common/utils/io_utils.py              | 12 +++++++++
 lerobot/common/utils/utils.py                 |  3 ++-
 lerobot/configs/policy/act.yaml               |  2 +-
 lerobot/configs/policy/diffusion.yaml         |  2 +-
 lerobot/scripts/eval.py                       |  6 +----
 lerobot/scripts/train.py                      |  2 +-
 lerobot/scripts/visualize_dataset.py          |  2 +-
 poetry.lock                                   | 26 +++++++++----------
 pyproject.toml                                |  4 +--
 14 files changed, 52 insertions(+), 36 deletions(-)
 create mode 100644 lerobot/common/utils/io_utils.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 7bc643d15..76e7000db 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -53,7 +53,14 @@ jobs:
           poetry install --all-extras
 
       - name: Test with pytest
-        run: pytest -v --cov=./lerobot --durations=0 tests
+        run: |
+          pytest tests -v --cov=./lerobot --durations=0 \
+            -W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
+            -W ignore::UserWarning:torch.utils.data.dataloader:558 \
+            -W ignore::UserWarning:gymnasium.utils.env_checker:247 \
+            && rm -rf tests/outputs outputs
 
       - name: Test end-to-end
-        run: make test-end-to-end
+        run: |
+          make test-end-to-end \
+            && rm -rf outputs
diff --git a/lerobot/common/policies/act/configuration_act.py b/lerobot/common/policies/act/configuration_act.py
index c8c85c049..7564e6f70 100644
--- a/lerobot/common/policies/act/configuration_act.py
+++ b/lerobot/common/policies/act/configuration_act.py
@@ -33,8 +33,8 @@ class ActionChunkingTransformerConfig:
             deviation and "min_max" which rescale in a [-1, 1] range.
         unnormalize_output_modes: Similar dictionary as `normalize_input_modes`, but to unormalize in original scale.
         vision_backbone: Name of the torchvision resnet backbone to use for encoding images.
-        use_pretrained_backbone: Whether the backbone should be initialized with pretrained weights from
-            torchvision.
+        pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone.
+            `None` means no pretrained weights.
         replace_final_stride_with_dilation: Whether to replace the ResNet's final 2x2 stride with a dilated
             convolution.
         pre_norm: Whether to use "pre-norm" in the transformer blocks.
@@ -90,7 +90,7 @@ class ActionChunkingTransformerConfig:
     # Architecture.
     # Vision backbone.
     vision_backbone: str = "resnet18"
-    use_pretrained_backbone: bool = True
+    pretrained_backbone_weights: str | None = "ResNet18_Weights.IMAGENET1K_V1"
     replace_final_stride_with_dilation: int = False
     # Transformer layers.
     pre_norm: bool = False
diff --git a/lerobot/common/policies/act/modeling_act.py b/lerobot/common/policies/act/modeling_act.py
index 4501c6cc4..f0190ed33 100644
--- a/lerobot/common/policies/act/modeling_act.py
+++ b/lerobot/common/policies/act/modeling_act.py
@@ -104,7 +104,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
         # Backbone for image feature extraction.
         backbone_model = getattr(torchvision.models, cfg.vision_backbone)(
             replace_stride_with_dilation=[False, False, cfg.replace_final_stride_with_dilation],
-            pretrained=cfg.use_pretrained_backbone,
+            weights=cfg.pretrained_backbone_weights,
             norm_layer=FrozenBatchNorm2d,
         )
         # Note: The assumption here is that we are using a ResNet model (and hence layer4 is the final feature
diff --git a/lerobot/common/policies/diffusion/configuration_diffusion.py b/lerobot/common/policies/diffusion/configuration_diffusion.py
index a5c739c46..432afa218 100644
--- a/lerobot/common/policies/diffusion/configuration_diffusion.py
+++ b/lerobot/common/policies/diffusion/configuration_diffusion.py
@@ -35,8 +35,8 @@ class DiffusionConfig:
             within the image size. If None, no cropping is done.
         crop_is_random: Whether the crop should be random at training time (it's always a center crop in eval
             mode).
-        use_pretrained_backbone: Whether the backbone should be initialized with pretrained weights from
-            torchvision.
+        pretrained_backbone_weights: Pretrained weights from torchvision to initialize the backbone.
+            `None` means no pretrained weights.
         use_group_norm: Whether to replace batch normalization with group normalization in the backbone.
             The group sizes are set to be about 16 (to be precise, feature_dim // 16).
         spatial_softmax_num_keypoints: Number of keypoints for SpatialSoftmax.
@@ -96,7 +96,7 @@ class DiffusionConfig:
     vision_backbone: str = "resnet18"
     crop_shape: tuple[int, int] | None = (84, 84)
     crop_is_random: bool = True
-    use_pretrained_backbone: bool = False
+    pretrained_backbone_weights: str | None = None
     use_group_norm: bool = True
     spatial_softmax_num_keypoints: int = 32
     # Unet.
diff --git a/lerobot/common/policies/diffusion/modeling_diffusion.py b/lerobot/common/policies/diffusion/modeling_diffusion.py
index 1dd545d34..9e52ae926 100644
--- a/lerobot/common/policies/diffusion/modeling_diffusion.py
+++ b/lerobot/common/policies/diffusion/modeling_diffusion.py
@@ -378,13 +378,13 @@ class _RgbEncoder(nn.Module):
 
         # Set up backbone.
         backbone_model = getattr(torchvision.models, cfg.vision_backbone)(
-            pretrained=cfg.use_pretrained_backbone
+            weights=cfg.pretrained_backbone_weights
         )
         # Note: This assumes that the layer4 feature map is children()[-3]
         # TODO(alexander-soare): Use a safer alternative.
         self.backbone = nn.Sequential(*(list(backbone_model.children())[:-2]))
         if cfg.use_group_norm:
-            if cfg.use_pretrained_backbone:
+            if cfg.pretrained_backbone_weights:
                 raise ValueError(
                     "You can't replace BatchNorm in a pretrained model without ruining the weights!"
                 )
diff --git a/lerobot/common/utils/io_utils.py b/lerobot/common/utils/io_utils.py
new file mode 100644
index 000000000..5d727bd74
--- /dev/null
+++ b/lerobot/common/utils/io_utils.py
@@ -0,0 +1,12 @@
+import warnings
+
+import imageio
+
+
+def write_video(video_path, stacked_frames, fps):
+    # Filter out DeprecationWarnings raised from pkg_resources
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore", "pkg_resources is deprecated as an API", category=DeprecationWarning
+        )
+        imageio.mimsave(video_path, stacked_frames, fps=fps)
diff --git a/lerobot/common/utils/utils.py b/lerobot/common/utils/utils.py
index 81b3d986e..9d0ddd986 100644
--- a/lerobot/common/utils/utils.py
+++ b/lerobot/common/utils/utils.py
@@ -92,7 +92,8 @@ def init_hydra_config(config_path: str, overrides: list[str] | None = None) -> D
     hydra.core.global_hydra.GlobalHydra.instance().clear()
     # Hydra needs a path relative to this file.
     hydra.initialize(
-        str(_relative_path_between(Path(config_path).absolute().parent, Path(__file__).absolute().parent))
+        str(_relative_path_between(Path(config_path).absolute().parent, Path(__file__).absolute().parent)),
+        version_base="1.2",
     )
     cfg = hydra.compose(Path(config_path).stem, overrides)
     return cfg
diff --git a/lerobot/configs/policy/act.yaml b/lerobot/configs/policy/act.yaml
index d4ad195c2..c67793e47 100644
--- a/lerobot/configs/policy/act.yaml
+++ b/lerobot/configs/policy/act.yaml
@@ -45,7 +45,7 @@ policy:
   # Architecture.
   # Vision backbone.
   vision_backbone: resnet18
-  use_pretrained_backbone: true
+  pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
   replace_final_stride_with_dilation: false
   # Transformer layers.
   pre_norm: false
diff --git a/lerobot/configs/policy/diffusion.yaml b/lerobot/configs/policy/diffusion.yaml
index 999d62ea4..f96e21c2d 100644
--- a/lerobot/configs/policy/diffusion.yaml
+++ b/lerobot/configs/policy/diffusion.yaml
@@ -61,7 +61,7 @@ policy:
   vision_backbone: resnet18
   crop_shape: [84, 84]
   crop_is_random: True
-  use_pretrained_backbone: false
+  pretrained_backbone_weights: null
   use_group_norm: True
   spatial_softmax_num_keypoints: 32
   # Unet.
diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py
index c66e7ee9f..0c10b7a53 100644
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -38,7 +38,6 @@ from pathlib import Path
 
 import einops
 import gymnasium as gym
-import imageio
 import numpy as np
 import torch
 from datasets import Dataset, Features, Image, Sequence, Value
@@ -51,13 +50,10 @@ from lerobot.common.envs.factory import make_env
 from lerobot.common.envs.utils import postprocess_action, preprocess_observation
 from lerobot.common.logger import log_output_dir
 from lerobot.common.policies.factory import make_policy
+from lerobot.common.utils.io_utils import write_video
 from lerobot.common.utils.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed
 
 
-def write_video(video_path, stacked_frames, fps):
-    imageio.mimsave(video_path, stacked_frames, fps=fps)
-
-
 def eval_policy(
     env: gym.vector.VectorEnv,
     policy: torch.nn.Module,
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index 0447c84ed..c4c0ea574 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -22,7 +22,7 @@ from lerobot.common.utils.utils import (
 from lerobot.scripts.eval import eval_policy
 
 
-@hydra.main(version_base=None, config_name="default", config_path="../configs")
+@hydra.main(version_base="1.2", config_name="default", config_path="../configs")
 def train_cli(cfg: dict):
     train(
         cfg,
diff --git a/lerobot/scripts/visualize_dataset.py b/lerobot/scripts/visualize_dataset.py
index 3d4d8c536..a5be5e3f9 100644
--- a/lerobot/scripts/visualize_dataset.py
+++ b/lerobot/scripts/visualize_dataset.py
@@ -16,7 +16,7 @@ MAX_NUM_STEPS = 1000
 FIRST_FRAME = 0
 
 
-@hydra.main(version_base=None, config_name="default", config_path="../configs")
+@hydra.main(version_base="1.2", config_name="default", config_path="../configs")
 def visualize_dataset_cli(cfg: dict):
     visualize_dataset(cfg, out_dir=hydra.core.hydra_config.HydraConfig.get().runtime.output_dir)
 
diff --git a/poetry.lock b/poetry.lock
index b7cb0758b..79c486411 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -597,13 +597,13 @@ files = [
 
 [[package]]
 name = "diffusers"
-version = "0.26.3"
+version = "0.27.2"
 description = "State-of-the-art diffusion in PyTorch and JAX."
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "diffusers-0.26.3-py3-none-any.whl", hash = "sha256:f8f5710c8f9170e9749f0b104f50fc4a1259f8aff3effed99598409a5ea9b1cd"},
-    {file = "diffusers-0.26.3.tar.gz", hash = "sha256:e217ea39e85b0bd34fee11f8b39fd00116680b05ff7a70c0b4fdab5351ae4f96"},
+    {file = "diffusers-0.27.2-py3-none-any.whl", hash = "sha256:85da5cd1098ab428535d592136973ec0c3f12f78148c94b379cb9f02d2414e75"},
+    {file = "diffusers-0.27.2.tar.gz", hash = "sha256:6cefd7770d7fc1d139614233aa17cdcd639c138d0c3517b8d8bbc8cf573050a0"},
 ]
 
 [package.dependencies]
@@ -617,12 +617,12 @@ requests = "*"
 safetensors = ">=0.3.1"
 
 [package.extras]
-dev = ["GitPython (<3.1.19)", "Jinja2", "accelerate (>=0.11.0)", "compel (==0.1.8)", "datasets", "flax (>=0.4.1)", "hf-doc-builder (>=0.3.0)", "invisible-watermark (>=0.2.0)", "isort (>=5.5.4)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "ruff (==0.1.5)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "torch (>=1.4,<2.2.0)", "torchvision (<0.17)", "transformers (>=4.25.1)", "urllib3 (<=2.0.0)"]
+dev = ["GitPython (<3.1.19)", "Jinja2", "accelerate (>=0.11.0)", "compel (==0.1.8)", "datasets", "flax (>=0.4.1)", "hf-doc-builder (>=0.3.0)", "invisible-watermark (>=0.2.0)", "isort (>=5.5.4)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "ruff (==0.1.5)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "torch (>=1.4)", "torchvision", "transformers (>=4.25.1)", "urllib3 (<=2.0.0)"]
 docs = ["hf-doc-builder (>=0.3.0)"]
 flax = ["flax (>=0.4.1)", "jax (>=0.4.1)", "jaxlib (>=0.4.1)"]
 quality = ["hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (==0.1.5)", "urllib3 (<=2.0.0)"]
-test = ["GitPython (<3.1.19)", "Jinja2", "compel (==0.1.8)", "datasets", "invisible-watermark (>=0.2.0)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "torchvision (<0.17)", "transformers (>=4.25.1)"]
-torch = ["accelerate (>=0.11.0)", "torch (>=1.4,<2.2.0)"]
+test = ["GitPython (<3.1.19)", "Jinja2", "compel (==0.1.8)", "datasets", "invisible-watermark (>=0.2.0)", "k-diffusion (>=0.0.12)", "librosa", "parameterized", "pytest", "pytest-timeout", "pytest-xdist", "requests-mock (==1.10.0)", "safetensors (>=0.3.1)", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "torchvision", "transformers (>=4.25.1)"]
+torch = ["accelerate (>=0.11.0)", "torch (>=1.4)"]
 training = ["Jinja2", "accelerate (>=0.11.0)", "datasets", "peft (>=0.6.0)", "protobuf (>=3.20.3,<4)", "tensorboard"]
 
 [[package]]
@@ -779,13 +779,13 @@ files = [
 
 [[package]]
 name = "einops"
-version = "0.7.0"
+version = "0.8.0"
 description = "A new flavour of deep learning operations"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "einops-0.7.0-py3-none-any.whl", hash = "sha256:0f3096f26b914f465f6ff3c66f5478f9a5e380bb367ffc6493a68143fbbf1fd1"},
-    {file = "einops-0.7.0.tar.gz", hash = "sha256:b2b04ad6081a3b227080c9bf5e3ace7160357ff03043cd66cc5b2319eb7031d1"},
+    {file = "einops-0.8.0-py3-none-any.whl", hash = "sha256:9572fb63046264a862693b0a87088af3bdc8c068fde03de63453cbbde245465f"},
+    {file = "einops-0.8.0.tar.gz", hash = "sha256:63486517fed345712a8385c100cb279108d9d47e6ae59099b07657e983deae85"},
 ]
 
 [[package]]
@@ -1121,7 +1121,7 @@ mujoco = "^2.3.7"
 type = "git"
 url = "git@github.com:huggingface/gym-aloha.git"
 reference = "HEAD"
-resolved_reference = "c636f05ba0d1760df94537da84c860be1487e17f"
+resolved_reference = "12c8171d6708ec6d8547c310fe736d8d494195c5"
 
 [[package]]
 name = "gym-pusht"
@@ -1144,7 +1144,7 @@ shapely = "^2.0.3"
 type = "git"
 url = "git@github.com:huggingface/gym-pusht.git"
 reference = "HEAD"
-resolved_reference = "080d4ce4d8d3140b2fd204ed628bda14dc58ff06"
+resolved_reference = "e0684ff988d223808c0a9dcfaba9dc4991791370"
 
 [[package]]
 name = "gym-xarm"
@@ -1164,7 +1164,7 @@ mujoco = "^2.3.7"
 type = "git"
 url = "git@github.com:huggingface/gym-xarm.git"
 reference = "HEAD"
-resolved_reference = "27e65c981f9a8d252eca8f157f83508ba6149db7"
+resolved_reference = "415811fc34863d349ed113eab77e756726c03525"
 
 [[package]]
 name = "gymnasium"
@@ -4299,4 +4299,4 @@ xarm = ["gym-xarm"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "8bd1352973c6104e52f50b68f7387d26ced9b07a52e889540b73d132865cda38"
+content-hash = "0f72eb92ac8817a46f0659b4d72647a6b76f6e4ba762d11b280f8a88e6cd4371"
diff --git a/pyproject.toml b/pyproject.toml
index 3e9845cf4..107232387 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,13 +34,13 @@ wandb = "^0.16.3"
 imageio = {extras = ["ffmpeg"], version = "^2.34.0"}
 gdown = "^5.1.0"
 hydra-core = "^1.3.2"
-einops = "^0.7.0"
+einops = "^0.8.0"
 pymunk = "^6.6.0"
 zarr = "^2.17.0"
 numba = "^0.59.0"
 torch = "^2.2.1"
 opencv-python = "^4.9.0.80"
-diffusers = "^0.26.3"
+diffusers = "^0.27.2"
 torchvision = "^0.18.0"
 h5py = "^3.10.0"
 huggingface-hub = "^0.21.4"