Merge remote-tracking branch 'origin/main' into user/aliberts/2025_02_25_refactor_robots

Simon Alibert
2025-03-10 18:39:48 +01:00
135 changed files with 2177 additions and 514 deletions

View File

@@ -1,14 +1,25 @@
import logging
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from pathlib import Path
import draccus
from lerobot.common.robots import RobotConfig
from lerobot.common.utils.utils import auto_select_torch_device, is_amp_available, is_torch_device_available
from lerobot.configs import parser
from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.train import TrainPipelineConfig
@dataclass
@@ -43,11 +54,6 @@ class RecordControlConfig(ControlConfig):
# Root directory where the dataset will be stored (e.g. 'dataset/path').
root: str | Path | None = None
policy: PreTrainedConfig | None = None
# TODO(rcadene, aliberts): By default, use device and use_amp values from policy checkpoint.
device: str | None = None # cuda | cpu | mps
# `use_amp` determines whether to use Automatic Mixed Precision (AMP) for training and evaluation. With AMP,
# automatic gradient scaling is used.
use_amp: bool | None = None
# Limit the frames per second. By default, uses the policy fps.
fps: int | None = None
# Number of seconds before starting data collection. It allows the robot devices to warm up and synchronize.
@@ -90,27 +96,6 @@ class RecordControlConfig(ControlConfig):
self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides)
self.policy.pretrained_path = policy_path
# When no device or use_amp are given, use the ones from the training config.
if self.device is None or self.use_amp is None:
train_cfg = TrainPipelineConfig.from_pretrained(policy_path)
if self.device is None:
self.device = train_cfg.device
if self.use_amp is None:
self.use_amp = train_cfg.use_amp
# Automatically switch to an available device if necessary
if not is_torch_device_available(self.device):
auto_device = auto_select_torch_device()
logging.warning(f"Device '{self.device}' is not available. Switching to '{auto_device}'.")
self.device = auto_device
# Automatically deactivate AMP if necessary
if self.use_amp and not is_amp_available(self.device):
logging.warning(
f"Automatic Mixed Precision (amp) is not available on device '{self.device}'. Deactivating AMP."
)
self.use_amp = False
@ControlConfig.register_subclass("replay")
@dataclass

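For context on what this removal replaces: the control config previously filled in missing device/use_amp values from the policy's training config, then validated them. A minimal self-contained sketch of that fallback pattern (toy stand-in class names, not lerobot's):

from dataclasses import dataclass


@dataclass
class TrainCfg:  # toy stand-in for TrainPipelineConfig
    device: str = "cuda"
    use_amp: bool = False


@dataclass
class RecordCfg:  # toy stand-in for the old RecordControlConfig
    device: str | None = None
    use_amp: bool | None = None

    def resolve_from(self, train_cfg: TrainCfg) -> None:
        # When no device or use_amp are given, fall back to the training config,
        # as the deleted __post_init__ logic above did.
        if self.device is None:
            self.device = train_cfg.device
        if self.use_amp is None:
            self.use_amp = train_cfg.use_amp


cfg = RecordCfg()
cfg.resolve_from(TrainCfg())
assert cfg.device == "cuda" and cfg.use_amp is False

After this refactor, both fields live on PreTrainedConfig and are validated once in its __post_init__ (see the PreTrainedConfig hunk below).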
View File

@@ -1,14 +1,26 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime as dt
import logging
from dataclasses import dataclass, field
from pathlib import Path
from lerobot.common import envs, policies # noqa: F401
from lerobot.common.utils.utils import auto_select_torch_device, is_amp_available, is_torch_device_available
from lerobot.configs import parser
from lerobot.configs.default import EvalConfig
from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.train import TrainPipelineConfig
@dataclass
@@ -21,11 +33,6 @@ class EvalPipelineConfig:
policy: PreTrainedConfig | None = None
output_dir: Path | None = None
job_name: str | None = None
# TODO(rcadene, aliberts): By default, use device and use_amp values from policy checkpoint.
device: str | None = None # cuda | cpu | mps
# `use_amp` determines whether to use Automatic Mixed Precision (AMP) for training and evaluation. With AMP,
# automatic gradient scaling is used.
use_amp: bool = False
seed: int | None = 1000
def __post_init__(self):
@@ -36,27 +43,6 @@ class EvalPipelineConfig:
self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides)
self.policy.pretrained_path = policy_path
# When no device or use_amp are given, use the ones from the training config.
if self.device is None or self.use_amp is None:
train_cfg = TrainPipelineConfig.from_pretrained(policy_path)
if self.device is None:
self.device = train_cfg.device
if self.use_amp is None:
self.use_amp = train_cfg.use_amp
# Automatically switch to an available device if necessary
if not is_torch_device_available(self.device):
auto_device = auto_select_torch_device()
logging.warning(f"Device '{self.device}' is not available. Switching to '{auto_device}'.")
self.device = auto_device
# Automatically deactivate AMP if necessary
if self.use_amp and not is_amp_available(self.device):
logging.warning(
f"Automatic Mixed Precision (amp) is not available on device '{self.device}'. Deactivating AMP."
)
self.use_amp = False
else:
logging.warning(
"No pretrained path was provided, evaluated policy will be built from scratch (random weights)."
@@ -73,11 +59,6 @@ class EvalPipelineConfig:
eval_dir = f"{now:%Y-%m-%d}/{now:%H-%M-%S}_{self.job_name}"
self.output_dir = Path("outputs/eval") / eval_dir
if self.device is None:
raise ValueError("Set one of the following device: cuda, cpu or mps")
elif self.device == "cuda" and self.use_amp is None:
raise ValueError("Set 'use_amp' to True or False.")
@classmethod
def __get_path_fields__(cls) -> list[str]:
"""This enables the parser to load config from the policy using `--policy.path=local/dir`"""

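The same two fields leave EvalPipelineConfig. Since draccus maps nested dataclass fields to dotted CLI flags, the equivalent overrides now go under the policy namespace (e.g. --policy.device=cuda, and analogously --policy.use_amp). A small runnable demo of the dotted-flag mapping, using draccus.parse as lerobot's own wrap() does; the class names are toy stand-ins:

from dataclasses import dataclass, field

import draccus


@dataclass
class PolicyCfg:  # toy stand-in for PreTrainedConfig
    device: str | None = None
    use_amp: bool = False


@dataclass
class EvalCfg:  # toy stand-in for EvalPipelineConfig
    policy: PolicyCfg = field(default_factory=PolicyCfg)


# Nested fields are addressed with dotted flags on the command line:
cfg = draccus.parse(config_class=EvalCfg, args=["--policy.device=cpu"])
assert cfg.policy.device == "cpu"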
View File

@@ -1,4 +1,19 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import inspect
import pkgutil
import sys
from argparse import ArgumentError
from functools import wraps
@@ -10,6 +25,7 @@ import draccus
from lerobot.common.utils.utils import has_method
PATH_KEY = "path"
PLUGIN_DISCOVERY_SUFFIX = "discover_packages_path"
draccus.set_config_type("json")
@@ -45,6 +61,86 @@ def parse_arg(arg_name: str, args: Sequence[str] | None = None) -> str | None:
return None
def parse_plugin_args(plugin_arg_suffix: str, args: Sequence[str]) -> dict:
"""Parse plugin-related arguments from command-line arguments.
This function extracts the command-line arguments that match a specified suffix pattern.
It processes arguments in the format '--key=value' and returns them as a dictionary.
Args:
plugin_arg_suffix (str): The suffix to identify plugin-related arguments.
args (Sequence[str]): A sequence of command-line arguments to parse.
Returns:
dict: A dictionary containing the parsed plugin arguments where:
- Keys are the argument names (with '--' prefix removed if present)
- Values are the corresponding argument values
Example:
>>> args = ['--env.discover_packages_path=my_package',
... '--other_arg=value']
>>> parse_plugin_args('discover_packages_path', args)
{'env.discover_packages_path': 'my_package'}
"""
plugin_args = {}
for arg in args:
if "=" in arg and plugin_arg_suffix in arg:
key, value = arg.split("=", 1)
# Remove leading '--' if present
if key.startswith("--"):
key = key[2:]
plugin_args[key] = value
return plugin_args
class PluginLoadError(Exception):
"""Raised when a plugin fails to load."""
def load_plugin(plugin_path: str) -> None:
"""Load and initialize a plugin from a given Python package path.
This function attempts to load a plugin by importing its package and any submodules.
Plugin registration is expected to happen during package initialization, i.e. when
the package is imported: the gym environment should be registered and the config classes
registered with their parents using the `register_subclass` decorator.
Args:
plugin_path (str): The Python package path to the plugin (e.g. "mypackage.plugins.myplugin")
Raises:
PluginLoadError: If the plugin cannot be loaded due to import errors or if the package path is invalid.
Examples:
>>> load_plugin("external_plugin.core") # Loads plugin from external package
Notes:
- The plugin package should handle its own registration during import
- All submodules in the plugin package will be imported
- Implementation follows the plugin discovery pattern from Python packaging guidelines
See Also:
https://packaging.python.org/en/latest/guides/creating-and-discovering-plugins/
"""
try:
package_module = importlib.import_module(plugin_path, __package__)
except (ImportError, ModuleNotFoundError) as e:
raise PluginLoadError(
f"Failed to load plugin '{plugin_path}'. Verify the path and installation: {str(e)}"
) from e
def iter_namespace(ns_pkg):
return pkgutil.iter_modules(ns_pkg.__path__, ns_pkg.__name__ + ".")
try:
for _finder, pkg_name, _ispkg in iter_namespace(package_module):
importlib.import_module(pkg_name)
except ImportError as e:
raise PluginLoadError(
f"Failed to load plugin '{plugin_path}'. Verify the path and installation: {str(e)}"
) from e
def get_path_arg(field_name: str, args: Sequence[str] | None = None) -> str | None:
return parse_arg(f"{field_name}.{PATH_KEY}", args)
@@ -92,10 +188,13 @@ def filter_path_args(fields_to_filter: str | list[str], args: Sequence[str] | No
def wrap(config_path: Path | None = None):
"""
HACK: Similar to draccus.wrap but does two additional things:
HACK: Similar to draccus.wrap but does three additional things:
- Will remove '.path' arguments from CLI in order to process them later on.
- If a 'config_path' is passed and the main config class has a 'from_pretrained' method, will
initialize it from there, allowing configs to be fetched directly from the hub
- Will load plugins specified in the CLI arguments. These plugins will typically register
their own subclasses of config classes, so that draccus can find the right class to instantiate
from the CLI '.type' arguments
"""
def wrapper_outer(fn):
@@ -108,6 +207,14 @@ def wrap(config_path: Path | None = None):
args = args[1:]
else:
cli_args = sys.argv[1:]
plugin_args = parse_plugin_args(PLUGIN_DISCOVERY_SUFFIX, cli_args)
for plugin_cli_arg, plugin_path in plugin_args.items():
try:
load_plugin(plugin_path)
except PluginLoadError as e:
# add the relevant CLI arg to the error message
raise PluginLoadError(f"{e}\nFailed plugin CLI Arg: {plugin_cli_arg}") from e
cli_args = filter_arg(plugin_cli_arg, cli_args)
config_path_cli = parse_arg("config_path", cli_args)
if has_method(argtype, "__get_path_fields__"):
path_fields = argtype.__get_path_fields__()

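Putting the new plugin hooks together: a plugin is an ordinary Python package whose import side effects register config subclasses, and wrap() loads it before draccus parses the remaining arguments. A hedged sketch, assuming lerobot is installed at this revision; my_plugin and MyEnvConfig are hypothetical:

# Hypothetical my_plugin/__init__.py -- registration happens at import time:
#
#   from dataclasses import dataclass
#   from lerobot.common.envs.configs import EnvConfig
#
#   @EnvConfig.register_subclass("my_env")
#   @dataclass
#   class MyEnvConfig(EnvConfig):
#       ...
#
# Assumed invocation: --env.discover_packages_path=my_plugin --env.type=my_env
from lerobot.configs.parser import (
    PLUGIN_DISCOVERY_SUFFIX,
    PluginLoadError,
    load_plugin,
    parse_plugin_args,
)

cli_args = ["--env.discover_packages_path=my_plugin", "--env.type=my_env"]
plugin_args = parse_plugin_args(PLUGIN_DISCOVERY_SUFFIX, cli_args)
print(plugin_args)  # {'env.discover_packages_path': 'my_plugin'}

for plugin_path in plugin_args.values():
    try:
        load_plugin(plugin_path)
    except PluginLoadError as e:
        print(e)  # my_plugin is hypothetical, so this prints the load failure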
View File

@@ -1,4 +1,18 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
@@ -12,6 +26,7 @@ from huggingface_hub.errors import HfHubHTTPError
from lerobot.common.optim.optimizers import OptimizerConfig
from lerobot.common.optim.schedulers import LRSchedulerConfig
from lerobot.common.utils.hub import HubMixin
from lerobot.common.utils.utils import auto_select_torch_device, is_amp_available, is_torch_device_available
from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature
# Generic variable that is either PreTrainedConfig or a subclass thereof
@@ -40,8 +55,24 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
input_features: dict[str, PolicyFeature] = field(default_factory=dict)
output_features: dict[str, PolicyFeature] = field(default_factory=dict)
device: str | None = None # cuda | cpu | mps
# `use_amp` determines whether to use Automatic Mixed Precision (AMP) for training and evaluation. With AMP,
# automatic gradient scaling is used.
use_amp: bool = False
def __post_init__(self):
self.pretrained_path = None
if not self.device or not is_torch_device_available(self.device):
auto_device = auto_select_torch_device()
logging.warning(f"Device '{self.device}' is not available. Switching to '{auto_device}'.")
self.device = auto_device.type
# Automatically deactivate AMP if necessary
if self.use_amp and not is_amp_available(self.device):
logging.warning(
f"Automatic Mixed Precision (amp) is not available on device '{self.device}'. Deactivating AMP."
)
self.use_amp = False
@property
def type(self) -> str:

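The device/use_amp checks that were duplicated across the record and eval configs now run once here, in PreTrainedConfig.__post_init__. A standalone sketch of the same logic using torch directly; the cuda > mps > cpu preference order of auto_select_torch_device() and the mps restriction of is_amp_available() are assumptions:

import logging

import torch


def auto_select_device() -> str:
    # Assumed preference order of lerobot's auto_select_torch_device().
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"


device: str | None = None  # as on PreTrainedConfig when the user sets nothing
use_amp = True

if device is None:
    device = auto_select_device()
if use_amp and device == "mps":  # AMP assumed unavailable on mps
    logging.warning(f"AMP is not available on device '{device}'. Deactivating AMP.")
    use_amp = False
print(device, use_amp)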
View File

@@ -1,5 +1,17 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime as dt
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
@@ -13,7 +25,6 @@ from lerobot.common import envs
from lerobot.common.optim import OptimizerConfig
from lerobot.common.optim.schedulers import LRSchedulerConfig
from lerobot.common.utils.hub import HubMixin
from lerobot.common.utils.utils import auto_select_torch_device, is_amp_available
from lerobot.configs import parser
from lerobot.configs.default import DatasetConfig, EvalConfig, WandBConfig
from lerobot.configs.policies import PreTrainedConfig
@@ -35,10 +46,6 @@ class TrainPipelineConfig(HubMixin):
# Note that when resuming a run, the default behavior is to use the configuration from the checkpoint,
# regardless of what's provided with the training command at the time of resumption.
resume: bool = False
device: str | None = None # cuda | cpu | mps
# `use_amp` determines whether to use Automatic Mixed Precision (AMP) for training and evaluation. With AMP,
# automatic gradient scaling is used.
use_amp: bool = False
# `seed` is used for training (e.g. model initialization, dataset shuffling)
# AND for the evaluation environments.
seed: int | None = 1000
@@ -61,18 +68,6 @@ class TrainPipelineConfig(HubMixin):
self.checkpoint_path = None
def validate(self):
if not self.device:
logging.warning("No device specified, trying to infer device automatically")
device = auto_select_torch_device()
self.device = device.type
# Automatically deactivate AMP if necessary
if self.use_amp and not is_amp_available(self.device):
logging.warning(
f"Automatic Mixed Precision (amp) is not available on device '{self.device}'. Deactivating AMP."
)
self.use_amp = False
# HACK: We parse again the cli args here to get the pretrained paths if there was some.
policy_path = parser.get_path_arg("policy")
if policy_path:

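With the fields gone from TrainPipelineConfig, validate() no longer touches device or AMP; the training loop reads both from the nested policy config. A generic torch sketch of the pattern use_amp gates (not lerobot's train.py):

import contextlib

import torch

device = "cpu"   # now sourced from cfg.policy.device
use_amp = False  # now sourced from cfg.policy.use_amp

x = torch.randn(4, 8)
layer = torch.nn.Linear(8, 2)

# Run the forward pass under autocast only when AMP is enabled:
ctx = torch.autocast(device_type=device) if use_amp else contextlib.nullcontext()
with ctx:
    out = layer(x)
print(out.dtype)  # torch.float32 here; lower precision inside autocast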
View File

@@ -1,3 +1,16 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Note: We subclass str so that serialization is straightforward
# https://stackoverflow.com/questions/24481852/serialising-an-enum-member-to-json
from dataclasses import dataclass
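The note about subclassing str refers to the str-mixin enum pattern: because every member is itself a str, json serialization needs no custom encoder. A minimal sketch (member names assumed, matching lerobot's FeatureType):

import json
from enum import Enum


class FeatureType(str, Enum):
    # Each member is a str instance, so json.dumps encodes it directly.
    STATE = "STATE"
    VISUAL = "VISUAL"


print(json.dumps({"dtype": FeatureType.VISUAL}))  # {"dtype": "VISUAL"}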