Remove offline training, refactor train.py and logging/checkpointing (#670)

Co-authored-by: Remi <remi.cadene@huggingface.co>
Simon Alibert authored on 2025-02-11 10:36:06 +01:00, committed by GitHub
parent 334deb985d
commit 90e099b39f
40 changed files with 1515 additions and 935 deletions

View File

@@ -14,15 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from pathlib import Path
-
-import torch
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import LRScheduler
 
-from lerobot.common.logger import TRAINING_STATE
 from lerobot.common.policies.pretrained import PreTrainedPolicy
-from lerobot.common.utils.utils import get_global_random_state, set_global_random_state
 from lerobot.configs.train import TrainPipelineConfig
@@ -40,22 +36,5 @@ def make_optimizer_and_scheduler(
     """
     params = policy.get_optim_params() if cfg.use_policy_training_preset else policy.parameters()
     optimizer = cfg.optimizer.build(params)
-    lr_scheduler = cfg.scheduler.build(optimizer, cfg.offline.steps) if cfg.scheduler is not None else None
+    lr_scheduler = cfg.scheduler.build(optimizer, cfg.steps) if cfg.scheduler is not None else None
     return optimizer, lr_scheduler
-
-
-def load_training_state(checkpoint_dir: Path, optimizer: Optimizer, scheduler: LRScheduler | None) -> int:
-    """
-    Given the checkpoint directory, load the optimizer state, scheduler state, and random state, and
-    return the global training step.
-    """
-    # TODO(aliberts): use safetensors instead as weights_only=False is unsafe
-    training_state = torch.load(checkpoint_dir / TRAINING_STATE, weights_only=False)
-    optimizer.load_state_dict(training_state["optimizer"])
-    if scheduler is not None:
-        scheduler.load_state_dict(training_state["scheduler"])
-    elif "scheduler" in training_state:
-        raise ValueError("The checkpoint contains a scheduler state_dict, but no LRScheduler was provided.")
-    # Small HACK to get the expected keys: use `get_global_random_state`.
-    set_global_random_state({k: training_state[k] for k in get_global_random_state()})
-    return training_state["step"], optimizer, scheduler
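
With offline training removed, the scheduler is built once against the single `cfg.steps` budget (previously `cfg.offline.steps`). A minimal sketch of the builder contract this implies, in plain PyTorch; `build_linear_decay` is an illustrative stand-in, not a lerobot API:

```python
import torch
from torch.optim.lr_scheduler import LambdaLR


# Illustrative stand-in for a scheduler config's `build(optimizer, num_training_steps)`:
# the scheduler is sized to the full run so the decay stays aligned with cfg.steps.
def build_linear_decay(optimizer: torch.optim.Optimizer, num_training_steps: int) -> LambdaLR:
    def lr_lambda(current_step: int) -> float:
        # Linearly decay the LR multiplier from 1.0 to 0.0 over the whole run.
        return max(0.0, 1.0 - current_step / num_training_steps)

    return LambdaLR(optimizer, lr_lambda, -1)


params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=1e-3)
scheduler = build_linear_decay(optimizer, num_training_steps=100_000)
```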

View File

@@ -1,8 +1,32 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import abc
 from dataclasses import asdict, dataclass
+from pathlib import Path
 
 import draccus
 import torch
+from safetensors.torch import load_file, save_file
 
+from lerobot.common.constants import (
+    OPTIMIZER_PARAM_GROUPS,
+    OPTIMIZER_STATE,
+)
+from lerobot.common.datasets.utils import flatten_dict, unflatten_dict, write_json
+from lerobot.common.utils.io_utils import deserialize_json_into_object
 
 
 @dataclass
@@ -68,3 +92,27 @@ class SGDConfig(OptimizerConfig):
         kwargs = asdict(self)
         kwargs.pop("grad_clip_norm")
         return torch.optim.SGD(params, **kwargs)
+
+
+def save_optimizer_state(optimizer: torch.optim.Optimizer, save_dir: Path) -> None:
+    state = optimizer.state_dict()
+    param_groups = state.pop("param_groups")
+    flat_state = flatten_dict(state)
+    save_file(flat_state, save_dir / OPTIMIZER_STATE)
+    write_json(param_groups, save_dir / OPTIMIZER_PARAM_GROUPS)
+
+
+def load_optimizer_state(optimizer: torch.optim.Optimizer, save_dir: Path) -> torch.optim.Optimizer:
+    current_state_dict = optimizer.state_dict()
+    flat_state = load_file(save_dir / OPTIMIZER_STATE)
+    state = unflatten_dict(flat_state)
+    loaded_state_dict = {"state": {int(k): v for k, v in state["state"].items()}}
+
+    if "param_groups" in current_state_dict:
+        param_groups = deserialize_json_into_object(
+            save_dir / OPTIMIZER_PARAM_GROUPS, current_state_dict["param_groups"]
+        )
+        loaded_state_dict["param_groups"] = param_groups
+
+    optimizer.load_state_dict(loaded_state_dict)
+    return optimizer
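
The new helpers split the optimizer checkpoint into a safetensors file for tensor state and a JSON file for param groups, resolving the `weights_only=False` TODO deleted above. A hedged round-trip sketch; the `lerobot.common.optim.optimizers` import path and the checkpoint directory are assumptions, not shown in this diff:

```python
from pathlib import Path

import torch

# Assumed module path for the helpers defined above.
from lerobot.common.optim.optimizers import load_optimizer_state, save_optimizer_state

params = [torch.nn.Parameter(torch.randn(2, 2))]
optimizer = torch.optim.Adam(params, lr=1e-4)

# Take one real step so optimizer.state holds tensors worth saving.
params[0].sum().backward()
optimizer.step()

ckpt_dir = Path("outputs/checkpoint")  # hypothetical directory
ckpt_dir.mkdir(parents=True, exist_ok=True)
save_optimizer_state(optimizer, ckpt_dir)

# On resume: rebuild an identical optimizer, then restore its state in place.
resumed = torch.optim.Adam(params, lr=1e-4)
resumed = load_optimizer_state(resumed, ckpt_dir)
```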

View File

@@ -1,11 +1,31 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import abc
 import math
 from dataclasses import asdict, dataclass
+from pathlib import Path
 
 import draccus
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import LambdaLR, LRScheduler
 
+from lerobot.common.constants import SCHEDULER_STATE
+from lerobot.common.datasets.utils import write_json
+from lerobot.common.utils.io_utils import deserialize_json_into_object
 
 
 @dataclass
 class LRSchedulerConfig(draccus.ChoiceRegistry, abc.ABC):
@@ -89,3 +109,14 @@ class CosineDecayWithWarmupSchedulerConfig(LRSchedulerConfig):
             return cosine_decay_schedule(current_step)
 
         return LambdaLR(optimizer, lr_lambda, -1)
+
+
+def save_scheduler_state(scheduler: LRScheduler, save_dir: Path) -> None:
+    state_dict = scheduler.state_dict()
+    write_json(state_dict, save_dir / SCHEDULER_STATE)
+
+
+def load_scheduler_state(scheduler: LRScheduler, save_dir: Path) -> LRScheduler:
+    state_dict = deserialize_json_into_object(save_dir / SCHEDULER_STATE, scheduler.state_dict())
+    scheduler.load_state_dict(state_dict)
+    return scheduler
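
Scheduler state is plain Python scalars, so JSON suffices here; `deserialize_json_into_object` validates the loaded file against the live scheduler's own `state_dict()` used as a template. A resume-flow sketch, under the same caveat that the `lerobot.common.optim.schedulers` module path is assumed:

```python
from pathlib import Path

import torch
from torch.optim.lr_scheduler import LambdaLR

# Assumed module path for the helpers defined above.
from lerobot.common.optim.schedulers import load_scheduler_state, save_scheduler_state

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=1e-2)
scheduler = LambdaLR(optimizer, lambda step: 0.95**step)

for _ in range(10):
    optimizer.step()
    scheduler.step()

ckpt_dir = Path("outputs/checkpoint")  # hypothetical directory
ckpt_dir.mkdir(parents=True, exist_ok=True)
save_scheduler_state(scheduler, ckpt_dir)

# On resume: rebuild the same scheduler, then restore its step counters.
resumed = LambdaLR(optimizer, lambda step: 0.95**step)
resumed = load_scheduler_state(resumed, ckpt_dir)
assert resumed.last_epoch == scheduler.last_epoch
```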