Remove offline training, refactor train.py and logging/checkpointing (#670)

Co-authored-by: Remi <remi.cadene@huggingface.co>
This commit is contained in:
Simon Alibert
2025-02-11 10:36:06 +01:00
committed by GitHub
parent 334deb985d
commit 90e099b39f
40 changed files with 1515 additions and 935 deletions

81
tests/test_schedulers.py Normal file
View File

@@ -0,0 +1,81 @@
from torch.optim.lr_scheduler import LambdaLR
from lerobot.common.constants import SCHEDULER_STATE
from lerobot.common.optim.schedulers import (
CosineDecayWithWarmupSchedulerConfig,
DiffuserSchedulerConfig,
VQBeTSchedulerConfig,
load_scheduler_state,
save_scheduler_state,
)
def test_diffuser_scheduler(optimizer):
config = DiffuserSchedulerConfig(name="cosine", num_warmup_steps=5)
scheduler = config.build(optimizer, num_training_steps=100)
assert isinstance(scheduler, LambdaLR)
optimizer.step() # so that we don't get torch warning
scheduler.step()
expected_state_dict = {
"_get_lr_called_within_step": False,
"_last_lr": [0.0002],
"_step_count": 2,
"base_lrs": [0.001],
"last_epoch": 1,
"lr_lambdas": [None],
"verbose": False,
}
assert scheduler.state_dict() == expected_state_dict
def test_vqbet_scheduler(optimizer):
config = VQBeTSchedulerConfig(num_warmup_steps=10, num_vqvae_training_steps=20, num_cycles=0.5)
scheduler = config.build(optimizer, num_training_steps=100)
assert isinstance(scheduler, LambdaLR)
optimizer.step()
scheduler.step()
expected_state_dict = {
"_get_lr_called_within_step": False,
"_last_lr": [0.001],
"_step_count": 2,
"base_lrs": [0.001],
"last_epoch": 1,
"lr_lambdas": [None],
"verbose": False,
}
assert scheduler.state_dict() == expected_state_dict
def test_cosine_decay_with_warmup_scheduler(optimizer):
config = CosineDecayWithWarmupSchedulerConfig(
num_warmup_steps=10, num_decay_steps=90, peak_lr=0.01, decay_lr=0.001
)
scheduler = config.build(optimizer, num_training_steps=100)
assert isinstance(scheduler, LambdaLR)
optimizer.step()
scheduler.step()
expected_state_dict = {
"_get_lr_called_within_step": False,
"_last_lr": [0.0001818181818181819],
"_step_count": 2,
"base_lrs": [0.001],
"last_epoch": 1,
"lr_lambdas": [None],
"verbose": False,
}
assert scheduler.state_dict() == expected_state_dict
def test_save_scheduler_state(scheduler, tmp_path):
save_scheduler_state(scheduler, tmp_path)
assert (tmp_path / SCHEDULER_STATE).is_file()
def test_save_load_scheduler_state(scheduler, tmp_path):
save_scheduler_state(scheduler, tmp_path)
loaded_scheduler = load_scheduler_state(scheduler, tmp_path)
assert scheduler.state_dict() == loaded_scheduler.state_dict()