Merge remote-tracking branch 'upstream/main' into fix_pusht_diffusion
This commit is contained in:
@@ -84,7 +84,7 @@ class AlohaExperienceReplay(AbstractExperienceReplay):
|
||||
def __init__(
|
||||
self,
|
||||
dataset_id: str,
|
||||
version: str | None = "v1.0",
|
||||
version: str | None = "v1.1",
|
||||
batch_size: int = None,
|
||||
*,
|
||||
shuffle: bool = True,
|
||||
|
||||
@@ -87,7 +87,7 @@ class PushtExperienceReplay(AbstractExperienceReplay):
|
||||
def __init__(
|
||||
self,
|
||||
dataset_id: str,
|
||||
version: str | None = "v1.0",
|
||||
version: str | None = "v1.1",
|
||||
batch_size: int = None,
|
||||
*,
|
||||
shuffle: bool = True,
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import abc
|
||||
from collections import deque
|
||||
from typing import Optional
|
||||
|
||||
@@ -44,26 +43,20 @@ class AbstractEnv(EnvBase):
|
||||
raise NotImplementedError()
|
||||
# self._prev_action_queue = deque(maxlen=self.num_prev_action)
|
||||
|
||||
@abc.abstractmethod
|
||||
def render(self, mode="rgb_array", width=640, height=480):
|
||||
raise NotImplementedError()
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
@abc.abstractmethod
|
||||
def _reset(self, tensordict: Optional[TensorDict] = None):
|
||||
raise NotImplementedError()
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
@abc.abstractmethod
|
||||
def _step(self, tensordict: TensorDict):
|
||||
raise NotImplementedError()
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
@abc.abstractmethod
|
||||
def _make_env(self):
|
||||
raise NotImplementedError()
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
@abc.abstractmethod
|
||||
def _make_spec(self):
|
||||
raise NotImplementedError()
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
@abc.abstractmethod
|
||||
def _set_seed(self, seed: Optional[int]):
|
||||
raise NotImplementedError()
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import deque
|
||||
|
||||
import torch
|
||||
from torch import Tensor, nn
|
||||
|
||||
|
||||
class AbstractPolicy(nn.Module, ABC):
|
||||
class AbstractPolicy(nn.Module):
|
||||
"""Base policy which all policies should be derived from.
|
||||
|
||||
The forward method should generally not be overriden as it plays the role of handling multi-step policies. See its
|
||||
@@ -22,9 +21,9 @@ class AbstractPolicy(nn.Module, ABC):
|
||||
self.n_action_steps = n_action_steps
|
||||
self.clear_action_queue()
|
||||
|
||||
@abstractmethod
|
||||
def update(self, replay_buffer, step):
|
||||
"""One step of the policy's learning algorithm."""
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
def save(self, fp):
|
||||
torch.save(self.state_dict(), fp)
|
||||
@@ -33,13 +32,13 @@ class AbstractPolicy(nn.Module, ABC):
|
||||
d = torch.load(fp)
|
||||
self.load_state_dict(d)
|
||||
|
||||
@abstractmethod
|
||||
def select_actions(self, observation) -> Tensor:
|
||||
"""Select an action (or trajectory of actions) based on an observation during rollout.
|
||||
|
||||
If n_action_steps was provided at initialization, this should return a (batch_size, n_action_steps, *) tensor of
|
||||
actions. Otherwise if n_actions_steps is None, this should return a (batch_size, *) tensor of actions.
|
||||
"""
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
def clear_action_queue(self):
|
||||
"""This should be called whenever the environment is reset."""
|
||||
|
||||
@@ -7,6 +7,7 @@ import torchvision.transforms as transforms
|
||||
|
||||
from lerobot.common.policies.abstract import AbstractPolicy
|
||||
from lerobot.common.policies.act.detr_vae import build
|
||||
from lerobot.common.utils import get_safe_torch_device
|
||||
|
||||
|
||||
def build_act_model_and_optimizer(cfg):
|
||||
@@ -45,7 +46,7 @@ class ActionChunkingTransformerPolicy(AbstractPolicy):
|
||||
super().__init__(n_action_steps)
|
||||
self.cfg = cfg
|
||||
self.n_action_steps = n_action_steps
|
||||
self.device = device
|
||||
self.device = get_safe_torch_device(device)
|
||||
self.model, self.optimizer = build_act_model_and_optimizer(cfg)
|
||||
self.kl_weight = self.cfg.kl_weight
|
||||
logging.info(f"KL Weight {self.kl_weight}")
|
||||
|
||||
@@ -9,6 +9,7 @@ from lerobot.common.policies.abstract import AbstractPolicy
|
||||
from lerobot.common.policies.diffusion.diffusion_unet_image_policy import DiffusionUnetImagePolicy
|
||||
from lerobot.common.policies.diffusion.model.lr_scheduler import get_scheduler
|
||||
from lerobot.common.policies.diffusion.model.multi_image_obs_encoder import MultiImageObsEncoder, RgbEncoder
|
||||
from lerobot.common.utils import get_safe_torch_device
|
||||
|
||||
|
||||
class DiffusionPolicy(AbstractPolicy):
|
||||
@@ -66,9 +67,8 @@ class DiffusionPolicy(AbstractPolicy):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
self.device = torch.device(cfg_device)
|
||||
if torch.cuda.is_available() and cfg_device == "cuda":
|
||||
self.diffusion.cuda()
|
||||
self.device = get_safe_torch_device(cfg_device)
|
||||
self.diffusion.to(self.device)
|
||||
|
||||
self.ema_diffusion = None
|
||||
self.ema = None
|
||||
|
||||
@@ -10,6 +10,7 @@ import torch.nn as nn
|
||||
|
||||
import lerobot.common.policies.tdmpc.helper as h
|
||||
from lerobot.common.policies.abstract import AbstractPolicy
|
||||
from lerobot.common.utils import get_safe_torch_device
|
||||
|
||||
FIRST_FRAME = 0
|
||||
|
||||
@@ -94,9 +95,10 @@ class TDMPC(AbstractPolicy):
|
||||
self.action_dim = cfg.action_dim
|
||||
|
||||
self.cfg = cfg
|
||||
self.device = torch.device(device)
|
||||
self.device = get_safe_torch_device(device)
|
||||
self.std = h.linear_schedule(cfg.std_schedule, 0)
|
||||
self.model = TOLD(cfg).cuda() if torch.cuda.is_available() and device == "cuda" else TOLD(cfg)
|
||||
self.model = TOLD(cfg)
|
||||
self.model.to(self.device)
|
||||
self.model_target = deepcopy(self.model)
|
||||
self.optim = torch.optim.Adam(self.model.parameters(), lr=self.cfg.lr)
|
||||
self.pi_optim = torch.optim.Adam(self.model._pi.parameters(), lr=self.cfg.lr)
|
||||
|
||||
@@ -6,6 +6,26 @@ import numpy as np
|
||||
import torch
|
||||
|
||||
|
||||
def get_safe_torch_device(cfg_device: str, log: bool = False) -> torch.device:
|
||||
match cfg_device:
|
||||
case "cuda":
|
||||
assert torch.cuda.is_available()
|
||||
device = torch.device("cuda")
|
||||
case "mps":
|
||||
assert torch.backends.mps.is_available()
|
||||
device = torch.device("mps")
|
||||
case "cpu":
|
||||
device = torch.device("cpu")
|
||||
if log:
|
||||
logging.warning("Using CPU, this will be slow.")
|
||||
case _:
|
||||
device = torch.device(cfg_device)
|
||||
if log:
|
||||
logging.warning(f"Using custom {cfg_device} device.")
|
||||
|
||||
return device
|
||||
|
||||
|
||||
def set_seed(seed):
|
||||
"""Set seed for reproducibility."""
|
||||
random.seed(seed)
|
||||
|
||||
@@ -18,7 +18,7 @@ from lerobot.common.envs.factory import make_env
|
||||
from lerobot.common.logger import log_output_dir
|
||||
from lerobot.common.policies.abstract import AbstractPolicy
|
||||
from lerobot.common.policies.factory import make_policy
|
||||
from lerobot.common.utils import init_logging, set_seed
|
||||
from lerobot.common.utils import get_safe_torch_device, init_logging, set_seed
|
||||
|
||||
|
||||
def write_video(video_path, stacked_frames, fps):
|
||||
@@ -35,7 +35,8 @@ def eval_policy(
|
||||
fps: int = 15,
|
||||
return_first_video: bool = False,
|
||||
):
|
||||
policy.eval()
|
||||
if policy is not None:
|
||||
policy.eval()
|
||||
start = time.time()
|
||||
sum_rewards = []
|
||||
max_rewards = []
|
||||
@@ -56,7 +57,8 @@ def eval_policy(
|
||||
with torch.inference_mode():
|
||||
# TODO(alexander-soare): When `break_when_any_done == False` this rolls out for max_steps even when all
|
||||
# envs are done the first time. But we only use the first rollout. This is a waste of compute.
|
||||
policy.clear_action_queue()
|
||||
if policy is not None:
|
||||
policy.clear_action_queue()
|
||||
rollout = env.rollout(
|
||||
max_steps=max_steps,
|
||||
policy=policy,
|
||||
@@ -129,10 +131,8 @@ def eval(cfg: dict, out_dir=None):
|
||||
|
||||
init_logging()
|
||||
|
||||
if cfg.device == "cuda":
|
||||
assert torch.cuda.is_available()
|
||||
else:
|
||||
logging.warning("Using CPU, this will be slow.")
|
||||
# Check device is available
|
||||
get_safe_torch_device(cfg.device, log=True)
|
||||
|
||||
torch.backends.cudnn.benchmark = True
|
||||
torch.backends.cuda.matmul.allow_tf32 = True
|
||||
|
||||
@@ -12,7 +12,7 @@ from lerobot.common.datasets.factory import make_offline_buffer
|
||||
from lerobot.common.envs.factory import make_env
|
||||
from lerobot.common.logger import Logger, log_output_dir
|
||||
from lerobot.common.policies.factory import make_policy
|
||||
from lerobot.common.utils import format_big_number, init_logging, set_seed
|
||||
from lerobot.common.utils import format_big_number, get_safe_torch_device, init_logging, set_seed
|
||||
from lerobot.scripts.eval import eval_policy
|
||||
|
||||
|
||||
@@ -112,13 +112,13 @@ def train(cfg: dict, out_dir=None, job_name=None):
|
||||
raise NotImplementedError()
|
||||
if job_name is None:
|
||||
raise NotImplementedError()
|
||||
if cfg.online_steps > 0:
|
||||
assert cfg.rollout_batch_size == 1, "rollout_batch_size > 1 not supported for online training steps"
|
||||
|
||||
init_logging()
|
||||
|
||||
if cfg.device == "cuda":
|
||||
assert torch.cuda.is_available()
|
||||
else:
|
||||
logging.warning("Using CPU, this will be slow.")
|
||||
# Check device is available
|
||||
get_safe_torch_device(cfg.device, log=True)
|
||||
|
||||
torch.backends.cudnn.benchmark = True
|
||||
torch.backends.cuda.matmul.allow_tf32 = True
|
||||
@@ -220,11 +220,11 @@ def train(cfg: dict, out_dir=None, job_name=None):
|
||||
# TODO: add configurable number of rollout? (default=1)
|
||||
with torch.no_grad():
|
||||
rollout = env.rollout(
|
||||
max_steps=cfg.env.episode_length // cfg.n_action_steps,
|
||||
max_steps=cfg.env.episode_length,
|
||||
policy=td_policy,
|
||||
auto_cast_to_device=True,
|
||||
)
|
||||
assert len(rollout) <= cfg.env.episode_length // cfg.n_action_steps
|
||||
assert len(rollout) <= cfg.env.episode_length
|
||||
# set same episode index for all time steps contained in this rollout
|
||||
rollout["episode"] = torch.tensor([env_step] * len(rollout), dtype=torch.int)
|
||||
online_buffer.extend(rollout)
|
||||
|
||||
Reference in New Issue
Block a user