Fix diffusion (remove transpose), add prefetch

2024-02-28 17:45:01 +00:00
parent cf5063e50e
commit ac90b9c3ee
6 changed files with 52 additions and 11 deletions
--- a/lerobot/common/datasets/factory.py
+++ b/lerobot/common/datasets/factory.py
@@ -1,9 +1,8 @@
 import torch
 from torchrl.data.replay_buffers import PrioritizedSliceSampler
 from lerobot.common.datasets.pusht import PushtExperienceReplay
 from lerobot.common.datasets.simxarm import SimxarmExperienceReplay
 # from rl.torchrl.data.replay_buffers.samplers import PrioritizedSliceSampler
 from torchrl.data.replay_buffers import PrioritizedSliceSampler
 # TODO(rcadene): implement
@@ -27,6 +26,17 @@ from torchrl.data.replay_buffers import PrioritizedSliceSampler
 def make_offline_buffer(cfg, sampler=None):
    if cfg.policy.balanced_sampling:
        assert cfg.online_steps > 0
        batch_size = None
        pin_memory = False
        prefetch = None
    else:
        assert cfg.online_steps == 0
        num_slices = cfg.policy.batch_size
        batch_size = cfg.policy.horizon * num_slices
        pin_memory = cfg.device == "cuda"
        prefetch = cfg.prefetch
    overwrite_sampler = sampler is not None
@@ -52,6 +62,9 @@ def make_offline_buffer(cfg, sampler=None):
            streaming=False,
            root="data",
            sampler=sampler,
            batch_size=batch_size,
            pin_memory=pin_memory,
            prefetch=prefetch,
        )
    elif cfg.env.name == "pusht":
        offline_buffer = PushtExperienceReplay(
@@ -61,6 +74,9 @@ def make_offline_buffer(cfg, sampler=None):
            streaming=False,
            root="data",
            sampler=sampler,
            batch_size=batch_size,
            pin_memory=pin_memory,
            prefetch=prefetch,
        )
    else:
        raise ValueError(cfg.env.name)
--- a/lerobot/common/policies/diffusion.py
+++ b/lerobot/common/policies/diffusion.py
@@ -119,9 +119,9 @@ class DiffusionPolicy(nn.Module):
        assert batch_size % num_slices == 0
        def process_batch(batch, horizon, num_slices):
-            # trajectory t = 256, horizon h = 5
+            # trajectory t = 64, horizon h = 16
-            # (t h) ... -> h t ...
+            # (t h) ... -> t h ...
-            batch = batch.reshape(num_slices, horizon).transpose(1, 0).contiguous()
+            batch = batch.reshape(num_slices, horizon)  # .transpose(1, 0).contiguous()
            out = {
                "obs": {
@@ -132,7 +132,10 @@ class DiffusionPolicy(nn.Module):
            }
            return out
-        batch = replay_buffer.sample(batch_size)
+        if self.cfg.balanced_sampling:
            batch = replay_buffer.sample(batch_size)
        else:
            batch = replay_buffer.sample()
        batch = process_batch(batch, self.cfg.horizon, num_slices)
        loss = self.diffusion.compute_loss(batch)
@@ -149,4 +152,17 @@ class DiffusionPolicy(nn.Module):
            "total_loss": loss.item(),
            "lr": self.lr_scheduler.get_last_lr()[0],
        }
        # TODO(rcadene): remove the hardcoded 168 used in the check below
        # (in diffusion_policy, len(dataloader) is 168 for a batch_size of 64)
        if step % 168 == 0:
            self.global_step += 1
        return metrics
    def save(self, fp):
        """Write the result of ``self.state_dict()`` to ``fp`` with ``torch.save``."""
        state = self.state_dict()
        torch.save(state, fp)
    def load(self, fp):
        """Read the object stored at ``fp`` with ``torch.load`` and pass it to ``self.load_state_dict``."""
        self.load_state_dict(torch.load(fp))
--- a/lerobot/common/policies/tdmpc.py
+++ b/lerobot/common/policies/tdmpc.py
@@ -359,7 +359,11 @@ class TDMPC(nn.Module):
            weights = batch["_weight"][FIRST_FRAME, :, None]
            return obs, action, next_obses, reward, mask, done, idxs, weights
-        batch = replay_buffer.sample(batch_size)
+        if self.cfg.balanced_sampling:
            batch = replay_buffer.sample(batch_size)
        else:
            batch = replay_buffer.sample()
        obs, action, next_obses, reward, mask, done, idxs, weights = process_batch(
            batch, self.cfg.horizon, num_slices
        )
--- a/lerobot/configs/default.yaml
+++ b/lerobot/configs/default.yaml
@@ -12,6 +12,7 @@ hydra:
 seed: 1337
 device: cuda
 buffer_device: cuda
 prefetch: 4
 eval_freq: ???
 save_freq: ???
 eval_episodes: ???
--- a/lerobot/configs/policy/diffusion.yaml
+++ b/lerobot/configs/policy/diffusion.yaml
@@ -21,7 +21,12 @@ past_action_visible: False
 keypoint_visible_rate: 1.0
 obs_as_global_cond: True
-offline_steps: 50000
+eval_episodes: 50
 eval_freq: 10000
 save_freq: 100000
 log_freq: 250
 offline_steps: 1344000
 online_steps: 0
 policy:
@@ -48,8 +53,7 @@ policy:
  per_alpha: 0.6
  per_beta: 0.4
-  balanced_sampling: true
+  balanced_sampling: false
  utd: 1
  offline_steps: ${offline_steps}
  use_ema: true
--- a/sbatch.sh
+++ b/sbatch.sh
@@ -6,7 +6,7 @@
 #SBATCH --time=2-00:00:00
 #SBATCH --output=/home/rcadene/slurm/%j.out
 #SBATCH --error=/home/rcadene/slurm/%j.err
-#SBATCH --qos=low
+#SBATCH --qos=medium
 #SBATCH --mail-user=re.cadene@gmail.com
 #SBATCH --mail-type=ALL