From 4c8f0020551bc6ba30ac2d7f54906aeed55ab85d Mon Sep 17 00:00:00 2001
From: Adil Zouitine <adilzouitinegm@gmail.com>
Date: Thu, 24 Jul 2025 17:09:12 +0200
Subject: [PATCH] fix(act): disable VAE during offline inference (#1588)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Skip the VAE encoder block unless the model is in training mode, so that it no longer runs during offline inference. In the lerobot dataset, the presence of the 'action' field in the batch was, on its own, enough to trigger the VAE encoder block even at inference time. This leads to a RuntimeError due to mismatched tensor dimensions (3 vs 2) when concatenating cls_embed, robot_state_embed, and action_embed—since action_embed lacks the chunk_size dimension. Gating the block on self.training fixes this, and additionally aligns with the original paper, where variational inference is skipped at inference time.
---
 src/lerobot/policies/act/modeling_act.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lerobot/policies/act/modeling_act.py b/src/lerobot/policies/act/modeling_act.py
index 4a048e63..cfd549b2 100644
--- a/src/lerobot/policies/act/modeling_act.py
+++ b/src/lerobot/policies/act/modeling_act.py
@@ -420,7 +420,7 @@ class ACT(nn.Module):
             batch_size = batch["observation.environment_state"].shape[0]
 
         # Prepare the latent for input to the transformer encoder.
-        if self.config.use_vae and "action" in batch:
+        if self.config.use_vae and "action" in batch and self.training:
             # Prepare the input to the VAE encoder: [cls, *joint_space_configuration, *action_sequence].
             cls_embed = einops.repeat(
                 self.vae_encoder_cls_embed.weight, "1 d -> b 1 d", b=batch_size