From 4c8f0020551bc6ba30ac2d7f54906aeed55ab85d Mon Sep 17 00:00:00 2001 From: Adil Zouitine Date: Thu, 24 Jul 2025 17:09:12 +0200 Subject: [PATCH] fix(act): disable VAE during offline inference (#1588) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent VAE inference when running in offline mode. In the lerobot dataset, the presence of the 'action' field incorrectly triggers the VAE inference block. This leads to a RuntimeError due to mismatched tensor dimensions (3 vs 2) when concatenating cls_embed, robot_state_embed, and action_embed—since action_embed lacks the chunk_size dimension. Additionally, this aligns with the original paper, where variational inference is skipped during inference. --- src/lerobot/policies/act/modeling_act.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lerobot/policies/act/modeling_act.py b/src/lerobot/policies/act/modeling_act.py index 4a048e63..cfd549b2 100644 --- a/src/lerobot/policies/act/modeling_act.py +++ b/src/lerobot/policies/act/modeling_act.py @@ -420,7 +420,7 @@ class ACT(nn.Module): batch_size = batch["observation.environment_state"].shape[0] # Prepare the latent for input to the transformer encoder. - if self.config.use_vae and "action" in batch: + if self.config.use_vae and "action" in batch and self.training: # Prepare the input to the VAE encoder: [cls, *joint_space_configuration, *action_sequence]. cls_embed = einops.repeat( self.vae_encoder_cls_embed.weight, "1 d -> b 1 d", b=batch_size