diff --git a/lerobot/common/policies/smolvla/smolvlm_with_expert.py b/lerobot/common/policies/smolvla/smolvlm_with_expert.py
index ce3b5cd0..07eae808 100644
--- a/lerobot/common/policies/smolvla/smolvlm_with_expert.py
+++ b/lerobot/common/policies/smolvla/smolvlm_with_expert.py
@@ -133,9 +133,7 @@ class SmolVLMWithExpertModel(nn.Module):
         self.expert_hidden_size = lm_expert_config.hidden_size
         self.set_requires_grad()
 
-    def get_vlm_model(
-        self,
-    ):
+    def get_vlm_model(self):
         return self.vlm.model
 
     def set_requires_grad(self):
diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py
index 88fe97ea..13beda8b 100644
--- a/lerobot/common/robot_devices/control_utils.py
+++ b/lerobot/common/robot_devices/control_utils.py
@@ -109,8 +109,9 @@ def predict_action(observation, policy, device, use_amp):
     ):
         # Convert to pytorch format: channel first and float32 in [0,1] with batch dimension
         for name in observation:
+            # Skip non-tensor observations (e.g. the text inputs of VLA-like and multirobot policies)
             if not isinstance(observation[name], torch.Tensor):
-                continue  # VLA-like and multirobot policies include textual inputs in the observation
+                continue
 
             if "image" in name:
                 observation[name] = observation[name].type(torch.float32) / 255