diff --git a/lerobot/common/robots/so100_follower_end_effector/so100_follower_end_effector.py b/lerobot/common/robots/so100_follower_end_effector/so100_follower_end_effector.py
index 4dad73363..338bff178 100644
--- a/lerobot/common/robots/so100_follower_end_effector/so100_follower_end_effector.py
+++ b/lerobot/common/robots/so100_follower_end_effector/so100_follower_end_effector.py
@@ -18,6 +18,8 @@ import logging
+import time
 from typing import Any, Dict
 
 import numpy as np
 
+from lerobot.common.cameras import make_cameras_from_configs
 from lerobot.common.errors import DeviceNotConnectedError
 from lerobot.common.model.kinematics import RobotKinematics
@@ -56,6 +58,8 @@ class SO100FollowerEndEffector(SO100Follower):
             calibration=self.calibration,
         )
 
+        self.cameras = make_cameras_from_configs(config.cameras)
+
         self.config = config
 
         # Initialize the kinematics module for the so100 robot
@@ -164,3 +168,23 @@ class SO100FollowerEndEffector(SO100Follower):
         )
         # Send joint space action to parent class
         return super().send_action(joint_action)
+
+    def get_observation(self) -> dict[str, Any]:
+        if not self.is_connected:
+            raise DeviceNotConnectedError(f"{self} is not connected.")
+
+        # Read arm position
+        start = time.perf_counter()
+        obs_dict = self.bus.sync_read("Present_Position")
+        obs_dict = {f"{motor}.pos": val for motor, val in obs_dict.items()}
+        dt_ms = (time.perf_counter() - start) * 1e3
+        logger.debug(f"{self} read state: {dt_ms:.1f}ms")
+
+        # Capture images from cameras
+        for cam_key, cam in self.cameras.items():
+            start = time.perf_counter()
+            obs_dict[cam_key] = cam.async_read()
+            dt_ms = (time.perf_counter() - start) * 1e3
+            logger.debug(f"{self} read {cam_key}: {dt_ms:.1f}ms")
+
+        return obs_dict
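For context, the `get_observation()` added above returns a single flat dict: one `"<motor>.pos"` scalar per joint plus one frame per configured camera. A minimal usage sketch (the motor and camera names are illustrative, taken from the usual SO100 setup, and not guaranteed by this diff):

```python
# Hypothetical usage sketch; "front" stands in for whatever camera names
# are configured in SO100FollowerEndEffectorConfig.cameras.
robot = SO100FollowerEndEffector(config)
robot.connect()

obs = robot.get_observation()
# Scalar joint positions, e.g. obs["shoulder_pan.pos"], obs["gripper.pos"], ...
joint_pos = {k: v for k, v in obs.items() if k.endswith(".pos")}
# One HxWxC uint8 frame per camera, e.g. obs["front"]
frames = {k: v for k, v in obs.items() if not k.endswith(".pos")}
```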
diff --git a/lerobot/scripts/server/gym_manipulator.py b/lerobot/scripts/server/gym_manipulator.py
index aafed343f..34fa0b674 100644
--- a/lerobot/scripts/server/gym_manipulator.py
+++ b/lerobot/scripts/server/gym_manipulator.py
@@ -220,15 +220,22 @@ class RobotEnv(gym.Env):
         self.current_step = 0
         self.episode_data = None
 
+        self._joint_names = [f"{key}.pos" for key in self.robot.bus.motors.keys()]
+        self._image_keys = list(self.robot.cameras.keys())
+
         # Read initial joint positions using the bus
-        self.current_joint_positions = self._get_observation()
+        self.current_joint_positions = self._get_observation()["agent_pos"]
+
         self._setup_spaces()
 
-    def _get_observation(self) -> np.ndarray:
-        """Helper to convert a dictionary from bus.sync_read to an ordered numpy array."""
-        joint_positions_dict = self.robot.bus.sync_read("Present_Position")
-        return np.array([joint_positions_dict[name] for name in joint_positions_dict.keys()], dtype=np.float32)
+    def _get_observation(self) -> dict:
+        """Read the robot and return a dict with "agent_pos" joint positions and "pixels" camera images."""
+        obs_dict = self.robot.get_observation()
+        joint_positions = np.array([obs_dict[name] for name in self._joint_names], dtype=np.float32)
+
+        images = {key: obs_dict[key] for key in self._image_keys}
+        return {"agent_pos": joint_positions, "pixels": images}
 
     def _setup_spaces(self):
         """
@@ -244,16 +251,22 @@ class RobotEnv(gym.Env):
         """
         example_obs = self._get_observation()
 
+        observation_spaces = {}
+
         # Define observation spaces for images and other states.
-        image_keys = [key for key in example_obs if "image" in key]
-        observation_spaces = {
-            key: gym.spaces.Box(low=0, high=255, shape=example_obs[key].shape, dtype=np.uint8)
-            for key in image_keys
-        }
+        if "pixels" in example_obs:
+            prefix = "observation.images" if len(example_obs["pixels"]) > 1 else "observation.image"
+            observation_spaces = {
+                f"{prefix}.{key}": gym.spaces.Box(
+                    low=0, high=255, shape=example_obs["pixels"][key].shape, dtype=np.uint8
+                )
+                for key in example_obs["pixels"]
+            }
+
         observation_spaces["observation.state"] = gym.spaces.Box(
             low=0,
             high=10,
-            shape=example_obs["observation.state"].shape,
+            shape=example_obs["agent_pos"].shape,
             dtype=np.float32,
         )
@@ -315,10 +328,10 @@ class RobotEnv(gym.Env):
             - truncated (bool): True if the episode was truncated (e.g., time constraints).
             - info (dict): Additional debugging information including intervention status.
         """
-        self.current_joint_positions = self._get_observation()
+        self.current_joint_positions = self._get_observation()["agent_pos"]
         self.robot.send_action(torch.from_numpy(action))
 
         observation = self._get_observation()
 
         if self.display_cameras:
             self.render()
@@ -412,11 +425,9 @@ class AddJointVelocityToObservation(gym.ObservationWrapper):
         Returns:
             The modified observation with joint velocities.
         """
-        joint_velocities = (observation["observation.state"] - self.last_joint_positions) / self.dt
-        self.last_joint_positions = observation["observation.state"].clone()
-        observation["observation.state"] = torch.cat(
-            [observation["observation.state"], joint_velocities], dim=-1
-        )
+        joint_velocities = (observation["agent_pos"] - self.last_joint_positions) / self.dt
+        self.last_joint_positions = observation["agent_pos"].copy()
+        observation["agent_pos"] = np.concatenate([observation["agent_pos"], joint_velocities], axis=-1)
 
         return observation
@@ -466,12 +477,9 @@ class AddCurrentToObservation(gym.ObservationWrapper):
         Returns:
             The modified observation with current values.
         """
         present_current_dict = self.unwrapped.robot.bus.sync_read("Present_Current")
-        present_current_observation = torch.tensor([present_current_dict[name] for name in present_current_dict.keys()], dtype=np.float32)
-
-        observation["observation.state"] = torch.cat(
-            [observation["observation.state"], present_current_observation], dim=-1
-        )
+        present_current_observation = np.array([present_current_dict[name] for name in present_current_dict], dtype=np.float32)
+        observation["agent_pos"] = np.concatenate([observation["agent_pos"], present_current_observation], axis=-1)
 
         return observation
@@ -740,16 +748,11 @@ class ConvertToLeRobotObservation(gym.ObservationWrapper):
         Returns:
             The processed observation with normalized images and proper tensor formats.
         """
-        for key in observation:
-            observation[key] = observation[key].float()
-            if "image" in key:
-                observation[key] = observation[key].permute(2, 0, 1)
-                observation[key] /= 255.0
-        observation = {
-            key: observation[key].to(self.device, non_blocking=self.device.type == "cuda")
-            for key in observation
-        }
-
+        observation = preprocess_observation(observation)
+        observation = {
+            key: observation[key].to(self.device, non_blocking=self.device.type == "cuda")
+            for key in observation
+        }
         return observation
@@ -1078,7 +1081,7 @@ class EEActionWrapper(gym.ActionWrapper):
         gripper_command = action[-1]
         action = action[:-1]
 
-        current_joint_pos = self.unwrapped._get_observation()
+        current_joint_pos = self.unwrapped._get_observation()["agent_pos"]
         current_ee_pos = self.fk_function(current_joint_pos)
 
         desired_ee_pos[:3, 3] = np.clip(
@@ -1141,7 +1144,7 @@ class EEObservationWrapper(gym.ObservationWrapper):
         Returns:
             Enhanced observation with end-effector pose information.
         """
-        current_joint_pos = self.unwrapped._get_observation()
+        current_joint_pos = self.unwrapped._get_observation()["agent_pos"]
         current_ee_pos = self.fk_function(current_joint_pos)
 
         observation["observation.state"] = torch.cat(
@@ -1917,7 +1920,7 @@ def make_robot_env(cfg: EnvConfig) -> gym.Env:
     #     )
 
     # Control mode specific wrappers
-    control_mode = cfg.wrapper.ee_action_space_params.control_mode
+    control_mode = cfg.wrapper.control_mode
    if control_mode == "gamepad":
         if teleop_device is None:
             raise ValueError("A teleop_device must be instantiated for gamepad control mode.")
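With this change every wrapper in gym_manipulator.py operates on the env's raw observation dict, `{"agent_pos": np.ndarray, "pixels": dict}`, in numpy; torch tensors only appear after `ConvertToLeRobotObservation`. As a sanity check on the finite difference applied by `AddJointVelocityToObservation`, a standalone sketch with made-up readings:

```python
import numpy as np

fps = 30
dt = 1.0 / fps

# Two consecutive joint readings (made-up values, degrees).
prev = np.array([10.0, 20.0, 30.0], dtype=np.float32)
curr = np.array([10.5, 19.0, 30.0], dtype=np.float32)

# Same finite difference the wrapper applies each step:
joint_velocities = (curr - prev) / dt                      # -> [15., -30., 0.] deg/s
agent_pos = np.concatenate([curr, joint_velocities], axis=-1)
assert agent_pos.shape == (6,)                             # state width doubles
```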
""" - for key in observation: - observation[key] = observation[key].float() - if "image" in key: - observation[key] = observation[key].permute(2, 0, 1) - observation[key] /= 255.0 - observation = { - key: observation[key].to(self.device, non_blocking=self.device.type == "cuda") - for key in observation - } - + observation = preprocess_observation(observation) + observation = {key: observation[key].to(self.device, non_blocking=self.device.type == "cuda") for key in observation} return observation @@ -1078,7 +1075,7 @@ class EEActionWrapper(gym.ActionWrapper): gripper_command = action[-1] action = action[:-1] - current_joint_pos = self.unwrapped._get_observation() + current_joint_pos = self.unwrapped._get_observation()["observation.state"] current_ee_pos = self.fk_function(current_joint_pos) desired_ee_pos[:3, 3] = np.clip( @@ -1141,7 +1138,7 @@ class EEObservationWrapper(gym.ObservationWrapper): Returns: Enhanced observation with end-effector pose information. """ - current_joint_pos = self.unwrapped._get_observation() + current_joint_pos = self.unwrapped._get_observation()["observation.state"] current_ee_pos = self.fk_function(current_joint_pos) observation["observation.state"] = torch.cat( @@ -1881,9 +1878,7 @@ def make_robot_env(cfg: EnvConfig) -> gym.Env: env = AddJointVelocityToObservation(env=env, fps=cfg.fps) if cfg.wrapper.add_current_to_observation: env = AddCurrentToObservation(env=env) - if cfg.wrapper.add_ee_pose_to_observation: - if cfg.wrapper.ee_action_space_params is None or cfg.wrapper.ee_action_space_params.bounds is None: - raise ValueError("EEActionSpaceConfig with bounds must be provided for EEObservationWrapper.") + if False and cfg.wrapper.add_ee_pose_to_observation: env = EEObservationWrapper(env=env, ee_pose_limits=cfg.wrapper.ee_action_space_params.bounds) env = ConvertToLeRobotObservation(env=env, device=cfg.device) @@ -1917,7 +1912,7 @@ def make_robot_env(cfg: EnvConfig) -> gym.Env: # ) # Control mode specific wrappers - control_mode = cfg.wrapper.ee_action_space_params.control_mode + control_mode = cfg.wrapper.control_mode if control_mode == "gamepad": if teleop_device is None: raise ValueError("A teleop_device must be instantiated for gamepad control mode.") diff --git a/lerobot/teleoperate.py b/lerobot/teleoperate.py index 3b7ae8386..1b92a1a37 100644 --- a/lerobot/teleoperate.py +++ b/lerobot/teleoperate.py @@ -91,6 +91,7 @@ def teleop_loop( if isinstance(val, float): rr.log(f"action_{act}", rr.Scalar(val)) + breakpoint() robot.send_action(action) loop_s = time.perf_counter() - loop_start