lerobot/lerobot/common/robot_devices/robots/realman_dual.py


"""
    Teleoperation Realman with a PS5 controller and LLM interaction
"""
import time
import torch
import numpy as np
import logging
from typing import Optional, Tuple, Dict
from dataclasses import dataclass, field, replace
from collections import deque
import signal
import sys
from lerobot.common.robot_devices.teleop.realman_aloha_dual import HybridController
from lerobot.common.robot_devices.motors.utils import get_motor_names, make_motors_buses_from_configs
from lerobot.common.robot_devices.cameras.utils import make_cameras_from_configs
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
from lerobot.common.robot_devices.robots.configs import RealmanDualRobotConfig
from lerobot.common.robot_devices.robots.utils import (
    ask_llm,
    extract_json_from_response,
    create_keyboard_listener,
    start_keyboard_listener,
    stop_keyboard_listener,
    speak_async,
    handle_llm_interaction_with_images
)


class RealmanDualRobot:
    """RealmanDual机器人控制类，支持双臂操作和LLM交互"""

    def __init__(self, config: RealmanDualRobotConfig | None = None, **kwargs):
        if config is None:
            config = RealmanDualRobotConfig()

        # 配置初始化
        self.config = replace(config, **kwargs)
        self.robot_type = self.config.type
        self.inference_time = self.config.inference_time

        # 硬件初始化
        self.cameras = make_cameras_from_configs(self.config.cameras)
        self.piper_motors = make_motors_buses_from_configs(self.config.follower_arm)
        self.arm = self.piper_motors['main']

        # 控制系统初始化
        self._initialize_teleop()
        self._initialize_state()
        self._initialize_keyboard_interface()

        # 状态标志
        self._shutdown_flag = False
        self._llm_triggered = False


    def _initialize_teleop(self):
        """初始化遥操作控制器"""
        self.init_info = {
            'init_joint': self.arm.init_joint_position,
            'init_pose': self.arm.init_pose,
            'max_gripper': self.config.max_gripper,
            'min_gripper': self.config.min_gripper,
            'servo_config_file': self.config.servo_config_file,
            'end_control_info': {
                'left': self.config.left_end_control_guid,
                'right': self.config.right_end_control_guid
            }
        }

        if not self.inference_time:
            self.teleop = HybridController(self.init_info)
        else:
            self.teleop = None

    def _initialize_state(self):
        """初始化状态管理"""
        self.joint_queue = deque(maxlen=2)
        self.last_endpose = self.arm.init_pose
        self.logs = {}
        self.is_connected = False

    def _initialize_keyboard_interface(self):
        """初始化键盘交互接口"""
        self.w_pressed = False  # W键触发LLM
        self.q_pressed = False  # Q键退出
        self.keyboard_listener = None
        self._start_keyboard_listener()

    def _start_keyboard_listener(self):
        """启动键盘监听器"""
        def on_key_press(key_char):
            if key_char == 'w':
                self.w_pressed = True
                print("检测到W键按下")
            elif key_char == 'q':
                self.q_pressed = True
                print("检测到Q键按下")
        self.keyboard_listener = create_keyboard_listener(on_key_press)
        success = start_keyboard_listener(self.keyboard_listener)
        if success:
            print("键盘监听器启动成功 (W键:调用LLM, Q键:退出)")
        else:
            print("键盘监听器启动失败")

    def _read_robot_state(self) -> dict:
        """读取机器人状态"""
        before_read_t = time.perf_counter()
        from copy import deepcopy
        state = deepcopy(self.arm.read())
        self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t
        return state

    def _execute_action(self, action: dict, state: dict):
        """执行机器人动作"""
        before_write_t = time.perf_counter()
        if action['control_mode'] == 'joint':
            pass
        else:
            if list(action['pose'].values()) != list(state['pose'].values()):
                pose = list(action['pose'].values())
                self.arm.write_endpose_canfd(pose)
            elif list(action['joint'].values()) != list(state['joint'].values()):
                target_joint = list(action['joint'].values())
                self.arm.write(target_joint)
        self.logs["write_pos_dt_s"] = time.perf_counter() - before_write_t

    def _prepare_record_data(self) -> Tuple[Dict, Dict]:
        """准备记录数据 - 保持原有逻辑"""
        if len(self.joint_queue) < 2:
            return {}, {}
        state = torch.as_tensor(list(self.joint_queue[0]), dtype=torch.float32)
        action = torch.as_tensor(list(self.joint_queue[-1]), dtype=torch.float32)
        # 捕获图像
        images = self._capture_images()
        # 构建输出字典
        obs_dict = {
            "observation.state": state,
            **{f"observation.images.{name}": img for name, img in images.items()}
        }
        action_dict = {"action": action}
        return obs_dict, action_dict

    def _update_state_queue(self):
        """更新状态队列"""
        current_state = self.arm.read()['joint']
        current_state_lst = []
        for data in current_state:
            if "joint" in data:
                current_state_lst.append(current_state[data] / 180)
            elif "gripper" in data:
                current_state_lst.append((current_state[data]-500)/500)
        self.joint_queue.append(current_state_lst)

    def _capture_images(self) -> Dict[str, torch.Tensor]:
        """捕获摄像头图像"""
        images = {}
        for name, camera in self.cameras.items():
            before_camread_t = time.perf_counter()
            image = camera.async_read()
            images[name] = torch.from_numpy(image)
            self.logs[f"read_camera_{name}_dt_s"] = camera.logs["delta_timestamp_s"]
            self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
        return images

    # def _handle_llm_interaction(self, obs_dict: Dict) -> bool:
    #     """处理LLM交互逻辑"""
    #     print("[W键已按下] 正在准备数据并调用LLM...")

    #     # 提取图像数据
    #     camera_images = {}
    #     for key, value in obs_dict.items():
    #         if key.startswith("observation.images."):
    #             camera_name = key.replace("observation.images.", "")
    #             camera_images[camera_name] = value.cpu().numpy()

    #     # 使用utils中的函数处理LLM交互
    #     success, response = handle_llm_interaction_with_images(
    #         "将超声仪左下角试管架上的蓝色试管移动到超声仪中",
    #         camera_images
    #     )
    #     return success

    def connect(self) -> None:
        """连接机器人和摄像头"""
        if self.is_connected:
            raise RobotDeviceAlreadyConnectedError(
                "RealmanArm is already connected. Do not run `robot.connect()` twice."
            )
       # 连接机械臂
        self.arm.connect(enable=True)
        print("RealmanArm conneted")

        # 连接摄像头
        for name in self.cameras:
            self.cameras[name].connect()
            self.is_connected = self.is_connected and self.cameras[name].is_connected
            print(f"camera {name} conneted")

        print("All connected")
        self.is_connected = True
        self.run_calibration()

    def disconnect(self) -> None:
        """断开连接"""
        if self._shutdown_flag:
            return
        self._shutdown_flag = True

        try:
            # 停止键盘监听器
            stop_keyboard_listener(self.keyboard_listener)
            print("键盘监听器已停止")

            # 停止遥操作控制器
            if hasattr(self, 'teleop') and self.teleop and not self.inference_time:
                self.teleop.stop()
                print("遥操作控制器已停止")

            # 断开机械臂连接
            if hasattr(self, 'arm'):
                try:
                    self.arm.safe_disconnect()
                    print("RealmanArm 安全断开连接")
                    time.sleep(2)
                    self.arm.connect(enable=False)
                    print("RealmanArm 已禁用")
                except Exception as e:
                    print(f"断开机械臂连接时出错: {e}")

            # 断开摄像头连接
            if len(self.cameras) > 0:
                for name, cam in self.cameras.items():
                    try:
                        cam.disconnect()
                        print(f"Camera {name} 已断开连接")
                    except Exception as e:
                        print(f"断开相机 {name} 时出错: {e}")

            self.is_connected = False
            print("所有设备已断开连接")
        except Exception as e:
            print(f"断开连接时发生错误: {e}")

    def run_calibration(self):
        """运行标定"""
        if not self.is_connected:
            raise ConnectionError()
        self.arm.apply_calibration()
        if not self.inference_time:
            self.teleop.reset()

    def teleop_step(self, record_data=False) -> None | tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
        """遥操作步骤 - 保持原有数据记录逻辑，添加LLM交互"""
        if not self.is_connected:
            raise ConnectionError()

        if self.teleop is None and self.inference_time:
            self.teleop = HybridController(self.init_info)

        try:
            # 检查退出条件
            if self.q_pressed:
                print("检测到Q键，任务终止...")
                speak_async("任务已终止")
                raise KeyboardInterrupt("用户请求退出")

            # 执行基础遥操作
            state = self._read_robot_state()
            action = self.teleop.get_action(state)
            self._execute_action(action, state)

            # 更新状态队列
            self._update_state_queue()
            time.sleep(0.019)  # 50Hz

            # 处理数据记录
            if record_data:
                data = self._prepare_record_data()

                # 处理LLM交互请求（只有在有有效数据时才处理）
                if data[0] and self.w_pressed:  # 如果有有效数据且W键被按下
                    self.w_pressed = False
                    camera_images = {
                        name.replace("observation.images.", ""): img.cpu().numpy()
                        for name, img in data[0].items()
                        if name.startswith("observation.images.")
                    }
                    success, resp = handle_llm_interaction_with_images(
                        "将超声仪左下角试管架上的蓝色试管移动到超声仪中",
                        camera_images
    )
                    # self.w_pressed = False  # 重置标志位
                    # self._llm_triggered = True
                    # success = self._handle_llm_interaction(data[0])
                    if not success:
                        print("LLM交互处理失败")

                # 如果没有有效数据，创建默认数据
                if not data[0]:
                    # 创建默认的观测数据
                    if len(self.joint_queue) > 0:
                        state_tensor = torch.as_tensor(list(self.joint_queue[-1]), dtype=torch.float32)
                    else:
                        state_tensor = torch.zeros(14, dtype=torch.float32)  # 根据你的机器人调整维度

                    # 捕获当前图像
                    images = self._capture_images()
                    obs_dict = {
                        "observation.state": state_tensor,
                        **{f"observation.images.{name}": img for name, img in images.items()}
                    }
                    action_dict = {"action": state_tensor}  # 使用相同的状态作为动作
                    data = (obs_dict, action_dict)
                return data
            return None

        except KeyboardInterrupt:
            # 重新抛出键盘中断，让上层处理
            raise
        except Exception as e:
            logging.error(f"遥操作步骤失败: {e}")

            # 即使出错，在record_data=True时也要返回有效数据
            if record_data:
                # 创建紧急默认数据
                state_tensor = torch.zeros(14, dtype=torch.float32)  # 根据你的机器人调整维度
                images = {}
                try:
                    images = self._capture_images()
                except:
                    # 如果连图像都无法捕获，创建空图像
                    for camera_name in self.cameras.keys():
                        images[camera_name] = torch.zeros((480, 640, 3), dtype=torch.uint8)
                obs_dict = {
                    "observation.state": state_tensor,
                    **{f"observation.images.{name}": img for name, img in images.items()}
                }
                action_dict = {"action": state_tensor}
                return obs_dict, action_dict
            return None

    def send_action(self, action: torch.Tensor) -> torch.Tensor:
        """发送动作到机器人"""
        if not self.is_connected:
            raise RobotDeviceNotConnectedError(
                "Piper is not connected. You need to run `robot.connect()`."
            )

        target_joints = action.tolist()
        len_joint = len(target_joints) - 1
        target_joints = [target_joints[i]*180 for i in range(len_joint)] + [target_joints[-1]]
        target_joints[-1] = int(target_joints[-1]*500 + 500)
        self.arm.write(target_joints)
        return action

    def capture_observation(self) -> dict:
        """捕获当前观测"""
        if not self.is_connected:
            raise RobotDeviceNotConnectedError(
                "Piper is not connected. You need to run `robot.connect()`."
            )
        before_read_t = time.perf_counter()
        state = self.arm.read()
        self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t

        state = torch.as_tensor(list(state.values()), dtype=torch.float32)
        # 读取图像
        images = {}
        for name in self.cameras:
            before_camread_t = time.perf_counter()
            images[name] = self.cameras[name].async_read()
            images[name] = torch.from_numpy(images[name])
            self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
            self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
        # 构建观测字典
        obs_dict = {}
        obs_dict["observation.state"] = state
        for name in self.cameras:
            obs_dict[f"observation.images.{name}"] = images[name]
        return obs_dict

    def teleop_safety_stop(self):
        """遥操作安全停止"""
        self.run_calibration()
    @property
    def camera_features(self) -> dict:
        """获取摄像头特征"""
        cam_ft = {}
        for cam_key, cam in self.cameras.items():
            key = f"observation.images.{cam_key}"
            cam_ft[key] = {
                "shape": (cam.height, cam.width, cam.channels),
                "names": ["height", "width", "channels"],
                "info": None,
            }
        return cam_ft

    @property
    def motor_features(self) -> dict:
        """获取电机特征"""
        action_names = get_motor_names(self.piper_motors)
        state_names = get_motor_names(self.piper_motors)
        return {
            "action": {
                "dtype": "float32",
                "shape": (len(action_names),),
                "names": action_names,
            },
            "observation.state": {
                "dtype": "float32",
                "shape": (len(state_names),),
                "names": state_names,
            },
        }

    @property
    def has_camera(self):
        return len(self.cameras) > 0

    @property
    def num_cameras(self):
        return len(self.cameras)

    def __del__(self):
        """析构函数"""
        try:
            if not self._shutdown_flag:
                self.disconnect()
        except:
            pass

def signal_handler(signum, frame):
    """信号处理器"""
    print("\n收到中断信号，正在安全退出...")
    sys.exit(0)

if __name__ == '__main__':
    signal.signal(signal.SIGINT, signal_handler)
    robot = None

    try:
        robot = RealmanDualRobot()
        robot.connect()

        print("RealmanDual 机器人控制已启动")
        print("操作说明：")
        print("   - 使用手柄进行遥操作控制")
        print("   - 按 W 键：调用LLM分析当前场景")
        print("   - 按 Q 键：安全退出程序")
        print("   - Ctrl+C：强制退出")
        print("\n等待操作...")

        while True:
            result = robot.teleop_step(record_data=True)
            if result is None:
                continue
            # 这里可以处理记录的数据
    except KeyboardInterrupt:
        print("\n收到键盘中断信号")
    except Exception as e:
        print(f"程序运行出错: {e}")
    finally:
        if robot:
            try:
                robot.disconnect()
            except:
                pass
        print("程序已完全退出")