forked from tangger/lerobot
470 lines
18 KiB
Python
470 lines
18 KiB
Python
|
||
"""
|
||
Teleoperation Realman with a PS5 controller and LLM interaction
|
||
"""
|
||
import time
|
||
import torch
|
||
import numpy as np
|
||
import logging
|
||
from typing import Optional, Tuple, Dict
|
||
from dataclasses import dataclass, field, replace
|
||
from collections import deque
|
||
import signal
|
||
import sys
|
||
from lerobot.common.robot_devices.teleop.realman_aloha_dual import HybridController
|
||
from lerobot.common.robot_devices.motors.utils import get_motor_names, make_motors_buses_from_configs
|
||
from lerobot.common.robot_devices.cameras.utils import make_cameras_from_configs
|
||
from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
|
||
from lerobot.common.robot_devices.robots.configs import RealmanDualRobotConfig
|
||
from lerobot.common.robot_devices.robots.utils import (
|
||
ask_llm,
|
||
extract_json_from_response,
|
||
create_keyboard_listener,
|
||
start_keyboard_listener,
|
||
stop_keyboard_listener,
|
||
speak_async,
|
||
handle_llm_interaction_with_images
|
||
)
|
||
|
||
|
||
class RealmanDualRobot:
|
||
"""RealmanDual机器人控制类,支持双臂操作和LLM交互"""
|
||
|
||
def __init__(self, config: RealmanDualRobotConfig | None = None, **kwargs):
|
||
if config is None:
|
||
config = RealmanDualRobotConfig()
|
||
|
||
# 配置初始化
|
||
self.config = replace(config, **kwargs)
|
||
self.robot_type = self.config.type
|
||
self.inference_time = self.config.inference_time
|
||
|
||
# 硬件初始化
|
||
self.cameras = make_cameras_from_configs(self.config.cameras)
|
||
self.piper_motors = make_motors_buses_from_configs(self.config.follower_arm)
|
||
self.arm = self.piper_motors['main']
|
||
|
||
# 控制系统初始化
|
||
self._initialize_teleop()
|
||
self._initialize_state()
|
||
self._initialize_keyboard_interface()
|
||
|
||
# 状态标志
|
||
self._shutdown_flag = False
|
||
self._llm_triggered = False
|
||
|
||
|
||
def _initialize_teleop(self):
|
||
"""初始化遥操作控制器"""
|
||
self.init_info = {
|
||
'init_joint': self.arm.init_joint_position,
|
||
'init_pose': self.arm.init_pose,
|
||
'max_gripper': self.config.max_gripper,
|
||
'min_gripper': self.config.min_gripper,
|
||
'servo_config_file': self.config.servo_config_file,
|
||
'end_control_info': {
|
||
'left': self.config.left_end_control_guid,
|
||
'right': self.config.right_end_control_guid
|
||
}
|
||
}
|
||
|
||
if not self.inference_time:
|
||
self.teleop = HybridController(self.init_info)
|
||
else:
|
||
self.teleop = None
|
||
|
||
def _initialize_state(self):
|
||
"""初始化状态管理"""
|
||
self.joint_queue = deque(maxlen=2)
|
||
self.last_endpose = self.arm.init_pose
|
||
self.logs = {}
|
||
self.is_connected = False
|
||
|
||
def _initialize_keyboard_interface(self):
|
||
"""初始化键盘交互接口"""
|
||
self.w_pressed = False # W键触发LLM
|
||
self.q_pressed = False # Q键退出
|
||
self.keyboard_listener = None
|
||
self._start_keyboard_listener()
|
||
|
||
def _start_keyboard_listener(self):
|
||
"""启动键盘监听器"""
|
||
def on_key_press(key_char):
|
||
if key_char == 'w':
|
||
self.w_pressed = True
|
||
print("检测到W键按下")
|
||
elif key_char == 'q':
|
||
self.q_pressed = True
|
||
print("检测到Q键按下")
|
||
self.keyboard_listener = create_keyboard_listener(on_key_press)
|
||
success = start_keyboard_listener(self.keyboard_listener)
|
||
if success:
|
||
print("键盘监听器启动成功 (W键:调用LLM, Q键:退出)")
|
||
else:
|
||
print("键盘监听器启动失败")
|
||
|
||
def _read_robot_state(self) -> dict:
|
||
"""读取机器人状态"""
|
||
before_read_t = time.perf_counter()
|
||
from copy import deepcopy
|
||
state = deepcopy(self.arm.read())
|
||
self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t
|
||
return state
|
||
|
||
def _execute_action(self, action: dict, state: dict):
|
||
"""执行机器人动作"""
|
||
before_write_t = time.perf_counter()
|
||
if action['control_mode'] == 'joint':
|
||
pass
|
||
else:
|
||
if list(action['pose'].values()) != list(state['pose'].values()):
|
||
pose = list(action['pose'].values())
|
||
self.arm.write_endpose_canfd(pose)
|
||
elif list(action['joint'].values()) != list(state['joint'].values()):
|
||
target_joint = list(action['joint'].values())
|
||
self.arm.write(target_joint)
|
||
self.logs["write_pos_dt_s"] = time.perf_counter() - before_write_t
|
||
|
||
def _prepare_record_data(self) -> Tuple[Dict, Dict]:
|
||
"""准备记录数据 - 保持原有逻辑"""
|
||
if len(self.joint_queue) < 2:
|
||
return {}, {}
|
||
state = torch.as_tensor(list(self.joint_queue[0]), dtype=torch.float32)
|
||
action = torch.as_tensor(list(self.joint_queue[-1]), dtype=torch.float32)
|
||
# 捕获图像
|
||
images = self._capture_images()
|
||
# 构建输出字典
|
||
obs_dict = {
|
||
"observation.state": state,
|
||
**{f"observation.images.{name}": img for name, img in images.items()}
|
||
}
|
||
action_dict = {"action": action}
|
||
return obs_dict, action_dict
|
||
|
||
def _update_state_queue(self):
|
||
"""更新状态队列"""
|
||
current_state = self.arm.read()['joint']
|
||
current_state_lst = []
|
||
for data in current_state:
|
||
if "joint" in data:
|
||
current_state_lst.append(current_state[data] / 180)
|
||
elif "gripper" in data:
|
||
current_state_lst.append((current_state[data]-500)/500)
|
||
self.joint_queue.append(current_state_lst)
|
||
|
||
def _capture_images(self) -> Dict[str, torch.Tensor]:
|
||
"""捕获摄像头图像"""
|
||
images = {}
|
||
for name, camera in self.cameras.items():
|
||
before_camread_t = time.perf_counter()
|
||
image = camera.async_read()
|
||
images[name] = torch.from_numpy(image)
|
||
self.logs[f"read_camera_{name}_dt_s"] = camera.logs["delta_timestamp_s"]
|
||
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
|
||
return images
|
||
|
||
# def _handle_llm_interaction(self, obs_dict: Dict) -> bool:
|
||
# """处理LLM交互逻辑"""
|
||
# print("[W键已按下] 正在准备数据并调用LLM...")
|
||
|
||
# # 提取图像数据
|
||
# camera_images = {}
|
||
# for key, value in obs_dict.items():
|
||
# if key.startswith("observation.images."):
|
||
# camera_name = key.replace("observation.images.", "")
|
||
# camera_images[camera_name] = value.cpu().numpy()
|
||
|
||
# # 使用utils中的函数处理LLM交互
|
||
# success, response = handle_llm_interaction_with_images(
|
||
# "将超声仪左下角试管架上的蓝色试管移动到超声仪中",
|
||
# camera_images
|
||
# )
|
||
# return success
|
||
|
||
def connect(self) -> None:
|
||
"""连接机器人和摄像头"""
|
||
if self.is_connected:
|
||
raise RobotDeviceAlreadyConnectedError(
|
||
"RealmanArm is already connected. Do not run `robot.connect()` twice."
|
||
)
|
||
# 连接机械臂
|
||
self.arm.connect(enable=True)
|
||
print("RealmanArm conneted")
|
||
|
||
# 连接摄像头
|
||
for name in self.cameras:
|
||
self.cameras[name].connect()
|
||
self.is_connected = self.is_connected and self.cameras[name].is_connected
|
||
print(f"camera {name} conneted")
|
||
|
||
print("All connected")
|
||
self.is_connected = True
|
||
self.run_calibration()
|
||
|
||
def disconnect(self) -> None:
|
||
"""断开连接"""
|
||
if self._shutdown_flag:
|
||
return
|
||
self._shutdown_flag = True
|
||
|
||
try:
|
||
# 停止键盘监听器
|
||
stop_keyboard_listener(self.keyboard_listener)
|
||
print("键盘监听器已停止")
|
||
|
||
# 停止遥操作控制器
|
||
if hasattr(self, 'teleop') and self.teleop and not self.inference_time:
|
||
self.teleop.stop()
|
||
print("遥操作控制器已停止")
|
||
|
||
# 断开机械臂连接
|
||
if hasattr(self, 'arm'):
|
||
try:
|
||
self.arm.safe_disconnect()
|
||
print("RealmanArm 安全断开连接")
|
||
time.sleep(2)
|
||
self.arm.connect(enable=False)
|
||
print("RealmanArm 已禁用")
|
||
except Exception as e:
|
||
print(f"断开机械臂连接时出错: {e}")
|
||
|
||
# 断开摄像头连接
|
||
if len(self.cameras) > 0:
|
||
for name, cam in self.cameras.items():
|
||
try:
|
||
cam.disconnect()
|
||
print(f"Camera {name} 已断开连接")
|
||
except Exception as e:
|
||
print(f"断开相机 {name} 时出错: {e}")
|
||
|
||
self.is_connected = False
|
||
print("所有设备已断开连接")
|
||
except Exception as e:
|
||
print(f"断开连接时发生错误: {e}")
|
||
|
||
def run_calibration(self):
|
||
"""运行标定"""
|
||
if not self.is_connected:
|
||
raise ConnectionError()
|
||
self.arm.apply_calibration()
|
||
if not self.inference_time:
|
||
self.teleop.reset()
|
||
|
||
def teleop_step(self, record_data=False) -> None | tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
|
||
"""遥操作步骤 - 保持原有数据记录逻辑,添加LLM交互"""
|
||
if not self.is_connected:
|
||
raise ConnectionError()
|
||
|
||
if self.teleop is None and self.inference_time:
|
||
self.teleop = HybridController(self.init_info)
|
||
|
||
try:
|
||
# 检查退出条件
|
||
if self.q_pressed:
|
||
print("检测到Q键,任务终止...")
|
||
speak_async("任务已终止")
|
||
raise KeyboardInterrupt("用户请求退出")
|
||
|
||
# 执行基础遥操作
|
||
state = self._read_robot_state()
|
||
action = self.teleop.get_action(state)
|
||
self._execute_action(action, state)
|
||
|
||
# 更新状态队列
|
||
self._update_state_queue()
|
||
time.sleep(0.019) # 50Hz
|
||
|
||
# 处理数据记录
|
||
if record_data:
|
||
data = self._prepare_record_data()
|
||
|
||
# 处理LLM交互请求(只有在有有效数据时才处理)
|
||
if data[0] and self.w_pressed: # 如果有有效数据且W键被按下
|
||
self.w_pressed = False
|
||
camera_images = {
|
||
name.replace("observation.images.", ""): img.cpu().numpy()
|
||
for name, img in data[0].items()
|
||
if name.startswith("observation.images.")
|
||
}
|
||
success, resp = handle_llm_interaction_with_images(
|
||
"将超声仪左下角试管架上的蓝色试管移动到超声仪中",
|
||
camera_images
|
||
)
|
||
# self.w_pressed = False # 重置标志位
|
||
# self._llm_triggered = True
|
||
# success = self._handle_llm_interaction(data[0])
|
||
if not success:
|
||
print("LLM交互处理失败")
|
||
|
||
# 如果没有有效数据,创建默认数据
|
||
if not data[0]:
|
||
# 创建默认的观测数据
|
||
if len(self.joint_queue) > 0:
|
||
state_tensor = torch.as_tensor(list(self.joint_queue[-1]), dtype=torch.float32)
|
||
else:
|
||
state_tensor = torch.zeros(14, dtype=torch.float32) # 根据你的机器人调整维度
|
||
|
||
# 捕获当前图像
|
||
images = self._capture_images()
|
||
obs_dict = {
|
||
"observation.state": state_tensor,
|
||
**{f"observation.images.{name}": img for name, img in images.items()}
|
||
}
|
||
action_dict = {"action": state_tensor} # 使用相同的状态作为动作
|
||
data = (obs_dict, action_dict)
|
||
return data
|
||
return None
|
||
|
||
except KeyboardInterrupt:
|
||
# 重新抛出键盘中断,让上层处理
|
||
raise
|
||
except Exception as e:
|
||
logging.error(f"遥操作步骤失败: {e}")
|
||
|
||
# 即使出错,在record_data=True时也要返回有效数据
|
||
if record_data:
|
||
# 创建紧急默认数据
|
||
state_tensor = torch.zeros(14, dtype=torch.float32) # 根据你的机器人调整维度
|
||
images = {}
|
||
try:
|
||
images = self._capture_images()
|
||
except:
|
||
# 如果连图像都无法捕获,创建空图像
|
||
for camera_name in self.cameras.keys():
|
||
images[camera_name] = torch.zeros((480, 640, 3), dtype=torch.uint8)
|
||
obs_dict = {
|
||
"observation.state": state_tensor,
|
||
**{f"observation.images.{name}": img for name, img in images.items()}
|
||
}
|
||
action_dict = {"action": state_tensor}
|
||
return obs_dict, action_dict
|
||
return None
|
||
|
||
def send_action(self, action: torch.Tensor) -> torch.Tensor:
|
||
"""发送动作到机器人"""
|
||
if not self.is_connected:
|
||
raise RobotDeviceNotConnectedError(
|
||
"Piper is not connected. You need to run `robot.connect()`."
|
||
)
|
||
|
||
target_joints = action.tolist()
|
||
len_joint = len(target_joints) - 1
|
||
target_joints = [target_joints[i]*180 for i in range(len_joint)] + [target_joints[-1]]
|
||
target_joints[-1] = int(target_joints[-1]*500 + 500)
|
||
self.arm.write(target_joints)
|
||
return action
|
||
|
||
def capture_observation(self) -> dict:
|
||
"""捕获当前观测"""
|
||
if not self.is_connected:
|
||
raise RobotDeviceNotConnectedError(
|
||
"Piper is not connected. You need to run `robot.connect()`."
|
||
)
|
||
before_read_t = time.perf_counter()
|
||
state = self.arm.read()
|
||
self.logs["read_pos_dt_s"] = time.perf_counter() - before_read_t
|
||
|
||
state = torch.as_tensor(list(state.values()), dtype=torch.float32)
|
||
# 读取图像
|
||
images = {}
|
||
for name in self.cameras:
|
||
before_camread_t = time.perf_counter()
|
||
images[name] = self.cameras[name].async_read()
|
||
images[name] = torch.from_numpy(images[name])
|
||
self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"]
|
||
self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t
|
||
# 构建观测字典
|
||
obs_dict = {}
|
||
obs_dict["observation.state"] = state
|
||
for name in self.cameras:
|
||
obs_dict[f"observation.images.{name}"] = images[name]
|
||
return obs_dict
|
||
|
||
def teleop_safety_stop(self):
|
||
"""遥操作安全停止"""
|
||
self.run_calibration()
|
||
@property
|
||
def camera_features(self) -> dict:
|
||
"""获取摄像头特征"""
|
||
cam_ft = {}
|
||
for cam_key, cam in self.cameras.items():
|
||
key = f"observation.images.{cam_key}"
|
||
cam_ft[key] = {
|
||
"shape": (cam.height, cam.width, cam.channels),
|
||
"names": ["height", "width", "channels"],
|
||
"info": None,
|
||
}
|
||
return cam_ft
|
||
|
||
@property
|
||
def motor_features(self) -> dict:
|
||
"""获取电机特征"""
|
||
action_names = get_motor_names(self.piper_motors)
|
||
state_names = get_motor_names(self.piper_motors)
|
||
return {
|
||
"action": {
|
||
"dtype": "float32",
|
||
"shape": (len(action_names),),
|
||
"names": action_names,
|
||
},
|
||
"observation.state": {
|
||
"dtype": "float32",
|
||
"shape": (len(state_names),),
|
||
"names": state_names,
|
||
},
|
||
}
|
||
|
||
@property
|
||
def has_camera(self):
|
||
return len(self.cameras) > 0
|
||
|
||
@property
|
||
def num_cameras(self):
|
||
return len(self.cameras)
|
||
|
||
def __del__(self):
|
||
"""析构函数"""
|
||
try:
|
||
if not self._shutdown_flag:
|
||
self.disconnect()
|
||
except:
|
||
pass
|
||
|
||
def signal_handler(signum, frame):
|
||
"""信号处理器"""
|
||
print("\n收到中断信号,正在安全退出...")
|
||
sys.exit(0)
|
||
|
||
if __name__ == '__main__':
|
||
signal.signal(signal.SIGINT, signal_handler)
|
||
robot = None
|
||
|
||
try:
|
||
robot = RealmanDualRobot()
|
||
robot.connect()
|
||
|
||
print("RealmanDual 机器人控制已启动")
|
||
print("操作说明:")
|
||
print(" - 使用手柄进行遥操作控制")
|
||
print(" - 按 W 键:调用LLM分析当前场景")
|
||
print(" - 按 Q 键:安全退出程序")
|
||
print(" - Ctrl+C:强制退出")
|
||
print("\n等待操作...")
|
||
|
||
while True:
|
||
result = robot.teleop_step(record_data=True)
|
||
if result is None:
|
||
continue
|
||
# 这里可以处理记录的数据
|
||
except KeyboardInterrupt:
|
||
print("\n收到键盘中断信号")
|
||
except Exception as e:
|
||
print(f"程序运行出错: {e}")
|
||
finally:
|
||
if robot:
|
||
try:
|
||
robot.disconnect()
|
||
except:
|
||
pass
|
||
print("程序已完全退出")
|