Initial commit

2026-01-12 18:30:12 +08:00
commit 214e15c04c
102 changed files with 27857 additions and 0 deletions
--- a/scripts/tools/process_trajectory.py
+++ b/scripts/tools/process_trajectory.py
@@ -0,0 +1,165 @@
+"""
+轨迹数据后处理
+将录制的原始Host坐标转换为VM内坐标
+"""
+import json
+import os
+import sys
+import argparse
+import logging
+
+# Configure the root logger at import time with the INFO threshold.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module; used by process_trajectory below.
+logger = logging.getLogger(__name__)
+
+
+def process_trajectory(task_id, project_root=".", force=False,
+                       scale_x_adjust=1.0, scale_y_adjust=1.0,
+                       offset_x=0, offset_y=0):
+    """
+    Convert the recorded host coordinates of a task trajectory to VM coordinates.
+
+    Reads ``<project_root>/tasks/<task_id>/human_demo/actions_raw.json``, adds a
+    ``pos_vm`` entry to every action that has a non-empty ``pos_host``, records
+    the conversion parameters under ``metadata['processed']`` and writes the
+    result to ``actions.json`` in the same directory. A preview of the first
+    converted clicks is then printed to stdout.
+
+    Args:
+        task_id: Task ID (directory name under ``tasks``).
+        project_root: Project root directory.
+        force: Overwrite an already existing ``actions.json`` when True.
+        scale_x_adjust: Extra multiplier applied to the X scale factor.
+        scale_y_adjust: Extra multiplier applied to the Y scale factor.
+        offset_x: Offset added to the scaled X coordinate.
+        offset_y: Offset added to the scaled Y coordinate.
+
+    Returns:
+        True when the processed file was written. False when the raw file is
+        missing, when the processed file exists and ``force`` is not set, or
+        when the screenshot resolution in the metadata is not positive.
+    """
+    # Function-scope import keeps this block self-contained.
+    from datetime import datetime
+
+    # Paths
+    task_dir = os.path.join(project_root, "tasks", task_id)
+    human_demo_dir = os.path.join(task_dir, "human_demo")
+    raw_path = os.path.join(human_demo_dir, "actions_raw.json")
+    processed_path = os.path.join(human_demo_dir, "actions.json")
+
+    # Check files
+    if not os.path.exists(raw_path):
+        logger.error(f"❌ 原始轨迹文件不存在: {raw_path}")
+        logger.info("   请先运行: python scripts/collect_task.py <task_id> --mode record")
+        return False
+
+    if os.path.exists(processed_path) and not force:
+        logger.warning(f"⚠️  处理后的文件已存在: {processed_path}")
+        logger.info("   使用 --force 参数强制覆盖")
+        return False
+
+    # Load the raw recording
+    logger.info(f"读取原始轨迹: {raw_path}")
+    with open(raw_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    metadata = data['metadata']
+    actions = data['actions']
+
+    # A missing task_id in the metadata must not abort processing just for a log line.
+    logger.info(f"任务ID: {metadata.get('task_id', task_id)}")
+    logger.info(f"动作数: {len(actions)}")
+
+    # Resolution information
+    if 'vm_resolution' in metadata and 'vm_screenshot_resolution' in metadata:
+        vm_w, vm_h = metadata['vm_resolution']
+        screenshot_w, screenshot_h = metadata['vm_screenshot_resolution']
+
+        # A zero/negative screenshot size would divide by zero (or flip the axes) below.
+        if screenshot_w <= 0 or screenshot_h <= 0:
+            logger.error(f"❌ 截图分辨率无效: {screenshot_w}x{screenshot_h}")
+            return False
+
+        # Host click coordinates are in screenshot space; scale them to the
+        # actual resolution inside the VM.
+        scale_x = (vm_w / screenshot_w) * scale_x_adjust
+        scale_y = (vm_h / screenshot_h) * scale_y_adjust
+
+        logger.info(f"VM分辨率: {vm_w}x{vm_h}")
+        logger.info(f"截图分辨率: {screenshot_w}x{screenshot_h}")
+        logger.info(f"转换比例: X={scale_x:.3f}, Y={scale_y:.3f}")
+
+        if scale_x_adjust != 1.0 or scale_y_adjust != 1.0:
+            logger.info(f"应用调整系数: X={scale_x_adjust}, Y={scale_y_adjust}")
+        if offset_x != 0 or offset_y != 0:
+            logger.info(f"应用偏移调整: X={offset_x}, Y={offset_y}")
+    else:
+        logger.warning("⚠️  元数据缺少分辨率信息，使用默认比例1.0")
+        scale_x = 1.0 * scale_x_adjust
+        scale_y = 1.0 * scale_y_adjust
+
+    # Convert coordinates; actions without a usable pos_host are left untouched.
+    converted_count = 0
+    for action in actions:
+        pos_host = action.get('pos_host')
+        if not pos_host:
+            continue
+        host_x, host_y = pos_host
+        # int() truncates toward zero, matching the established output format.
+        action['pos_vm'] = [
+            int(host_x * scale_x + offset_x),
+            int(host_y * scale_y + offset_y),
+        ]
+        converted_count += 1
+
+    logger.info(f"✅ 坐标转换完成: {converted_count}/{len(actions)} 个动作")
+
+    # Record how the conversion was done in the metadata
+    metadata['processed'] = {
+        "processed_at": datetime.now().isoformat(),
+        "scale_x": scale_x,
+        "scale_y": scale_y,
+        "offset_x": offset_x,
+        "offset_y": offset_y,
+        "converted_actions": converted_count
+    }
+
+    # Save the processed data
+    logger.info(f"保存处理后的轨迹: {processed_path}")
+    with open(processed_path, 'w', encoding='utf-8') as f:
+        json.dump(data, f, indent=2, ensure_ascii=False)
+
+    logger.info("✅ 处理完成！")
+
+    _print_conversion_preview(actions, task_id)
+
+    return True
+
+
+def _print_conversion_preview(actions, task_id):
+    """Print the host -> VM mapping of the first five convertible clicks, plus a next-step hint."""
+    print("\n" + "=" * 60)
+    print("📊 坐标转换示例（前5个点击）:")
+    print("-" * 60)
+
+    click_count = 0
+    for action in actions:
+        pos_host = action.get('pos_host')
+        # Skip non-clicks, actions without a type, and clicks whose pos_host is
+        # missing/null/empty: those were never converted and cannot be unpacked.
+        if action.get('type') != 'click' or not pos_host:
+            continue
+        host_x, host_y = pos_host
+        vm_x, vm_y = action.get('pos_vm') or (0, 0)
+        # Show as integers
+        print(f"  Host({int(host_x):4d}, {int(host_y):4d}) → VM({int(vm_x):4d}, {int(vm_y):4d})")
+
+        click_count += 1
+        if click_count >= 5:
+            break
+
+    print("=" * 60)
+    print("\n💡 下一步：可视化验证（可选）")
+    print(f"   python scripts/visualize_trajectory.py {task_id}")
+    print("=" * 60 + "\n")
+
+
+def main():
+    """Command-line entry point: parse the arguments, run the conversion and exit.
+
+    The process exits with status 0 when ``process_trajectory`` reports success
+    and with status 1 otherwise.
+    """
+    cli = argparse.ArgumentParser(description="处理轨迹数据，转换坐标")
+    cli.add_argument("task_id", help="任务ID")
+    cli.add_argument("--project-root", default=".", help="项目根目录")
+    cli.add_argument("--force", action="store_true", help="强制覆盖已有文件")
+
+    # The numeric tuning options differ only in flag, type, default and help text.
+    numeric_options = (
+        ("--scale-x", float, 1.0, "X轴缩放调整系数"),
+        ("--scale-y", float, 1.0, "Y轴缩放调整系数"),
+        ("--offset-x", int, 0, "X轴偏移调整"),
+        ("--offset-y", int, 0, "Y轴偏移调整"),
+    )
+    for flag, value_type, default_value, help_text in numeric_options:
+        cli.add_argument(flag, type=value_type, default=default_value, help=help_text)
+
+    parsed = cli.parse_args()
+
+    call_kwargs = {
+        "task_id": parsed.task_id,
+        "project_root": parsed.project_root,
+        "force": parsed.force,
+        "scale_x_adjust": parsed.scale_x,
+        "scale_y_adjust": parsed.scale_y,
+        "offset_x": parsed.offset_x,
+        "offset_y": parsed.offset_y,
+    }
+    succeeded = process_trajectory(**call_kwargs)
+
+    exit_code = 0 if succeeded else 1
+    sys.exit(exit_code)
+
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
+