# sci-gui-agent-benchmark/scripts/tools/extract_task_from_tutorial.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
从教程信息快速生成任务定义
用法:
    python scripts/tools/extract_task_from_tutorial.py
"""

import json
import os
import sys
from pathlib import Path

# Put the project root (the third ancestor directory of this file) at the
# front of sys.path so that the `scripts.tools` import below can be resolved.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from scripts.tools.init_task import init_task


# Menu choice (the digit string typed by the user) -> task category name.
# The position of each name in the tuple determines its menu number.
CATEGORY_MAP = {
    str(choice): name
    for choice, name in enumerate(
        (
            "basic_processing",
            "peak_analysis",
            "phase_identification",
            "crystal_parameters",
            "calibration",
            "advanced_analysis",
        ),
        start=1,
    )
}

# Menu choice (the digit string typed by the user) -> difficulty level name.
DIFFICULTY_MAP = dict(zip("123", ("easy", "medium", "hard")))

# Common instruction templates, keyed first by task category and then by
# template name. Each value is a str.format() template: {filename} is filled
# with the input file name and, where present, {output} with the exported
# file name. Categories that have no entry here (calibration,
# advanced_analysis) offer no templates.
TASK_TEMPLATES = {
    "basic_processing": {
        "open_file": "请打开桌面上的 {filename} 文件。",
        "smooth": "请打开桌面上的 {filename} 文件，进行平滑处理 (Smoothing)，然后将处理后的曲线导出为 ASCII (.txt) 文件并命名为 {output}。",
        "background": "请打开桌面上的 {filename} 文件，进行背景扣除 (Background Removal)，然后将处理后的曲线导出为 ASCII (.txt) 文件并命名为 {output}。",
        "export": "请打开桌面上的 {filename} 文件，将当前曲线导出为 ASCII (.txt) 文件并命名为 {output}。",
    },
    "peak_analysis": {
        "peak_search": "请打开桌面上的 {filename} 文件，进行寻峰操作 (Peak Search)，并导出寻峰结果文件 {output}。",
        "peak_separation": "请打开桌面上的 {filename} 文件，进行多峰分离操作 (Peak Separation)，并导出结果文件 {output}。",
        "peak_fitting": "请打开桌面上的 {filename} 文件，进行峰形拟合 (Peak Fitting)，并导出结果文件 {output}。",
    },
    "phase_identification": {
        "phase_search": "请打开桌面上的 {filename} 文件，进行物相检索 (Phase Search)，并导出检索结果文件 {output}。",
        "quantitative": "请打开桌面上的 {filename} 文件，进行物相定量分析 (Quantitative Analysis)，并导出结果文件 {output}。",
    },
    "crystal_parameters": {
        "lattice_constant": "请打开桌面上的 {filename} 文件，精确测定晶格常数 (Lattice Constant)，并导出结果文件 {output}。",
        "crystal_size": "请打开桌面上的 {filename} 文件，使用Scherrer公式计算晶粒大小 (Crystal Size)，并导出结果文件 {output}。",
        "stress": "请打开桌面上的 {filename} 文件，进行残余应力分析 (Stress Analysis)，并导出结果文件 {output}。",
        "crystallinity": "请打开桌面上的 {filename} 文件，计算结晶化度 (Crystallinity)，并导出结果文件 {output}。",
    },
}


def print_category_menu():
    """Print the numbered task-category menu to stdout, one entry per line."""
    menu_lines = (
        "\n📚 任务类别:",
        "  1. basic_processing (基础处理)",
        "  2. peak_analysis (峰分析)",
        "  3. phase_identification (物相检索)",
        "  4. crystal_parameters (晶体参数)",
        "  5. calibration (校正)",
        "  6. advanced_analysis (高级分析)",
    )
    for line in menu_lines:
        print(line)


def print_difficulty_menu():
    """Print the numbered difficulty-level menu to stdout, one entry per line."""
    menu_lines = (
        "\n📊 难度等级:",
        "  1. easy (简单，3-5步操作)",
        "  2. medium (中等，5-10步操作)",
        "  3. hard (困难，10+步操作)",
    )
    for line in menu_lines:
        print(line)


def get_user_input():
    """Interactively collect the fields needed to define a task.

    Prompts on stdin, in order, for: task ID, category, difficulty, source
    file, output file name, an optional instruction template (only when the
    chosen category has templates), the instruction text, and an optional
    tutorial source.

    Returns:
        A dict with the keys ``task_id``, ``category``, ``difficulty``,
        ``instruction``, ``source_file``, ``output_filename`` and
        ``tutorial_source``, or ``None`` when the task ID or instruction is
        empty or the category/difficulty choice is not a valid menu entry.
    """
    print("=" * 60)
    print("🎯 从教程提取任务 - 快速生成工具")
    print("=" * 60)

    # Task ID (required).
    task_id = input("\n📝 任务ID (例如: peak_search_001): ").strip()
    if not task_id:
        print("❌ 任务ID不能为空")
        return None

    # Category (must be one of the menu numbers).
    print_category_menu()
    category_choice = input("\n选择类别 (1-6): ").strip()
    category = CATEGORY_MAP.get(category_choice)
    if not category:
        print("❌ 无效的类别选择")
        return None

    # Difficulty (must be one of the menu numbers).
    print_difficulty_menu()
    difficulty_choice = input("\n选择难度 (1-3): ").strip()
    difficulty = DIFFICULTY_MAP.get(difficulty_choice)
    if not difficulty:
        print("❌ 无效的难度选择")
        return None

    # Input file; an empty answer falls back to the demo file.
    print("\n📁 输入文件配置:")
    source_file = input("  源文件路径 (相对于data/source/, 例如: DEMO01.MDI): ").strip()
    if not source_file:
        source_file = "DEMO01.MDI"

    # Output file; an empty answer falls back to result.txt.
    print("\n📤 输出文件配置:")
    output_filename = input("  输出文件名 (例如: result.txt): ").strip()
    if not output_filename:
        output_filename = "result.txt"

    # Optional template selection (only for categories that define templates).
    task_type = None
    if category in TASK_TEMPLATES:
        templates = TASK_TEMPLATES[category]
        template_keys = list(templates)
        print(f"\n📋 可用任务模板 ({category}):")
        for i, key in enumerate(template_keys, 1):
            print(f"  {i}. {key}")

        use_template = input("\n使用模板? (y/n, 默认n): ").strip().lower()
        if use_template == 'y':
            template_choice = input(f"选择模板 (1-{len(templates)}): ").strip()
            try:
                template_index = int(template_choice)
            except ValueError:
                # Not a number at all: treat it as out of range.
                template_index = 0
            # Validate the range explicitly. Indexing with
            # ``template_index - 1`` alone would accept "0" and negative
            # numbers, because negative list indices count from the end and
            # would silently select the wrong template.
            if 1 <= template_index <= len(template_keys):
                task_type = template_keys[template_index - 1]
            else:
                print("⚠️  无效的模板选择，将使用自定义指令")

    # Instruction text: rendered from the template, or typed by the user.
    if task_type is not None:
        template = TASK_TEMPLATES[category][task_type]
        instruction = template.format(
            filename=os.path.basename(source_file),
            output=output_filename
        )
        print(f"\n✅ 生成的指令 (模板): {instruction}")
        confirm = input("使用此指令? (y/n, 默认y): ").strip().lower()
        if confirm == 'n':
            # The user rejected the generated text; ask for a custom one.
            instruction = input("\n📝 自定义指令: ").strip()
    else:
        instruction = input("\n📝 任务指令 (中文描述): ").strip()

    if not instruction:
        print("❌ 指令不能为空")
        return None

    # Tutorial source (optional; may be an empty string).
    tutorial_source = input("\n📚 教程来源 (可选，例如: 教程(1)): ").strip()

    return {
        "task_id": task_id,
        "category": category,
        "difficulty": difficulty,
        "instruction": instruction,
        "source_file": source_file,
        "output_filename": output_filename,
        "tutorial_source": tutorial_source,
    }


def create_task_from_info(info, *, vm_desktop="C:\\Users\\lzy\\Desktop"):
    """Create a task via ``init_task`` and fill in its input/output config.

    After ``init_task`` has run, ``tasks/<task_id>/task.json`` under the
    project root is loaded, its ``input`` and ``output`` sections are
    overwritten, the optional ``tutorial_source`` is added, and the file is
    written back.

    Args:
        info: Dict as returned by ``get_user_input`` (keys ``task_id``,
            ``category``, ``difficulty``, ``instruction``, ``source_file``,
            ``output_filename`` and optionally ``tutorial_source``).
        vm_desktop: Windows directory inside the VM where the input file is
            injected and from which the output file is collected. The
            default keeps the previously hard-coded location.

    Returns:
        ``True`` when the task was created and ``task.json`` updated,
        ``False`` when ``task.json`` is missing after ``init_task`` or any
        exception was raised (the traceback is printed).
    """
    task_id = info["task_id"]
    category = info["category"]
    difficulty = info["difficulty"]
    instruction = info["instruction"]

    # A relative source path is taken to live under data/source/ and is
    # stored with a ../../ prefix; absolute paths are stored unchanged.
    source_file = info["source_file"]
    if not os.path.isabs(source_file):
        source_file = f"../../data/source/{source_file}"

    # Paths inside the VM: where the input is placed and the output is read.
    filename = os.path.basename(source_file)
    inject_to = f"{vm_desktop}\\{filename}"

    output_filename = info["output_filename"]
    collect_from = f"{vm_desktop}\\{output_filename}"

    print(f"\n🚀 正在创建任务: {task_id}")
    print(f"   类别: {category}")
    print(f"   难度: {difficulty}")
    print(f"   源文件: {source_file}")
    print(f"   输出文件: {output_filename}")

    try:
        init_task(
            task_id=task_id,
            category=category,
            difficulty=difficulty,
            instruction=instruction,
            project_root=str(project_root)
        )

        # init_task is expected to have produced this file; bail out if not.
        task_json_path = project_root / "tasks" / task_id / "task.json"
        if not task_json_path.exists():
            print(f"❌ 任务目录创建失败: {task_json_path}")
            return False

        with open(task_json_path, 'r', encoding='utf-8') as f:
            task_config = json.load(f)

        # Overwrite the input/output sections with the collected values.
        task_config["input"] = {
            "source_file": source_file,
            "inject_to": inject_to
        }
        task_config["output"] = {
            "expected_file": output_filename,
            "collect_from": collect_from
        }

        # Record the tutorial source only when one was provided.
        if info.get("tutorial_source"):
            task_config["tutorial_source"] = info["tutorial_source"]

        with open(task_json_path, 'w', encoding='utf-8') as f:
            json.dump(task_config, f, ensure_ascii=False, indent=2)

        print("\n✅ 任务创建成功!")
        print(f"   任务目录: tasks/{task_id}/")
        print(f"   配置文件: tasks/{task_id}/task.json")
        print("\n📝 下一步:")
        print("   1. 检查并完善 task.json")
        print(f"   2. 运行: python scripts/tools/collect_task.py {task_id} --mode full")

        return True

    except Exception as e:
        # Boundary handler for this CLI step: report and signal failure
        # instead of propagating.
        print(f"❌ 创建任务时出错: {e}")
        import traceback
        traceback.print_exc()
        return False


def main():
    """Run the interactive task-creation workflow.

    Returns:
        The process exit status: 0 when the task was created, 1 when the
        input was rejected, task creation failed or an unexpected error
        occurred, and 130 when the user interrupted with Ctrl-C.
    """
    try:
        info = get_user_input()
        if not info:
            print("\n❌ 任务创建取消")
            return 1
        # create_task_from_info reports success as a bool.
        return 0 if create_task_from_info(info) else 1
    except KeyboardInterrupt:
        print("\n\n⚠️  用户取消操作")
        return 130
    except Exception as e:
        print(f"\n❌ 发生错误: {e}")
        import traceback
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    # Propagate the outcome to the shell so failures are not reported as 0.
    sys.exit(main())