import json import numpy as np import os import sys import re def load_xrd_data(file_path): """ 读取 XRD 导出的 txt 文件(如 background_result.txt),跳过头部的 Metadata。 """ if not os.path.exists(file_path): raise FileNotFoundError(f"文件不存在: {file_path}") data = [] with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: for line in f: line = line.strip() if not line: continue parts = line.split() try: values = [float(x) for x in parts] if len(values) >= 2: data.append(values[:2]) except ValueError: continue if not data: raise ValueError(f"在文件中未找到有效的数值数据: {file_path}") return np.array(data) def load_peak_report(file_path): """ 专门解析 JADE Peak Search Report (.pid) 提取表格部分的数值数据 """ if not os.path.exists(file_path): raise FileNotFoundError(f"文件不存在: {file_path}") peaks = [] metadata = {} with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: lines = f.readlines() # 查找表格开始的位置(在含有 "2-Theta" 的行之后) table_started = False for line in lines: line = line.strip() if not line: continue # 提取元数据示例: JADE: Peak Search Report (72 Peaks, Max P/N = 19.3) if "Peak Search Report" in line: match = re.search(r"\((\d+) Peaks", line) if match: metadata['reported_peak_count'] = int(match.group(1)) # 识别表头 if "2-Theta" in line and "d(" in line: table_started = True continue if table_started: parts = line.split() # Peak 报告通常每行有 8-9 个字段 if len(parts) >= 6: try: # 尝试将前几个字段转为 float # 0: 2-Theta, 1: d, 2: BG, 3: Height, 4: I%, 5: Area, 6: I%, 7: FWHM peak_data = [float(p) for p in parts[:8]] peaks.append(peak_data) except ValueError: # 如果转换失败,可能是说明文字或空行,跳过 continue if not peaks: raise ValueError(f"未能从 Peak 报告中解析出有效数据: {file_path}") return np.array(peaks), metadata def evaluate(gt_path, agent_path, tolerance=1e-4, mode="xrd_data"): """ 对比 Ground Truth 和 Agent Output。 支持两种模式: - xrd_data: 对比 (2-Theta, Intensity) 原始数据点 - peak_report: 对比 Peak 搜索结果列表 """ try: if mode == "peak_report": gt_data, gt_meta = load_peak_report(gt_path) agent_data, agent_meta = load_peak_report(agent_path) # 对于 Peak 报告,我们主要关注 2-Theta 位置和 Height # 这里对比全表,但放宽容差,因为 Peak Search 的算法可能在不同环境下有极细微差异 if gt_data.shape != agent_data.shape: # 如果数量不匹配,直接判定失败 return 0, f"失败: Peak 数量不匹配。GT {len(gt_data)}, Agent {len(agent_data)}" else: gt_data = load_xrd_data(gt_path) agent_data = load_xrd_data(agent_path) if gt_data.shape != agent_data.shape: return 0, f"失败: 数据维度不匹配。GT 形状 {gt_data.shape}, Agent 形状 {agent_data.shape}" diff = np.abs(gt_data - agent_data) max_error = np.max(diff) if max_error < tolerance: return 1, f"成功: 最大绝对误差 {max_error:.2e} < 阈值 {tolerance}" else: return 0, f"失败: 最大绝对误差 {max_error:.2e} 超过阈值 {tolerance}" except Exception as e: return 0, f"错误: {str(e)}" def evaluate_by_config(config_path): """ 根据任务配置文件进行评测。 """ # 兼容性处理:如果传入的是任务 ID 路径 if not os.path.isabs(config_path) and not config_path.startswith('.'): # 尝试补全路径,例如 instructions/smoothing_001.json if not config_path.endswith('.json'): config_path = config_path + ".json" if not os.path.exists(config_path): # 尝试在任务目录下找 pass base_dir = os.path.dirname(os.path.dirname(os.path.abspath(config_path))) with open(config_path, 'r', encoding='utf-8') as f: config = json.load(f) eval_cfg = config.get('evaluation', {}) # 处理相对路径 gt_path = os.path.join(base_dir, eval_cfg['ground_truth']) agent_path = os.path.join(base_dir, eval_cfg['target_output']) tolerance = eval_cfg.get('tolerance', 1e-4) # 自动识别模式 mode = "xrd_data" if gt_path.lower().endswith('.pid') or eval_cfg.get('type') == 'peak_report': mode = "peak_report" # Peak 报告的默认容差放宽一些,因为算法可能受环境微小影响 if 'tolerance' not in eval_cfg: tolerance = 1e-2 print(f"--- 正在执行评测: {config.get('id', 'unknown')} ---") print(f"指令: {config.get('instruction')}") print(f"模式: {mode}") score, message = evaluate(gt_path, agent_path, tolerance, mode=mode) return score, message if __name__ == "__main__": if len(sys.argv) == 2 and sys.argv[1].endswith('.json'): # 模式 A: Config 模式 try: score, message = evaluate_by_config(sys.argv[1]) except Exception as e: print(f"配置文件解析失败: {e}") sys.exit(1) elif len(sys.argv) >= 3: # 模式 B: 直接对比模式 score, message = evaluate(sys.argv[1], sys.argv[2]) else: print("用法:") print(" python scripts/evaluator.py instructions/smoothing_001.json") print(" python scripts/evaluator.py ") sys.exit(1) print("-" * 30) print(f"Score: {score}") print(f"Reason: {message}") print("-" * 30) sys.exit(0 if score == 1 else 1)