"""Filter complete questions and rank them by model-judged difficulty.

Pipeline: load question/answer records from JSON, ask one chat model whether
each question is complete, ask every model in ``MODELS`` for a difficulty
score, then write the highest-scoring questions and the per-model scores to
two JSON files.
"""
import asyncio
import json
import os
import re
import time
from concurrent.futures import ThreadPoolExecutor

import numpy as np
from openai import OpenAI
from tqdm import tqdm

from prompts import CLEAN_PROMPTS, SELECT_QUESTION_PROMPT

API_KEY = ""
BASE_URL = "https://vip.apiyi.com/v1"
MODEL_GPT = "text-embedding-ada-002"
MODELS = [
    "deepseek-reasoner",
    "claude-3-7-sonnet-20250219",
    "qwen-max",
    "deepseek-chat",
    "gemini-pro",
]

# Fallback pattern used when the reply contains no parseable JSON object.
_SCORE_PATTERN = re.compile(r'"score":\s*(\d+)')


def load_data(file_path):
    """Return the parsed JSON content of ``file_path`` (read as UTF-8)."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def check_question_completeness(question, answer):
    """Ask the "gpt-4-turbo" model whether a question is complete.

    The prompt is ``CLEAN_PROMPTS`` with ``{QUESTION}`` and ``{ANSWER}``
    substituted.

    Returns:
        1 if the model's reply contains the character "1", otherwise 0.
        Any exception raised by the request is printed and reported as 0.
    """
    try:
        client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
        prompt = CLEAN_PROMPTS.replace("{QUESTION}", question).replace("{ANSWER}", answer)
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": ""},
                {"role": "user", "content": prompt},
            ],
            temperature=0.0,
        )
        result = response.choices[0].message.content.strip()
        # NOTE(review): this is a substring test, so replies such as "10"
        # or "0 (not 1)" also count as complete -- confirm against the prompt.
        return 1 if "1" in result else 0
    except Exception as e:
        print(f"Error checking question completeness: {e}")
        return 0


def _coerce_score(value):
    """Return ``value`` as a number, or None when it is not numeric.

    Numbers pass through unchanged; numeric strings such as "7" or "7.5"
    are converted so the caller can safely sum the scores.
    """
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        text = value.strip()
        try:
            return int(text)
        except ValueError:
            try:
                return float(text)
            except ValueError:
                return None
    return None


def _parse_score(result):
    """Extract the difficulty score from a model reply.

    First tries to parse the outermost ``{...}`` span as JSON and read its
    "score" key (0 when the key is absent). If that span is missing, is not
    valid JSON, or holds a non-numeric score, falls back to a regex search
    for ``"score": <digits>``. Returns 0 when nothing usable is found.
    """
    start_idx = result.find('{')
    end_idx = result.rfind('}') + 1
    if 0 <= start_idx < end_idx:
        try:
            payload = json.loads(result[start_idx:end_idx])
        except json.JSONDecodeError as e:
            print(f"解析JSON发生错误: {e}")
            print(f"原始响应: {result}")
        else:
            score = _coerce_score(payload.get("score", 0))
            if score is not None:
                return score

    score_match = _SCORE_PATTERN.search(result)
    if score_match:
        return int(score_match.group(1))

    print(f"无法解析模型返回的分数: {result}")
    return 0


def score_question_difficulty(model_name, question, answer):
    """Ask ``model_name`` to rate the difficulty of a question.

    The prompt is ``SELECT_QUESTION_PROMPT`` with ``{QUESTION}`` and
    ``{ANSWER}`` substituted.

    Returns:
        The numeric score parsed from the reply, or 0 when the reply cannot
        be parsed or the request fails. After a failed request the calling
        thread sleeps for 5 seconds before returning.
    """
    try:
        client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
        prompt = SELECT_QUESTION_PROMPT.replace("{QUESTION}", question).replace("{ANSWER}", answer)
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": ""},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
        )
        result = response.choices[0].message.content.strip()
        return _parse_score(result)
    except Exception as e:
        print(f"模型{model_name}评分出错: {e}")
        time.sleep(5)  # pause after an error before the worker takes more work
        return 0


async def process_question(data_item, executor):
    """Check one question for completeness and score it with every model.

    Args:
        data_item: Mapping with the keys "idx", "question" and "answer".
        executor: Executor on which the blocking API calls are run.

    Returns:
        None when the question is judged incomplete; otherwise a dict with
        "id", "question", "answer", "total_score" and one entry per model
        name in ``MODELS`` holding that model's score.
    """
    idx = data_item["idx"]
    question = data_item["question"]
    answer = data_item["answer"]

    loop = asyncio.get_running_loop()

    # The completeness check is a blocking HTTP call; run it on the executor
    # so it does not stall the event loop and serialise all questions.
    is_complete = await loop.run_in_executor(
        executor, check_question_completeness, question, answer
    )
    if is_complete != 1:
        return None

    # score_question_difficulty takes exactly (model_name, question, answer).
    model_scores = await asyncio.gather(
        *(
            loop.run_in_executor(
                executor, score_question_difficulty, model, question, answer
            )
            for model in MODELS
        )
    )

    result = {
        "id": idx,
        "question": question,
        "answer": answer,
        "total_score": sum(model_scores),
    }
    result.update(zip(MODELS, model_scores))
    return result


def save_results(results, output_file):
    """Write ``results`` to ``output_file`` as indented, non-ASCII-escaped JSON."""
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)


async def main(input_file, output_file, score_file, top_n=2000):
    """Run the full pipeline.

    Args:
        input_file: JSON file holding the list of question records.
        output_file: Destination for the ``top_n`` highest-scoring questions.
        score_file: Destination for the per-model scores of every complete
            question.
        top_n: Number of questions to keep in ``output_file``.
    """
    data = load_data(input_file)

    results = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        tasks = [process_question(item, executor) for item in data]

        # Consume results as they finish so the progress bar advances live.
        for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Processing questions"):
            result = await f
            if result is not None:
                results.append(result)

    # Highest total score first.
    results.sort(key=lambda x: x["total_score"], reverse=True)

    top_results = results[:top_n]
    save_results(top_results, output_file)

    # The score file keeps the per-model scores but omits the total.
    score_results = []
    for item in results:
        score_item = {
            "id": item["id"],
            "question": item["question"],
            "answer": item["answer"],
        }
        score_item.update((model, item[model]) for model in MODELS)
        score_results.append(score_item)

    save_results(score_results, score_file)

    print(f"处理完成。共有{len(results)}道完整问题,已选取前{len(top_results)}道最难问题。")


if __name__ == "__main__":
    input_file = "input.json"  # input JSON file
    output_file = "top_difficult_questions.json"  # top 2000 hardest questions
    score_file = "scores.json"  # per-model scores for all complete questions

    asyncio.run(main(input_file, output_file, score_file, 2000))