Files
MatBench/layer2/PGEE/code/clean&norepeat.py
2025-05-28 11:00:24 +08:00

194 lines
5.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import time
import os
import asyncio
from concurrent.futures import ThreadPoolExecutor
from openai import OpenAI
import numpy as np
from tqdm import tqdm
from prompts import CLEAN_PROMPTS,SELECT_QUESTION_PROMPT
# Credential handed to the OpenAI client in the request helpers below.
# It is empty in this file — presumably it must be filled in before running.
API_KEY=""
# Base URL handed to the OpenAI client alongside API_KEY.
BASE_URL="https://vip.apiyi.com/v1"
# NOTE(review): not referenced anywhere in the visible code, and the value is
# an embedding model name despite the "GPT" in the identifier — confirm
# whether this constant is still needed.
MODEL_GPT="text-embedding-ada-002"
# Models asked to score each question; every name also becomes a key in the
# per-question result records.
MODELS = ["deepseek-reasoner", "claude-3-7-sonnet-20250219", "qwen-max", "deepseek-chat", "gemini-pro"]
def load_data(file_path):
    """Read the UTF-8 JSON file at *file_path* and return the decoded object."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
# Decide whether a question is complete
def check_question_completeness(question, answer):
    """Ask the "gpt-4-turbo" model whether a question/answer pair is complete.

    CLEAN_PROMPTS is filled in by replacing its "{QUESTION}" and "{ANSWER}"
    placeholders and sent as the user message with temperature 0.0.

    Returns:
        1 when the stripped reply contains the character "1" anywhere,
        otherwise 0. Any exception raised while building or sending the
        request is printed and also reported as 0.
    """
    try:
        api_client = OpenAI(api_key= API_KEY, base_url= BASE_URL)
        user_prompt = CLEAN_PROMPTS.replace("{QUESTION}", question).replace("{ANSWER}", answer)
        conversation = [
            {"role": "system", "content": ""},
            {"role": "user", "content": user_prompt},
        ]
        completion = api_client.chat.completions.create(
            model="gpt-4-turbo",
            messages=conversation,
            temperature=0.0,
        )
        verdict = completion.choices[0].message.content.strip()
        # Plain substring test: any "1" in the reply counts as "complete".
        return 1 if "1" in verdict else 0
    except Exception as e:
        print(f"Error checking question completeness: {e}")
        return 0
# Score the difficulty of a question
def _coerce_score(value):
    """Return *value* as a finite number, or None when it is not numeric."""
    import math

    if isinstance(value, int):
        # Covers bool as well; int() normalises True/False to 1/0.
        return int(value)
    if isinstance(value, float):
        return value if math.isfinite(value) else None
    if isinstance(value, str):
        try:
            number = float(value.strip())
        except ValueError:
            return None
        if not math.isfinite(number):
            return None
        return int(number) if number.is_integer() else number
    return None


def _extract_score(result):
    """Pull the numeric "score" out of a raw model reply; 0 when none is found."""
    import re

    # Locate the outermost {...} span and try it as JSON first.
    start_idx = result.find('{')
    end_idx = result.rfind('}') + 1
    if start_idx >= 0 and end_idx > start_idx:
        try:
            json_result = json.loads(result[start_idx:end_idx])
        except json.JSONDecodeError as e:
            print(f"解析JSON发生错误: {e}")
            print(f"原始响应: {result}")
        else:
            score = _coerce_score(json_result.get("score", 0))
            if score is not None:
                return score

    # Fall back to scanning the text directly. This also runs when the
    # brace-delimited span was not valid JSON or its score was not numeric.
    score_match = re.search(r'"score":\s*(\d+)', result)
    if score_match:
        return int(score_match.group(1))

    print(f"无法解析模型返回的分数: {result}")
    return 0


def score_question_difficulty(model_name, question, answer):
    """Ask *model_name* to rate how difficult a question is.

    SELECT_QUESTION_PROMPT is filled in by replacing its "{QUESTION}" and
    "{ANSWER}" placeholders and sent as the user message with temperature 0.2.
    The reply is expected to carry a JSON object with a "score" field.

    Args:
        model_name: Model identifier passed to the chat-completions call.
        question: Question text substituted into the prompt.
        answer: Answer text substituted into the prompt.

    Returns:
        The score as an int or float. 0 is returned when the request fails
        or no numeric score can be recovered from the reply, so callers can
        always sum the results.
    """
    try:
        client = OpenAI(api_key= API_KEY, base_url= BASE_URL)
        response = client.chat.completions.create(
            model = model_name,
            messages=[
                {"role": "system", "content": ""},
                {"role": "user", "content": SELECT_QUESTION_PROMPT.replace("{QUESTION}", question).replace("{ANSWER}", answer)}
            ],
            temperature=0.2
        )
        result = response.choices[0].message.content.strip()
    except Exception as e:
        print(f"模型{model_name}评分出错: {e}")
        time.sleep(5)  # pause after a failure before the worker takes more work
        return 0
    return _extract_score(result)
# Process a single question asynchronously
async def process_question(data_item, executor):
    """Filter one question for completeness, then score it with every model.

    Args:
        data_item: Mapping with the keys "idx", "question" and "answer".
        executor: Executor on which the blocking API helpers are run.

    Returns:
        None when the completeness check does not return 1. Otherwise a dict
        with "id", "question", "answer", "total_score" (sum of all model
        scores) and one entry per name in MODELS holding that model's score.
    """
    idx = data_item["idx"]
    question = data_item["question"]
    answer = data_item["answer"]

    loop = asyncio.get_running_loop()

    # The completeness check is a blocking network call; run it on the
    # executor so the event loop stays free for other questions.
    is_complete = await loop.run_in_executor(
        executor, check_question_completeness, question, answer
    )
    if is_complete != 1:
        return None

    # Fan out one scoring call per model. score_question_difficulty takes
    # exactly (model_name, question, answer), so only those are passed.
    score_tasks = [
        loop.run_in_executor(
            executor, score_question_difficulty, model, question, answer
        )
        for model in MODELS
    ]
    model_scores = await asyncio.gather(*score_tasks)

    result = {
        "id": idx,
        "question": question,
        "answer": answer,
        "total_score": sum(model_scores),
    }
    # Attach the per-model scores, in MODELS order.
    result.update(zip(MODELS, model_scores))
    return result
# Save results to a file
def save_results(results, output_file):
    """Write *results* to *output_file* as UTF-8 JSON, 2-space indented, non-ASCII kept as-is."""
    encoder = json.JSONEncoder(ensure_ascii=False, indent=2)
    with open(output_file, 'w', encoding='utf-8') as sink:
        # Stream the encoder's chunks straight to the file.
        sink.writelines(encoder.iterencode(results))
# Main processing routine
async def main(input_file, output_file, score_file, top_n=2000):
    """Score every question in *input_file* and write the results.

    Each item is run through process_question on a 5-worker thread pool.
    Items for which it returns None are dropped. The rest are sorted by
    "total_score" in descending order. The first *top_n* go to
    *output_file*; the id, question, answer and per-model scores of all
    kept items go to *score_file*.
    """
    records = load_data(input_file)
    results = []

    with ThreadPoolExecutor(max_workers=5) as pool:
        pending = [process_question(record, pool) for record in records]
        # Progress bar advances as each question finishes.
        progress = tqdm(
            asyncio.as_completed(pending),
            total=len(pending),
            desc="Processing questions",
        )
        for finished in progress:
            outcome = await finished
            if outcome is None:
                continue
            results.append(outcome)

    # Highest total score first.
    results.sort(key=lambda entry: entry["total_score"], reverse=True)

    # Keep and save the first top_n entries.
    top_results = results[:top_n]
    save_results(top_results, output_file)

    # Per-model score table for every kept question (no total_score).
    score_results = [
        {
            "id": entry["id"],
            "question": entry["question"],
            "answer": entry["answer"],
            **{model: entry[model] for model in MODELS},
        }
        for entry in results
    ]
    save_results(score_results, score_file)

    print(f"处理完成。共有{len(results)}道完整问题,已选取前{len(top_results)}道最难问题。")
if __name__ == "__main__":
    # Script entry point: read input.json, write the 2000 top-scoring
    # questions and the full per-model score table.
    asyncio.run(
        main(
            input_file="input.json",
            output_file="top_difficult_questions.json",
            score_file="scores.json",
            top_n=2000,
        )
    )