layer2 commit

Author: lzy
Date: 2025-05-28 11:00:24 +08:00
parent 6a6b09ae20
commit 9f5318c23d
66 changed files with 286574 additions and 0 deletions


@@ -0,0 +1,194 @@
import json
import time
import os
import re
import asyncio
from concurrent.futures import ThreadPoolExecutor

from openai import OpenAI
import numpy as np
from tqdm import tqdm

from prompts import CLEAN_PROMPTS, SELECT_QUESTION_PROMPT

API_KEY = ""
BASE_URL = "https://vip.apiyi.com/v1"
MODEL_GPT = "text-embedding-ada-002"
MODELS = ["deepseek-reasoner", "claude-3-7-sonnet-20250219", "qwen-max", "deepseek-chat", "gemini-pro"]

def load_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

# Check whether a question is complete (self-contained and answerable)
def check_question_completeness(question, answer):
    try:
        client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": ""},
                {"role": "user", "content": CLEAN_PROMPTS.replace("{QUESTION}", question).replace("{ANSWER}", answer)}
            ],
            temperature=0.0
        )
        result = response.choices[0].message.content.strip()
        # Try to extract the numeric verdict: 1 = complete, anything else = incomplete
        if "1" in result:
            return 1
        else:
            return 0
    except Exception as e:
        print(f"Error checking question completeness: {e}")
        return 0

# Score the difficulty of a question with a single model
def score_question_difficulty(model_name, question, answer):
    try:
        client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": ""},
                {"role": "user", "content": SELECT_QUESTION_PROMPT.replace("{QUESTION}", question).replace("{ANSWER}", answer)}
            ],
            temperature=0.2
        )
        result = response.choices[0].message.content.strip()
        # Try to extract a JSON object from the response
        try:
            # Locate where the JSON object starts and ends
            start_idx = result.find('{')
            end_idx = result.rfind('}') + 1
            if start_idx >= 0 and end_idx > start_idx:
                json_str = result[start_idx:end_idx]
                json_result = json.loads(json_str)
                return json_result.get("score", 0)
            else:
                # No JSON found; fall back to pulling the score out of the raw text
                score_match = re.search(r'"score":\s*(\d+)', result)
                if score_match:
                    return int(score_match.group(1))
                else:
                    print(f"Could not parse a score from the model response: {result}")
                    return 0
        except Exception as e:
            print(f"Error while parsing JSON: {e}")
            print(f"Raw response: {result}")
            return 0
    except Exception as e:
        print(f"Error scoring with model {model_name}: {e}")
        time.sleep(5)  # Back off after an error
        return 0

# Process a single question asynchronously
async def process_question(data_item, executor):
    idx = data_item["idx"]
    question = data_item["question"]
    answer = data_item["answer"]
    # First check whether the question is complete; drop it otherwise
    is_complete = check_question_completeness(question, answer)
    if is_complete != 1:
        return None
    # Score with every model in parallel on the thread pool
    scores = {}
    loop = asyncio.get_running_loop()
    score_tasks = []
    for model in MODELS:
        score_tasks.append(
            loop.run_in_executor(
                executor,
                score_question_difficulty,
                model,
                question,
                answer
            )
        )
    # Wait for all scoring results
    model_scores = await asyncio.gather(*score_tasks)
    # Sum up the per-model scores
    total_score = 0
    for i, model in enumerate(MODELS):
        scores[model] = model_scores[i]
        total_score += model_scores[i]
    # Build the result record
    result = {
        "id": idx,
        "question": question,
        "answer": answer,
        "total_score": total_score
    }
    # Attach each model's individual score
    for model in MODELS:
        result[model] = scores[model]
    return result

# Save results to a JSON file
def save_results(results, output_file):
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

# Main processing function
async def main(input_file, output_file, score_file, top_n=2000):
    # Load the data
    data = load_data(input_file)
    results = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        tasks = []
        for item in data:
            tasks.append(process_question(item, executor))
        # Show a progress bar as questions finish
        for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Processing questions"):
            result = await f
            if result is not None:
                results.append(result)
    # Sort by total score, hardest first
    results.sort(key=lambda x: x["total_score"], reverse=True)
    # Keep the top_n hardest questions
    top_results = results[:top_n]
    # Save the selected questions
    save_results(top_results, output_file)
    # Save the per-model scores for every complete question
    score_results = []
    for item in results:
        score_item = {
            "id": item["id"],
            "question": item["question"],
            "answer": item["answer"]
        }
        # Attach each model's score
        for model in MODELS:
            score_item[model] = item[model]
        score_results.append(score_item)
    save_results(score_results, score_file)
    print(f"Done: {len(results)} complete questions; kept the top {len(top_results)} hardest.")

if __name__ == "__main__":
    input_file = "input.json"  # Input JSON file
    output_file = "top_difficult_questions.json"  # Output: the top 2000 hardest questions
    score_file = "scores.json"  # Output: all per-model scores
    # Run the pipeline
    asyncio.run(main(input_file, output_file, score_file, 2000))
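
The script imports from prompts and reads input.json, neither of which is shown in this file view. Below is a minimal sketch of the shapes it assumes: a prompts module exporting CLEAN_PROMPTS and SELECT_QUESTION_PROMPT with {QUESTION}/{ANSWER} placeholders, and an input file that is a JSON array of records carrying idx, question, and answer. The prompt wording here is a placeholder assumption, not the project's actual text.

# prompts.py -- illustrative stand-in only; real prompt wording is not shown in this view.
# Both templates must keep the {QUESTION} and {ANSWER} placeholders filled via str.replace().
CLEAN_PROMPTS = (
    "Decide whether the question below is complete and self-contained given its answer. "
    "Reply with 1 if complete, 0 otherwise.\n"
    "Question: {QUESTION}\nAnswer: {ANSWER}"
)
SELECT_QUESTION_PROMPT = (
    'Rate the difficulty of the question below and reply as JSON, e.g. {"score": 7}.\n'
    "Question: {QUESTION}\nAnswer: {ANSWER}"
)

# input.json -- a JSON array of records with the fields process_question() reads:
# [
#   {"idx": 0, "question": "...", "answer": "..."},
#   {"idx": 1, "question": "...", "answer": "..."}
# ]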