layer2 commit

Author: lzy
Date: 2025-05-28 11:00:24 +08:00
parent 6a6b09ae20
commit 9f5318c23d
66 changed files with 286574 additions and 0 deletions


@@ -0,0 +1,194 @@
import json
import time
import os
import re
import asyncio
from concurrent.futures import ThreadPoolExecutor

from openai import OpenAI
import numpy as np
from tqdm import tqdm

from prompts import CLEAN_PROMPTS, SELECT_QUESTION_PROMPT

API_KEY = ""
BASE_URL = "https://vip.apiyi.com/v1"
MODEL_GPT = "text-embedding-ada-002"
MODELS = ["deepseek-reasoner", "claude-3-7-sonnet-20250219", "qwen-max", "deepseek-chat", "gemini-pro"]

def load_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

# Check whether a question is complete (self-contained and answerable)
def check_question_completeness(question, answer):
    try:
        client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": ""},
                {"role": "user", "content": CLEAN_PROMPTS.replace("{QUESTION}", question).replace("{ANSWER}", answer)}
            ],
            temperature=0.0
        )
        result = response.choices[0].message.content.strip()
        # Try to extract the numeric verdict: 1 = complete, anything else = incomplete
        if "1" in result:
            return 1
        else:
            return 0
    except Exception as e:
        print(f"Error checking question completeness: {e}")
        return 0

# Score the difficulty of a question with a single model
def score_question_difficulty(model_name, question, answer):
    try:
        client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": ""},
                {"role": "user", "content": SELECT_QUESTION_PROMPT.replace("{QUESTION}", question).replace("{ANSWER}", answer)}
            ],
            temperature=0.2
        )
        result = response.choices[0].message.content.strip()
        # Try to extract a JSON object from the response
        try:
            # Locate where the JSON object starts and ends
            start_idx = result.find('{')
            end_idx = result.rfind('}') + 1
            if start_idx >= 0 and end_idx > start_idx:
                json_str = result[start_idx:end_idx]
                json_result = json.loads(json_str)
                return json_result.get("score", 0)
            else:
                # No JSON found; fall back to pulling the score out of the raw text
                score_match = re.search(r'"score":\s*(\d+)', result)
                if score_match:
                    return int(score_match.group(1))
                else:
                    print(f"Could not parse a score from the model response: {result}")
                    return 0
        except Exception as e:
            print(f"Error while parsing JSON: {e}")
            print(f"Raw response: {result}")
            return 0
    except Exception as e:
        print(f"Error scoring with model {model_name}: {e}")
        time.sleep(5)  # Back off after an error
        return 0

# Process a single question asynchronously
async def process_question(data_item, executor):
    idx = data_item["idx"]
    question = data_item["question"]
    answer = data_item["answer"]
    # First check whether the question is complete; drop it otherwise
    is_complete = check_question_completeness(question, answer)
    if is_complete != 1:
        return None
    # Score with every model in parallel on the thread pool
    scores = {}
    loop = asyncio.get_running_loop()
    score_tasks = []
    for model in MODELS:
        score_tasks.append(
            loop.run_in_executor(
                executor,
                score_question_difficulty,
                model,
                question,
                answer
            )
        )
    # Wait for all scoring results
    model_scores = await asyncio.gather(*score_tasks)
    # Sum up the per-model scores
    total_score = 0
    for i, model in enumerate(MODELS):
        scores[model] = model_scores[i]
        total_score += model_scores[i]
    # Build the result record
    result = {
        "id": idx,
        "question": question,
        "answer": answer,
        "total_score": total_score
    }
    # Attach each model's individual score
    for model in MODELS:
        result[model] = scores[model]
    return result

# Save results to a JSON file
def save_results(results, output_file):
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

# Main processing function
async def main(input_file, output_file, score_file, top_n=2000):
    # Load the data
    data = load_data(input_file)
    results = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        tasks = []
        for item in data:
            tasks.append(process_question(item, executor))
        # Show a progress bar as questions finish
        for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Processing questions"):
            result = await f
            if result is not None:
                results.append(result)
    # Sort by total score, hardest first
    results.sort(key=lambda x: x["total_score"], reverse=True)
    # Keep the top_n hardest questions
    top_results = results[:top_n]
    # Save the selected questions
    save_results(top_results, output_file)
    # Save the per-model scores for every complete question
    score_results = []
    for item in results:
        score_item = {
            "id": item["id"],
            "question": item["question"],
            "answer": item["answer"]
        }
        # Attach each model's score
        for model in MODELS:
            score_item[model] = item[model]
        score_results.append(score_item)
    save_results(score_results, score_file)
    print(f"Done: {len(results)} complete questions; kept the top {len(top_results)} hardest.")

if __name__ == "__main__":
    input_file = "input.json"  # Input JSON file
    output_file = "top_difficult_questions.json"  # Output: the top 2000 hardest questions
    score_file = "scores.json"  # Output: all per-model scores
    # Run the pipeline
    asyncio.run(main(input_file, output_file, score_file, 2000))
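
The script imports from prompts and reads input.json, neither of which is shown in this file view. Below is a minimal sketch of the shapes it assumes: a prompts module exporting CLEAN_PROMPTS and SELECT_QUESTION_PROMPT with {QUESTION}/{ANSWER} placeholders, and an input file that is a JSON array of records carrying idx, question, and answer. The prompt wording here is a placeholder assumption, not the project's actual text.

# prompts.py -- illustrative stand-in only; real prompt wording is not shown in this view.
# Both templates must keep the {QUESTION} and {ANSWER} placeholders filled via str.replace().
CLEAN_PROMPTS = (
    "Decide whether the question below is complete and self-contained given its answer. "
    "Reply with 1 if complete, 0 otherwise.\n"
    "Question: {QUESTION}\nAnswer: {ANSWER}"
)
SELECT_QUESTION_PROMPT = (
    'Rate the difficulty of the question below and reply as JSON, e.g. {"score": 7}.\n'
    "Question: {QUESTION}\nAnswer: {ANSWER}"
)

# input.json -- a JSON array of records with the fields process_question() reads:
# [
#   {"idx": 0, "question": "...", "answer": "..."},
#   {"idx": 1, "question": "...", "answer": "..."}
# ]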