layer2 commit

This commit is contained in:
lzy
2025-05-28 11:00:24 +08:00
parent 6a6b09ae20
commit 9f5318c23d
66 changed files with 286574 additions and 0 deletions

6843
layer2/PGEE/code/EN_ckj.json Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,194 @@
import json
import time
import os
import asyncio
from concurrent.futures import ThreadPoolExecutor
from openai import OpenAI
import numpy as np
from tqdm import tqdm
from prompts import CLEAN_PROMPTS,SELECT_QUESTION_PROMPT
# NOTE(review): API_KEY is empty — every request below will fail until it is
# set; prefer loading it from an environment variable.
API_KEY=""
BASE_URL="https://vip.apiyi.com/v1"
MODEL_GPT="text-embedding-ada-002"  # embedding model name (appears unused in this script — confirm)
# Judge models used to score question difficulty in parallel.
MODELS = ["deepseek-reasoner", "claude-3-7-sonnet-20250219", "qwen-max", "deepseek-chat", "gemini-pro"]
def load_data(file_path):
    """Read a UTF-8 JSON file and return the parsed object."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
# Decide whether a QA pair is complete.
def check_question_completeness(question,answer):
    """Return 1 when the judge model deems the question/answer pair complete,
    otherwise 0 (any API failure also yields 0)."""
    try:
        client = OpenAI(api_key= API_KEY, base_url= BASE_URL)
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": ""},
                {"role": "user", "content": CLEAN_PROMPTS.replace("{QUESTION}", question).replace("{ANSWER}", answer)}
            ],
            temperature=0.0
        )
        result = response.choices[0].message.content.strip()
        # Try to extract the numeric verdict.
        # NOTE(review): substring test — any reply containing the digit "1"
        # (e.g. "10", "option 1 is wrong") counts as complete; confirm intent.
        if "1" in result:
            return 1
        else:
            return 0
    except Exception as e:
        print(f"Error checking question completeness: {e}")
        return 0
# Score one question's difficulty with a given judge model.
def score_question_difficulty(model_name, question, answer, idx=None):
    """Ask *model_name* to score a QA pair; return the integer score (0 on failure).

    idx is accepted (and ignored) because process_question dispatches this
    through an executor with the item index as a fourth positional argument;
    without this parameter every such call raised TypeError.
    """
    try:
        client = OpenAI(api_key= API_KEY, base_url= BASE_URL)
        response = client.chat.completions.create(
            model = model_name,
            messages=[
                {"role": "system", "content": ""},
                {"role": "user", "content": SELECT_QUESTION_PROMPT.replace("{QUESTION}", question).replace("{ANSWER}", answer)}
            ],
            temperature=0.2
        )
        result = response.choices[0].message.content.strip()
        # Try to extract a JSON object from the reply.
        try:
            start_idx = result.find('{')
            end_idx = result.rfind('}') + 1
            if start_idx >= 0 and end_idx > start_idx:
                json_result = json.loads(result[start_idx:end_idx])
                return json_result.get("score", 0)
            # No JSON object found: fall back to a regex scan for "score".
            import re
            score_match = re.search(r'"score":\s*(\d+)', result)
            if score_match:
                return int(score_match.group(1))
            print(f"无法解析模型返回的分数: {result}")
            return 0
        except Exception as e:
            print(f"解析JSON发生错误: {e}")
            print(f"原始响应: {result}")
            return 0
    except Exception as e:
        print(f"模型{model_name}评分出错: {e}")
        time.sleep(5)  # back off after an API failure
        return 0
# Asynchronously process a single question.
async def process_question(data_item, executor):
    """Check completeness, then score a QA pair with every judge model in parallel.

    Returns a result dict with per-model scores and their total, or None when
    the pair is judged incomplete.
    """
    idx = data_item["idx"]
    question = data_item["question"]
    answer = data_item["answer"]
    # Skip incomplete QA pairs outright.
    if check_question_completeness(question, answer) != 1:
        return None
    # Fan the per-model scoring out over the thread pool.
    loop = asyncio.get_event_loop()
    score_tasks = [
        # BUG FIX: the original also passed `idx`, which
        # score_question_difficulty did not accept, so every scoring call
        # raised TypeError inside the executor.
        loop.run_in_executor(executor, score_question_difficulty, model, question, answer)
        for model in MODELS
    ]
    model_scores = await asyncio.gather(*score_tasks)
    # Merge the per-model scores.
    scores = dict(zip(MODELS, model_scores))
    result = {
        "id": idx,
        "question": question,
        "answer": answer,
        "total_score": sum(model_scores)
    }
    # Attach each model's individual score.
    for model in MODELS:
        result[model] = scores[model]
    return result
# Persist results to a file.
def save_results(results, output_file):
    """Write *results* to *output_file* as pretty-printed UTF-8 JSON."""
    with open(output_file, 'w', encoding='utf-8') as sink:
        json.dump(results, sink, ensure_ascii=False, indent=2)
# Main processing routine.
async def main(input_file, output_file, score_file, top_n=2000):
    """Filter complete questions, score them with every judge model, keep the
    top_n hardest, and write both the selection and the raw scores to disk."""
    # Load data
    data = load_data(input_file)
    results = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        tasks = []
        for item in data:
            tasks.append(process_question(item, executor))
        # Progress bar over completed tasks
        for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Processing questions"):
            result = await f
            if result is not None:
                results.append(result)
    # Sort by total score, hardest first
    results.sort(key=lambda x: x["total_score"], reverse=True)
    # Keep the top_n entries
    top_results = results[:top_n]
    # Save the selection
    save_results(top_results, output_file)
    # Save the per-model scores for every complete question
    score_results = []
    for item in results:
        score_item = {
            "id": item["id"],
            "question": item["question"],
            "answer": item["answer"]
        }
        # Attach each model's score
        for model in MODELS:
            score_item[model] = item[model]
        score_results.append(score_item)
    save_results(score_results, score_file)
    print(f"处理完成。共有{len(results)}道完整问题,已选取前{len(top_results)}道最难问题。")
if __name__ == "__main__":
    input_file = "input.json"  # input JSON file
    output_file = "top_difficult_questions.json"  # top-2000 hardest questions
    score_file = "scores.json"  # all per-model scores
    # Run the async pipeline
    asyncio.run(main(input_file, output_file, score_file, 2000))

24687
layer2/PGEE/code/dataset.json Normal file

File diff suppressed because one or more lines are too long

19
layer2/PGEE/code/merge.py Normal file
View File

@@ -0,0 +1,19 @@
import json
def merge_and_renumber_json(file1, file2, output_file):
    """Concatenate two JSON arrays and renumber every record's 'idx' from 1."""
    with open(file1, 'r', encoding='utf-8') as first:
        merged = json.load(first)
    with open(file2, 'r', encoding='utf-8') as second:
        merged += json.load(second)
    for fresh_idx, record in enumerate(merged, start=1):
        record['idx'] = fresh_idx
    with open(output_file, 'w', encoding='utf-8') as sink:
        json.dump(merged, sink, indent=2, ensure_ascii=False)
    print(f"合并完成,输出文件为: {output_file}")
# Ad-hoc invocation with hard-coded absolute paths; edit these when reusing the script.
merge_and_renumber_json('/home/ubuntu/50T/fsy/layer2/QA/code/EN-single_select_includes_process.json', '/home/ubuntu/50T/fsy/layer2/QA/code/821_single_select.json', '/home/ubuntu/50T/fsy/layer2/QA/code/merged.json')

View File

@@ -0,0 +1,44 @@
CLEAN_PROMPTS="""
"""
SELECT_QUESTION_PROMPT = """
Given the most unique answer, evaluate the following **questions ** and decide which one best matches the answer. The higher the match between the question and the answer, the higher the score. Please rate each question and answer pairing on a scale from **1 to 5**, with 1 being the worst match and 5 being the best match. Then, give a brief reason why the question best matches the answer.
### # ** Rating Criteria ** :
- **5** : Perfect match - The question is exactly the same as the answer, covering all the key information for the answer.
- **4** : High match - The question and answer are mostly consistent, and basically cover the core content of the answer.
- **3** : Medium match - The question partially agrees with the answer, but does not match exactly, or the answer does not fully cover the requirements of the question.
- **2** : Low match - There is a gap between the question and the answer, and more details may be needed to match.
- **1** : Very low match - the question has little to do with the answer, or the answer does not match the question at all.
### Note that you should also include in your evaluation criteria whether the question is asked about the recommended functional group. If so, the score should be higher, if not, the score should be lower.
### ** Inputs: **
1. ** unique answer **:
{ANSWER}
2. **questions **:
{QUESTIONS}
### ** Output format: **
- Score how well each question matches the answer in the following JSON format:
```json
{
"questions": [
{
"id": 1,
"score": xxxx,
},
{
"id": 2,
"score": xxxx,
},
{
"id": 3,
"score": xxxx,
},
...
]
}
```
"""

Binary file not shown.

23
layer2/PGEE/code/renum.py Normal file
View File

@@ -0,0 +1,23 @@
import json
def renumber_json_indices(input_file, output_file):
    """Rewrite the 'idx' field of every record as 1..N and save the result."""
    with open(input_file, 'r', encoding='utf-8') as src:
        records = json.load(src)
    for position, record in enumerate(records, start=1):
        record['idx'] = position
    with open(output_file, 'w', encoding='utf-8') as dst:
        json.dump(records, dst, ensure_ascii=False, indent=2)
    print(f"成功将索引重新编号并保存到 {output_file}")
    print(f"处理了 {len(records)} 条数据")
if __name__ == "__main__":
    input_file = "/home/ubuntu/50T/fsy/layer2/QA/single_select.json"  # replace with your input file
    output_file = "/home/ubuntu/50T/fsy/layer2/QA/single_select_renum.json"  # replace with your desired output file
    renumber_json_indices(input_file, output_file)

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,46 @@
"""
0. 将问题从xls提取为json
1. 将问题进行拆分
2. 翻译成英文
3. 去重
4. 使用大模型进行难度评估和筛选
"""
import pandas as pd
import json
import os
def process_excel_files(directory):
    """Collect Question/Answer pairs from every .xlsx in *directory* into qa_data.json."""
    all_data = []
    # Every spreadsheet in the target directory.
    workbooks = [name for name in os.listdir(directory) if name.endswith('.xlsx')]
    for workbook in workbooks:
        frame = pd.read_excel(os.path.join(directory, workbook))
        if 'Question' not in frame.columns or 'Answer' not in frame.columns:
            print(f"警告: {workbook} 缺少必要的列 (question/answer)")
            continue
        # Convert each row into a plain dict.
        for _, row in frame.iterrows():
            all_data.append({
                'question': str(row['Question']).strip(),
                'answer': str(row['Answer']).strip()
            })
    # Persist everything next to the spreadsheets.
    output_file = os.path.join(directory, 'qa_data.json')
    with open(output_file, 'w', encoding='utf-8') as sink:
        json.dump(all_data, sink, ensure_ascii=False, indent=2)
    print(f"处理完成!共处理了 {len(all_data)} 条数据")
    print(f"数据已保存到: {output_file}")
if __name__ == '__main__':
    # Process the Excel files that sit next to this script.
    directory = os.path.dirname(os.path.abspath(__file__))
    process_excel_files(directory)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,137 @@
"""
0. 将问题从xls提取为json
1. 将问题进行拆分
2. 翻译成英文
3. 去重
4. 使用大模型进行难度评估和筛选
"""
import json
import time
import threading
import queue
from concurrent.futures import ThreadPoolExecutor
from openai import OpenAI
import re
# Shared state for the worker threads below.
result_lock = threading.Lock()          # guards processed_data / error_items
api_semaphore = threading.Semaphore(5)  # at most 5 concurrent API calls
processed_data =[]   # successfully translated items
error_items = []     # items whose translation failed
# SECURITY(review): hard-coded API key committed to the repo — rotate it and
# load it from an environment variable instead.
API_KEY="sk-oYh3Xrhg8oDY2gW02c966f31C84449Ad86F9Cd9dF6E64a8d"
BASE_URL="https://vip.apiyi.com/v1"
MODEL_GPT ="deepseek-chat"
client = OpenAI(api_key=API_KEY,base_url=BASE_URL)
def load_qa_data(file_path):
    """Read a UTF-8 JSON file of QA pairs and return the parsed list."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
def translate_qa_type(question,answer):
    """Translate a Chinese QA pair into English via the chat API.

    Returns the parsed JSON payload (a one-element list of
    {"question", "answer"} dicts) on success, or the sentinel string "2" on
    failure; callers test isinstance(result, list) to tell the two apart.
    """
    prompt = f"""
Please strictly translate the following Chinese questions and answers into English, and return the results according to the specified JSON format:
Question: {question}
Answer: {answer}
Translation requirements:
- Only translate the Chinese expressions, any additions or modifications to the content are prohibited
- Maintain all information points, expressions, and numerical values exactly as in the original text
- Keep professional terminology accurate
- Return plain text, do not use markdown format
Return the translation results according to the following JSON format:
[
    {{
        "question": "Translated English question",
        "answer": "Translated English answer"
    }}
]
"""
    with api_semaphore:  # cap concurrent API calls
        try:
            response = client.chat.completions.create(
                model = MODEL_GPT,
                messages=[
                    {"role": "system", "content": "You are an expert translator with extensive knowledge of materials science, tasked with translating Chinese texts into highly accurate English, ensuring the correct usage of scientific terminology."},
                    {"role": "user", "content": prompt}
                ],
                stream=False
            )
            result = response.choices[0].message.content.strip()
            print(result)
            # Repair common JSON-breaking artifacts before parsing.
            process_result = comfirm_json_string(result)
            return json.loads(process_result)
        except Exception as e:
            print(f"API调用错误: {e}")
            # NOTE(review): "2" is an arbitrary error sentinel — any non-list value works.
            return "2"
def comfirm_json_string(json_string):
    """Normalize an LLM reply so it parses as JSON: straighten curly quotes,
    double backslashes, collapse escaped quotes, drop raw newlines and any
    Markdown code fence."""
    json_string = json_string.replace('“', '"').replace('”', '"')
    json_string = json_string.replace('\\', '\\\\')
    json_string = json_string.replace('\\"', '"')
    json_string = json_string.replace("\n", "").replace("\r", "")
    # Peel off a leading ```json fence plus stray backticks/newlines.
    if json_string.startswith("```json"):
        json_string = json_string.strip("`json\n")
    json_string = json_string.strip('`\n')
    return json_string
def process_item(item, index, total):
    """Translate one QA pair and file it under the shared success/error lists."""
    print(f"处理第 {index+1}/{total} 条数据...")
    translated = translate_qa_type(item["question"], item["answer"])
    succeeded = isinstance(translated, list)
    with result_lock:  # the result lists are shared across worker threads
        if succeeded:
            processed_data.append({
                "idx": item['idx'],
                "question": translated[0]["question"],
                "answer": translated[0]["answer"]
            })
        else:
            error_items.append({
                "idx": item['idx'],
                "question": item["question"],
                "answer": item["answer"]
            })
def save_processed_data(data, output_file):
    """Write *data* to *output_file* as pretty-printed UTF-8 JSON."""
    with open(output_file, 'w', encoding='utf-8') as sink:
        json.dump(data, sink, ensure_ascii=False, indent=2)
def main():
    """Translate every QA pair concurrently and write successes and failures
    to separate JSON files."""
    input_file = "/home/ubuntu/50T/fsy/layer2/QA/single_select.json"
    output_file = "/home/ubuntu/50T/fsy/layer2/QA/EN-single_select.json"
    error_file = "/home/ubuntu/50T/fsy/error.json"
    data = load_qa_data(input_file)
    total = len(data)
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = []
        for i, item in enumerate(data):
            future = executor.submit(process_item, item, i, total)
            futures.append(future)
            # Crude rate limiting: pause briefly after every 10 submissions.
            if (i+1) % 10 == 0:
                time.sleep(1)
        # Block until every worker has finished (re-raises worker exceptions).
        for future in futures:
            future.result()
    save_processed_data(processed_data, output_file)
    print(f"处理完成,已保存到 {output_file}")
    if error_items:
        save_processed_data(error_items, error_file)
        print(f"处理出错的条目已保存到 {error_file}")
if __name__ == "__main__":
    main()

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,130 @@
"""
0. 将问题从xls提取为json
1. 将问题进行拆分
2. 翻译成英文
3. 去重
4. 使用大模型进行难度评估和筛选
"""
from openai import OpenAI
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pickle
from prompts import CLEAN_PROMPTS, SELECT_QUESTION_PROMPT
# SECURITY(review): hard-coded API key committed to the repo — rotate it and
# load it from an environment variable instead.
API_KEY="sk-oYh3Xrhg8oDY2gW02c966f31C84449Ad86F9Cd9dF6E64a8d"
BASE_URL="https://vip.apiyi.com/v1"
MODEL_GPT="text-embedding-ada-002"  # embedding model used by get_embedding
# Judge-model list (appears unused in this module — confirm before removing).
MODELS = ["deepseek-reasoner", "claude-3-7-sonnet-20250219", "qwen-max", "deepseek-chat", "gemini-pro"]
def get_embedding(text):
    """Fetch the embedding vector for *text* from the configured endpoint."""
    api = OpenAI(api_key= API_KEY, base_url= BASE_URL)
    reply = api.embeddings.create(model = MODEL_GPT, input = text)
    return reply.data[0].embedding
def compute_embeddings(texts):
    """Embed every text via the API and stack the vectors into one array."""
    vectors = []
    total = len(texts)
    for position, text in enumerate(texts, start=1):
        print("正在处理第{}/{}".format(position, total))
        vectors.append(get_embedding(text))
    return np.array(vectors)
def load_json(file_path):
    """Parse and return the JSON document stored at *file_path*."""
    with open(file_path, 'r', encoding='utf-8') as source:
        return json.load(source)
def save_json(data, file_path):
    """Serialize *data* to *file_path* as readable UTF-8 JSON."""
    with open(file_path, 'w', encoding='utf-8') as sink:
        json.dump(data, sink, ensure_ascii=False, indent=2)
def save_embeddings(embeddings, file_path):
    """Pickle the embedding matrix to *file_path*."""
    with open(file_path, 'wb') as sink:
        pickle.dump(embeddings, sink)
def load_embeddings(file_path):
    """Unpickle and return an embedding matrix previously saved to *file_path*."""
    with open(file_path, 'rb') as source:
        return pickle.load(source)
def deduplicate_qa(data, save_vectors=True):
    """Embed every question, optionally cache the vectors, then filter
    near-duplicates. Returns (filtered_data, duplicate_entries)."""
    questions = [item['question'] for item in data]
    # Generate embedding vectors via the API.
    question_embeddings = compute_embeddings(questions)
    if save_vectors:
        print("保存问题的嵌入向量...")
        # NOTE(review): hard-coded absolute cache path — parameterize before reuse.
        save_embeddings(question_embeddings, '/home/ubuntu/50T/fsy/layer2/QA/question_embeddings.pkl')
    # Deduplication step.
    filtered_data, duplicate_entries = de_emphasize(question_embeddings,data)
    return filtered_data, duplicate_entries
def deduplicate_qa_pkl(data, pkl_path):
    """Deduplicate *data* using embeddings previously cached in a pickle file."""
    cached_embeddings = load_embeddings(pkl_path)
    return de_emphasize(cached_embeddings, data)
def de_emphasize(question_embeddings, data, similarity_threshold=0.99):
    """Greedy near-duplicate filter over question embeddings.

    Keeps the first occurrence of each question; any later question whose
    cosine similarity to an already-kept one exceeds *similarity_threshold*
    is recorded in duplicate_entries instead. Returns
    (filtered_data, duplicate_entries).

    Performance: the embeddings are normalized once up front, so each pair
    test is a single numpy dot product instead of a per-pair
    sklearn.cosine_similarity call (which rebuilt 1x1 matrices every time).
    """
    embeddings = np.asarray(question_embeddings, dtype=float)
    norms = np.linalg.norm(embeddings, axis=1)
    norms[norms == 0] = 1.0  # guard against zero vectors
    unit = embeddings / norms[:, None]
    unique_indices = []
    duplicate_entries = []  # records of the discarded QA pairs
    for i in range(len(data)):
        print("正在处理第{}/{}".format(i+1, len(data)))
        duplicate_found = False
        for j in unique_indices:
            # Cosine similarity of pre-normalized vectors is a plain dot product.
            question_sim = float(np.dot(unit[i], unit[j]))
            if question_sim > similarity_threshold:
                duplicate_found = True
                # Save both the dropped pair and the kept pair it matched.
                duplicate_entries.append({
                    "duplicate_question": data[i]['question'],
                    "duplicate_answer": data[i]['answer'],
                    "matched_question": data[j]['question'],
                    "matched_answer": data[j]['answer']
                })
                break
        if not duplicate_found:
            unique_indices.append(i)
    filtered_data = [data[i] for i in unique_indices]
    return filtered_data, duplicate_entries
# Main program
if __name__ == '__main__':
    input_file = '/home/ubuntu/50T/fsy/layer2/PGEE/code/dataset.json'  # input JSON path
    output_file = '/home/ubuntu/50T/fsy/layer2/PGEE/code/onrepeat_99.json'  # deduplicated output path
    # NOTE(review): this path looks like it is missing a '/' between "code"
    # and "duplicates" — it writes beside the code/ directory, not inside it.
    duplicates_file = '/home/ubuntu/50T/fsy/layer2/PGEE/codeduplicates_99.json'  # removed-pairs path
    pkl_path = "/home/ubuntu/50T/fsy/layer2/PGEE/question_embeddings.pkl"
    qa_data = load_json(input_file)
    # Option A: embed from scratch and cache the vectors as a .pkl file.
    # filtered_data, duplicate_entries = deduplicate_qa(qa_data, similarity_threshold=0.9)
    # Option B (active): reuse the cached .pkl embeddings for dedup.
    filtered_data, duplicate_entries =deduplicate_qa_pkl(qa_data,pkl_path)
    # Planned follow-ups (not implemented here): difficulty-based filtering;
    # distractor generation for non-multiple-choice questions.
    # Save the deduplicated pairs and the duplicates that were removed.
    save_json(filtered_data, output_file)
    save_json(duplicate_entries, duplicates_file)
    print(f"去重完成!处理前共有 {len(qa_data)} 条问答对,处理后剩余 {len(filtered_data)} 条。")
    print(f"重复问答对保存到 {duplicates_file},共保存 {len(duplicate_entries)} 条。")

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

7077
layer2/eval/dataset.json Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

100
layer2/eval/eval.py Normal file
View File

@@ -0,0 +1,100 @@
#多线程对LLM进行评估
import json
import threading
from tqdm import tqdm
import concurrent.futures
from openai import OpenAI
# SECURITY(review): hard-coded API key committed to the repo — rotate it and
# load it from an environment variable instead.
client = OpenAI(
    api_key="sk-oYh3Xrhg8oDY2gW02c966f31C84449Ad86F9Cd9dF6E64a8d",
    base_url="https://vip.apiyi.com/v1"
)
# Lock protecting the shared correct-answer counter across worker threads.
thread_lock = threading.Lock()
def load_json_data(filepath):
    """Load and return the JSON content of *filepath*.

    The file is opened as UTF-8 explicitly; relying on the platform default
    encoding breaks on non-ASCII benchmark data (e.g. on Windows).
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        return json.load(file)
def get_response(question,max_retries=10):
    """Query the chat model for *question*, retrying up to *max_retries* times.

    Returns the model's answer text, or the sentinel string "error!" when
    every attempt fails.
    """
    retries = 0
    while retries<max_retries:
        try:
            response = client.chat.completions.create(
                #
                model="claude-3-7-sonnet-20250219-thinking",
                messages= [
                    {"role": "system", "content": "You are an expert in the field of materials science, adept at answering questions related to fundamental aspects of materials science, including material structure, properties, processing, and applications."},
                    {"role": "user", "content": question}
                ],
                temperature=0
            )
            answer = response.choices[0].message.content
            return answer
        except Exception as e:
            print(f"Error in getting LLM response (Attempt {retries + 1}/{max_retries}): {e}")
            retries += 1
    # NOTE(review): the message says "returning None" but the sentinel is "error!".
    print(f"Failed to get response after {max_retries} attempts, returning None.")
    return "error!"
def process_item(item, index):
    """Query the LLM for one question and record whether the gold answer appears.

    Correctness is a (rough) substring containment check of the expected
    answer inside the model reply.
    """
    question = item['question']
    gold = item['answer'].strip()
    model_reply = get_response(question)
    return {
        'index': index,
        'question': question,
        'expected_answer': gold,
        'llm_answer': model_reply,
        'is_correct': gold in model_reply
    }
def calculate_accuracy_multithreaded(data, max_workers=5):
    """Evaluate every item concurrently; return (accuracy_percent, results).

    Results are sorted back into original item order. Accuracy is 0.0 for an
    empty dataset (previously this raised ZeroDivisionError).
    """
    correct_answers = 0
    results = []
    # Progress bar tracks completed futures, not submission order.
    with tqdm(total=len(data), desc="Processing items") as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit all tasks up front.
            future_to_index = {executor.submit(process_item, item, i): i for i, item in enumerate(data)}
            # Collect results as they finish.
            for future in concurrent.futures.as_completed(future_to_index):
                result = future.result()
                results.append(result)
                if result['is_correct']:
                    # Keep the counter update atomic across callbacks.
                    with thread_lock:
                        correct_answers += 1
                pbar.update(1)
    # Restore the original ordering.
    results.sort(key=lambda x: x['index'])
    total_questions = len(data)
    # Guard against an empty dataset instead of dividing by zero.
    accuracy = (correct_answers / total_questions) * 100 if total_questions else 0.0
    return accuracy, results
def main():
    """Run the benchmark evaluation and dump per-item results to disk."""
    filepath = '/home/ubuntu/50T/fsy/benchmark/1200ckjtest/1200ckj.json'
    data = load_json_data(filepath)
    max_workers = 8
    accuracy, results =calculate_accuracy_multithreaded(data,max_workers)
    # accuracy = calculate_accuracy(data)
    print(f"Accuracy of claude-3-7-sonnet-20250219-thinking: {accuracy:.2f}%")
    # Per-item transcript named after the evaluated model.
    with open('claude-3-7-sonnet-20250219-thinking.json', 'w') as f:
        json.dump(results, f, indent=2)
if __name__ == "__main__":
    main()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

64
layer2/process/prompts.py Normal file
View File

@@ -0,0 +1,64 @@
'''
保留计算题的计算过程:- Fully preserve the step-by-step calculation process along with the final results
只保留计算题的结果:- Preserve final calculation results
'''
SINGLE_QUESTION_PROMPTS="""
Follow these instructions strictly to perform question decomposition:
Input requirements:
- Question text: {question}
- Answer text: {answer}
Output rules:
1. Single issue determination criteria:
- Question contains only one clear technical inquiry point
- Answer content cannot be divided into independent parts
→ Return: "It's a single issue."
2. Compound question decomposition criteria (must satisfy all):
a) Question contains multiple technically independent sub-questions
b) Answer contains independent solution paragraphs corresponding to sub-questions
c) Each sub-question's answer does not depend on context from other sub-questions
3. Decomposition format standards:
[
{{
"question": "[Complete sub-question 1] (including necessary shared parameters)",
"answer": "[Corresponding complete answer]"
}},
{{
"question": "[Complete sub-question 2] (including necessary shared parameters)",
"answer": "[Corresponding complete answer]"
}},
......
]
Key control points:
1. Context integrity:
- Each sub-question must include shared parameters from the original question
2. Answer integrity:
- Fully preserve the step-by-step calculation process along with the final results
- Maintain original units and precision (e.g., 6.02×10²³ cannot be simplified to 6.02e23)
3.
3. Format prohibitions:
- No explanatory text additions
- No modifications to original technical terminology
- Return data must not use Markdown and Latex formats (like \times, \mathrm)
- Use scientific notation for data representation
"""
QA_TYPE_PROMPTS="""
Please analyze the following question and its answer, and classify the question type into one of the following four categories:
1. Calculation: A question that requires mathematical operations to derive the result.
2. Multiple choice: A question that provides multiple options (e.g., A/B/C/D) for the respondent to choose from.
3. True/False: A question that only requires answering true/false, yes/no, or correct/incorrect.
4. Other: A question that does not fall under the above three categories.
Question:
{question}
Answer:
{answer}
Please respond with the corresponding numeric code directly (without any explanation):
1. For Calculation, respond: 1
2. For Multiple choice, respond: 2
3. For True/False, respond: 3
4. For Other, respond: 4
"""

41
layer2/process/step0.py Normal file
View File

@@ -0,0 +1,41 @@
# step0: split each raw text segment into a question part and an answer part.
import json

input_file_path = '/home/ubuntu/50T/fsy/benchmark/dataset-ours/[Solution]qa_segment_all.json'
with open(input_file_path, 'r', encoding='utf-8') as infile:
    data = json.load(infile)

# Walk every record and cut its "segment" at the first marker word.
processed_data = []
for item in data:
    segment = item.get("segment", "")
    # Prefer "Solution" as the delimiter, fall back to "Answer" (same order as
    # the original branches); records with neither marker keep "000"
    # placeholders so they can be spotted and fixed downstream.
    for marker in ("Solution", "Answer"):
        if marker in segment:
            question, answer = segment.split(marker, 1)
            processed_data.append({
                "idx": item.get("idx"),
                "question": question.strip(),
                "answer": answer.strip(),
            })
            break
    else:
        processed_data.append({
            "idx": item.get("idx"),
            "question": "000",
            "answer": "000",
        })

output_file_path = '[Solution]qa_segment.json'
with open(output_file_path, 'w', encoding='utf-8') as outfile:
    json.dump(processed_data, outfile, ensure_ascii=False, indent=4)
# (Removed a trailing bare `output_file_path` expression — a no-op leftover
# from a notebook cell.)

132
layer2/process/step1and2.py Normal file
View File

@@ -0,0 +1,132 @@
"""
对821道英文问题进行处理
1. 判断是否包含多个子问题,将问题拆分为完整子问题(去掉推理过程,只保留最后结果)
2. 判断题目类型
3. 将题目做成选择题
对计算题,在数值附近随机生成三个相似答案作为错误选项
对简答题,与标准答案最相近的其他问题的答案作为三个错误选项
4. 将正确和错误选项随机打乱生成ABCD选择题的模型
5. 添加prompt并将选择题包裹在[ANSWER]<answer>[/ANSWER]标签中
6. 模型打分
"""
import json
import time
from openai import OpenAI
import re
from prompts import SINGLE_QUESTION_PROMPTS, QA_TYPE_PROMPTS, ONLY_ANSWER_PROMPTS
# SECURITY(review): hard-coded API key committed to the repo — rotate it and
# load it from an environment variable instead.
API_KEY="sk-oYh3Xrhg8oDY2gW02c966f31C84449Ad86F9Cd9dF6E64a8d"
BASE_URL="https://vip.apiyi.com/v1"
MODEL_DEEPSEEK_V3="deepseek-chat"  # model used for both splitting and classification
def load_data(file_path):
    """Read a UTF-8 JSON file and return the parsed object."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
def process_response(response):
    """Pull the JSON payload out of an LLM reply, undoing common over-escaping."""
    fence = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', response)
    payload = fence.group(1) if fence else response.strip()
    # Double backslashes only inside $...$ math spans so LaTeX survives json.loads.
    payload = re.sub(r'(\$[^\$]*\$)', lambda m: m.group(1).replace('\\', '\\\\'), payload)
    return payload.replace('\\"', '"').replace("\\'", "'")
def save_data(data, output_file):
    """Write *data* to *output_file* as pretty-printed UTF-8 JSON."""
    with open(output_file, 'w', encoding='utf-8') as sink:
        json.dump(data, sink, ensure_ascii=False, indent=2)
def split_complex_question(question, answer):
    """Ask the model whether the QA pair is compound.

    Returns 1 if it is a single question, a list of {question, answer} dicts
    if it was split, or a one-element fallback list on API/parse failure.
    """
    client = OpenAI(api_key = API_KEY,base_url = BASE_URL)
    try:
        response = client.chat.completions.create(
            model= MODEL_DEEPSEEK_V3,
            messages=[
                {"role": "system", "content": "You are an expert in decomposing complex technical questions into independent sub-questions and providing corresponding complete answers with preserved context, precision, and technical terminology. "},
                {"role": "user", "content": SINGLE_QUESTION_PROMPTS.replace("{question}",question).replace("{answer}",answer)}
            ],
            stream = False,
            temperature = 0
        )
        result = response.choices[0].message.content.strip()
        # print(result)
        # Sentinel phrase means "no split needed"; otherwise parse the JSON list.
        return 1 if "It's a single issue." in result else json.loads(process_response(result))
    except Exception as e:
        print(f"API调用错误: {e}")
        # Fall back to treating the pair as a single, unsplit question.
        return [{"question": question, "answer": answer}]
def single_question_process(data):
    """Split every compound QA pair into standalone sub-questions."""
    expanded = []
    total = len(data)
    for position, item in enumerate(data):
        print(f"处理第 {position+1}/{total} 条数据...")
        pieces = split_complex_question(item["question"], item["answer"])
        if isinstance(pieces, list):
            # Each sub-question inherits the original item's idx.
            for piece in pieces:
                expanded.append({
                    "idx": item["idx"],
                    "question": piece["question"],
                    "answer": piece["answer"]
                })
        else:
            # Sentinel 1: already a single question, keep it as-is.
            expanded.append({
                "idx": item["idx"],
                "question": item["question"],
                "answer": item["answer"]
            })
        if (position + 1) % 10 == 0:
            time.sleep(2)  # crude rate limiting every 10 API calls
    return expanded
def classify_qa_type(question, answer):
    """Classify a QA pair; returns one of "Calculation", "Multiple choice",
    "True/False" or "Other" (the latter also on any API failure)."""
    client = OpenAI(api_key = API_KEY,base_url = BASE_URL)
    try:
        response = client.chat.completions.create(
            model = MODEL_DEEPSEEK_V3,
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": QA_TYPE_PROMPTS.replace("{question}",question).replace("{answer}",answer)}
            ],
            stream=False
        )
        result = response.choices[0].message.content.strip().lower()
        # print(result)
        # The prompt asks for a bare numeric code; anything else maps to "Other".
        return {"1": "Calculation", "2": "Multiple choice", "3": "True/False"}.get(result, "Other")
    except Exception as e:
        print(f"API调用错误: {e}")
        return "Other"
def qa_type_process(data):
    """Attach a 'type' label to every QA pair via the classifier model."""
    total = len(data)
    for position, item in enumerate(data):
        print(f"处理第 {position+1}/{total} 条数据...")
        item["type"] = classify_qa_type(item["question"], item["answer"])
        if (position + 1) % 10 == 0:
            time.sleep(2)  # crude rate limiting every 10 API calls
    return data
def main():
    """Run the split + classify pipeline and persist the labelled data."""
    input_file = "/home/ubuntu/50T/fsy/layer2/QA/code/821.json"
    output_file = "/home/ubuntu/50T/fsy/layer2/QA/code/processed_data.json"
    data = load_data(input_file)
    # step 1: split compound questions into standalone sub-questions
    single_question_data = single_question_process(data)
    # step 2: label every QA pair with its question type
    qa_type_data = qa_type_process(single_question_data)
    # BUG FIX: the save call was commented out (and referenced an undefined
    # variable), so the script printed a success message without writing
    # anything. Persist the labelled data before claiming success.
    save_data(qa_type_data, output_file)
    print(f"处理完成,结果已保存到 {output_file}")
if __name__ == "__main__":
    main()

89
layer2/process/step3.py Normal file
View File

@@ -0,0 +1,89 @@
"""
对821道英文问题进行处理
1. 判断是否包含多个子问题,将问题拆分为完整子问题(去掉推理过程,只保留最后结果)
2. 判断题目类型
3. 将题目做成选择题
对计算题,在数值附近随机生成三个相似答案作为错误选项
对简答题,与标准答案最相近的其他问题的答案作为三个错误选项
4. 将正确和错误选项随机打乱生成ABCD选择题的模型
5. 添加prompt并将选择题包裹在[ANSWER]<answer>[/ANSWER]标签中
6. 模型打分
"""
import json
import re
import random
import copy
def generate_wrong_answers(json_file_path):
    """Add three numeric distractors to every calculation item in a JSON file.

    Reads the list at *json_file_path*, and for every item whose type is the
    integer 1 and whose answer contains at least one digit, fills
    wrong_answers_1..3 with perturbed copies of the answer. Writes the result
    to *_with_wrong_answers.json and also returns it.

    NOTE(review): type is compared against the int 1 here, while other
    pipeline steps store string labels like "Calculation" — confirm the
    schema of the input file.
    """
    with open(json_file_path, 'r', encoding='utf-8') as source:
        data = json.load(source)
    for item in data:
        if item['type'] == 1 and any(ch.isdigit() for ch in item['answer']):
            for slot in (1, 2, 3):
                item[f'wrong_answers_{slot}'] = generate_wrong_answer(item['answer'])
    out_path = json_file_path.replace('.json', '_with_wrong_answers.json')
    with open(out_path, 'w', encoding='utf-8') as sink:
        json.dump(data, sink, ensure_ascii=False, indent=2)
    return data
def generate_wrong_answer(correct_answer):
    """Return a copy of *correct_answer* with every number slightly perturbed.

    Handles plain numbers, scientific notation, Unicode minus signs,
    LaTeX-style exponents, and thousands separators; text without numbers is
    returned unchanged.
    """
    # Hardened regex: plain/scientific numbers incl. Unicode minus, LaTeX exponent, commas.
    number_pattern = (
        r'([-+]?\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d*\.?\d+)'  # main number part
        r'(?:\s*[×x*]?\s*10(?:\^|\^{|{)?[-−⁻]?\d+(?:\})?)?'  # optional scientific-notation part
    )
    matches = list(re.finditer(number_pattern, correct_answer, re.IGNORECASE))
    if not matches:
        return correct_answer  # no numbers found, return original text
    wrong_answer = correct_answer
    for match in matches[::-1]:  # replace right-to-left so earlier spans stay valid
        full_match = match.group(0)
        base = match.group(1).replace(',', '')  # drop commas for numeric parsing
        try:
            # Convert to float, perturb, then re-render at the same precision.
            base_value = float(base)
            perturbed_value = perturb_number(base_value)
            # Keep the exponent part (e.g. x 10^6) — swap only the mantissa.
            wrong_value_str = full_match.replace(match.group(1), format_similar(base, perturbed_value))
            start, end = match.span()
            wrong_answer = wrong_answer[:start] + wrong_value_str + wrong_answer[end:]
        except:
            # NOTE(review): bare except silently skips unparseable numbers — consider narrowing.
            continue
    return wrong_answer
def perturb_number(value):
    """Return *value* nudged by a random 3%–15% of its magnitude, either direction."""
    noise_ratio = random.uniform(0.03, 0.15)
    sign = random.choice([-1, 1])
    perturbed = value + sign * abs(value) * noise_ratio
    # Avoid collapsing to (near) zero, which would look obviously wrong.
    return value * 1.1 if abs(perturbed) < 1e-10 else perturbed
def format_similar(original_str, value):
    """Render *value* with the same decimal precision as *original_str*."""
    if '.' not in original_str:
        return str(int(round(value)))
    fraction = original_str.split('.')[-1].rstrip('^}')  # ignore LaTeX-ish suffixes like ^10^6
    return f"{value:.{len(fraction)}f}"
if __name__ == "__main__":
    # Ad-hoc run over a fixed dataset; the result is also written to *_with_wrong_answers.json.
    data = generate_wrong_answers('/home/ubuntu/50T/fsy/benchmark/4is_type.json')

78
layer2/process/step4.py Normal file
View File

@@ -0,0 +1,78 @@
"""
对821道英文问题进行处理
1. 判断是否包含多个子问题,将问题拆分为完整子问题(去掉推理过程,只保留最后结果)
2. 判断题目类型
3. 将题目做成选择题
对计算题,在数值附近随机生成三个相似答案作为错误选项
对简答题,与标准答案最相近的其他问题的答案作为三个错误选项
4. 将正确和错误选项随机打乱生成ABCD选择题的模型
5. 添加prompt并将选择题包裹在[ANSWER]<answer>[/ANSWER]标签中
6. 模型打分
"""
import json
import random
from typing import List, Dict
def process_json_file(file_path: str) -> List[Dict]:
    """Turn each QA item into a shuffled A/B/C/D multiple-choice question.

    Reads the JSON list at *file_path*, mixes the correct answer with up to
    three distractors, wraps the question in the answering-protocol prompt,
    rewrites 'answer' as "[ANSWER]<letter>[/ANSWER]", and strips the
    distractor fields. Returns the mutated list.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # BUG FIX: the original built the question with double-quoted literals
    # nested inside a double-quoted f-string, which is a SyntaxError on every
    # Python before 3.12. The prompt text is unchanged.
    prompt_prefix = "The following is a question about Fundamentals of Materials Science"
    prompt_suffix = "You MUST include the letter(s) of the correct answer (separated by comma if there are many) within the following tags: [ANSWER] and [/ANSWER].\nFor example, '[ANSWER]<answer>[/ANSWER]', where <answer> is comma- or space-separated list of the correct letters. Always answer in exactly this format of comma-separated letters between the two tags, even if you are unsure. We require this because we use automatic parsing."
    for item in data:
        # Gather the correct answer plus any distractors.
        options = [
            item['answer'],
            item.get('wrong_answers_1', ''),
            item.get('wrong_answers_2', ''),
            item.get('wrong_answers_3', '')
        ]
        options = [opt for opt in options if opt]  # drop empty slots
        random.shuffle(options)
        # Locate the correct answer after shuffling.
        correct_answer_index = options.index(item['answer'])
        correct_answer_letter = chr(65 + correct_answer_index)  # 'A'..'D'
        # Render "(A)... (B)..." with single spaces between options.
        options_text = " ".join(
            f"({chr(65 + i)}){option}" for i, option in enumerate(options)
        )
        item['question'] = f"{prompt_prefix}{item['question']} {options_text}{prompt_suffix}"
        item['answer'] = f"[ANSWER]{correct_answer_letter}[/ANSWER]"
        # Remove the now-redundant distractor fields.
        for key in ('wrong_answers_1', 'wrong_answers_2', 'wrong_answers_3'):
            item.pop(key, None)
    return data
def save_processed_data(data: List[Dict], output_path: str) -> None:
    """Write *data* to *output_path* as pretty-printed UTF-8 JSON.

    Non-ASCII characters are written verbatim (``ensure_ascii=False``) so
    the Chinese question text stays human-readable on disk.
    """
    with open(output_path, 'w', encoding='utf-8') as out_file:
        out_file.write(json.dumps(data, ensure_ascii=False, indent=2))
# Example usage: convert the raw Q/A file into multiple-choice format.
if __name__ == "__main__":
    input_file = "/home/ubuntu/50T/fsy/5_1.json"  # source Q/A data
    output_file = "output.json"                   # destination for the result
    try:
        save_processed_data(process_json_file(input_file), output_file)
        print(f"处理完成!结果已保存到 {output_file}")
    except Exception as e:
        print(f"处理过程中出现错误: {e}")

177
layer2/process/step6.py Normal file
View File

@@ -0,0 +1,177 @@
"""
对821道英文问题进行处理
1. 判断是否包含多个子问题,将问题拆分为完整子问题(去掉推理过程,只保留最后结果)
2. 判断题目类型
3. 将题目做成选择题
对计算题,在数值附近随机生成三个相似答案作为错误选项
对简答题,与标准答案最相近的其他问题的答案作为三个错误选项
4. 将正确和错误选项随机打乱生成ABCD选择题的模型
5. 添加prompt并将选择题包裹在[ANSWER]<answer>[/ANSWER]标签中
6. 模型打分
"""
import concurrent.futures
import json
import os
import re
import threading

import numpy as np
from openai import OpenAI
from sklearn.metrics import precision_score, recall_score, f1_score
from tqdm import tqdm
# OpenAI-compatible client for the apiyi.com gateway.
# SECURITY: the API key was previously hard-coded here and committed to the
# repository; the leaked key should be revoked.  Read the secret from the
# environment instead of embedding it in source.
client = OpenAI(
    api_key=os.environ.get("APIYI_API_KEY", ""),
    base_url="https://vip.apiyi.com/v1",
)

# Serializes access to shared state across the worker threads.
thread_lock = threading.Lock()
def load_json_data(filepath):
    """Read and return the JSON payload stored at *filepath*."""
    with open(filepath, 'r') as src:
        return json.load(src)
def get_response(input, max_retries=10):
    """Query qwen-max-2025-01-25 with *input*, retrying on any failure.

    Returns the model's reply text, or the string ``"error!"`` when every
    one of the *max_retries* attempts raises.
    """
    for attempt in range(max_retries):
        try:
            completion = client.chat.completions.create(
                model="qwen-max-2025-01-25",
                messages=[
                    {"role": "system", "content": "You are an expert in the field of materials science, adept at answering questions related to fundamental aspects of materials science, including material structure, properties, processing, and applications."},
                    {"role": "user", "content": input},
                ],
                temperature=0,
            )
            return completion.choices[0].message.content
        except Exception as e:
            print(f"Error in getting LLM response (Attempt {attempt + 1}/{max_retries}): {e}")
    print(f"Failed to get response after {max_retries} attempts, returning None.")
    return "error!"
def process_item(item, index):
    """Build the prompt for one benchmark item, query the LLM, and return
    a result record tagged with *index* (used to restore input order)."""
    question = item['question']
    choice_labels = item['choices']['label']
    choice_texts = item['choices']['text']
    formatted_choices = " ".join(
        f"({lab}) {txt}" for lab, txt in zip(choice_labels, choice_texts)
    )
    llm_input = f"{question} {formatted_choices}. {item['prompt']}"
    return {
        'index': index,
        'question': question,
        'choices': item['choices'],
        'answer': item['answer'].strip(),
        'llm_answer': get_response(llm_input),
    }
def extract_answer(answer_string):
    """Return the stripped text between [ANSWER] and [/ANSWER], or None
    when the tags are absent."""
    found = re.search(r'\[ANSWER\](.*?)\[/ANSWER\]', answer_string)
    return found.group(1).strip() if found else None
def parse_answer(answer):
    """Split a comma-separated answer string into trimmed tokens.

    ``None`` (no answer extracted) maps to an empty list.
    """
    return [] if answer is None else [part.strip() for part in answer.split(',')]
def compute_metrics(data):
    """Score LLM predictions against the gold answers.

    Computes exact-match accuracy (predicted letter set equals the gold
    letter set) plus micro/macro precision, recall and F1 over a multi-hot
    encoding of the answer letters.
    """
    gold = [parse_answer(extract_answer(rec["answer"])) for rec in data]
    pred = [parse_answer(extract_answer(rec["llm_answer"])) for rec in data]

    # Exact match: both sides non-empty and the same set of letters.
    exact = [
        1 if g and p and set(g) == set(p) else 0
        for g, p in zip(gold, pred)
    ]
    accuracy = np.mean(exact)

    # Universe of labels = every choice letter seen anywhere in the data.
    label_universe = set()
    for rec in data:
        label_universe.update(rec["choices"]["label"])
    labels = sorted(label_universe)

    # Multi-hot encode gold and predicted answers over that label universe.
    y_true_multi = np.array(
        [[1 if lab in g else 0 for lab in labels] for g in gold]
    )
    y_pred_multi = np.array(
        [[1 if lab in p else 0 for lab in labels] for p in pred]
    )

    return {
        "accuracy": accuracy,
        "precision_micro": precision_score(y_true_multi, y_pred_multi, average='micro', zero_division=0),
        "recall_micro": recall_score(y_true_multi, y_pred_multi, average='micro', zero_division=0),
        "f1_micro": f1_score(y_true_multi, y_pred_multi, average='micro', zero_division=0),
        "precision_macro": precision_score(y_true_multi, y_pred_multi, average='macro', zero_division=0),
        "recall_macro": recall_score(y_true_multi, y_pred_multi, average='macro', zero_division=0),
        "f1_macro": f1_score(y_true_multi, y_pred_multi, average='macro', zero_division=0),
    }
def calculate_accuracy_multithreaded(data, max_workers=5):
    """Fan process_item out over *data* with a thread pool, then score.

    Returns ``(metrics_dict, results)`` where *results* is sorted back
    into the original input order.
    """
    results = []
    with tqdm(total=len(data), desc="Processing items") as progress:
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
            pending = [pool.submit(process_item, rec, pos) for pos, rec in enumerate(data)]
            for done in concurrent.futures.as_completed(pending):
                results.append(done.result())
                progress.update(1)
    results.sort(key=lambda rec: rec['index'])
    return compute_metrics(results), results
def main():
    """Run the benchmark end-to-end and dump per-item results to disk."""
    filepath = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL-merge/merged.json'
    dataset = load_json_data(filepath)
    metric, results = calculate_accuracy_multithreaded(dataset, 8)
    print(f"Accuracy of qwen-max-2025-01-25: {metric}")
    with open('qwen-max-2025-01-25.json', 'w') as out:
        json.dump(results, out, indent=2)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,898 @@
处理第 1/832 条数据...
[
{
"question": "How does a temperature indicator made from a coiled metal strip work when the temperature increases?",
"answer": "Bimetallic materials are produced by bonding two materials having different coefficients of thermal expansion to one another, forming a laminar composite. When the temperature changes, one of the materials will expand or contract more than the other material. This difference in expansion or contraction causes the bimetallic material to change shape; if the original shape is that of a coil, then the device will coil or uncoil, depending on the direction of the temperature change."
},
{
"question": "From what kind of material would the temperature indicator be made?",
"answer": "The temperature indicator is made from bimetallic materials, which consist of two materials with different coefficients of thermal expansion bonded together."
},
{
"question": "What are the important properties that the material in the temperature indicator must possess?",
"answer": "The two materials must have very different coefficients of thermal expansion and should have high enough modulus of elasticity so that no permanent deformation of the material occurs."
}
]
处理第 2/832 条数据...
[
{
"question": "What properties should the head of a carpenter's hammer possess?",
"answer": "The striking face and claws of the hammer should be hard-the metal should not dent or deform when driving or removing nails. Yet these portions must also possess some impact resistance, particularly so that chips do not flake off the striking face and cause injuries."
},
{
"question": "How would you manufacture a hammer head?",
"answer": "The head for a carpenter's hammer is produced by forging, a metalworking process; a simple steel shape is heated and formed in several steps while hot into the required shape. The head is then heat treated to produce the required mechanical and physical properties."
}
]
处理第 3/832 条数据...
It's a single issue.
处理第 4/832 条数据...
[
{
"question": "Using the densities and atomic weights, calculate the number of atoms per cubic centimeter in lead.",
"answer": "3.3 × 10^22 atoms/cm^3"
},
{
"question": "Using the densities and atomic weights, calculate the number of atoms per cubic centimeter in lithium.",
"answer": "4.63 × 10^22 atoms/cm^3"
}
]
处理第 5/832 条数据...
It's a single issue.
处理第 6/832 条数据...
It's a single issue.
处理第 7/832 条数据...
```json
[
{
"question": "In order to plate a steel part having a surface area of 200 in.² with a 0.002 in. thick layer of nickel, how many atoms of nickel are required?",
"answer": "5.98 × 10²³ atoms"
},
{
"question": "In order to plate a steel part having a surface area of 200 in.² with a 0.002 in. thick layer of nickel, how many moles of nickel are required?",
"answer": "0.994 mol ni required"
}
]
```
处理第 8/832 条数据...
It's a single issue.
处理第 9/832 条数据...
It's a single issue.
处理第 10/832 条数据...
It's a single issue.
处理第 11/832 条数据...
[
{
"question": "Calculate the atomic radius in cm for a BCC metal with a0=0.3294 nm and one atom per lattice point.",
"answer": "1.426 x 10^-8 cm"
},
{
"question": "Calculate the atomic radius in cm for an FCC metal with a0=4.0862 A and one atom per lattice point.",
"answer": "1.4447 x 10^-8 cm"
}
]
处理第 12/832 条数据...
```json
[
{
"question": "Determine the crystal structure for a metal with a0=4.9489 Å, r=1.75 Å and one atom per lattice point.",
"answer": "fcc."
},
{
"question": "Determine the crystal structure for a metal with a0=0.42906 nm, r=0.1858 nm and one atom per lattice point.",
"answer": "bcc."
}
]
```
处理第 13/832 条数据...
```json
[
{
"question": "The density of potassium, which has the BCC structure and one atom per lattice point, is 0.855 g/cm³. The atomic weight of potassium is 39.09 g/mol. Calculate the lattice parameter.",
"answer": "5.3355 × 10⁻⁸ cm"
},
{
"question": "The density of potassium, which has the BCC structure and one atom per lattice point, is 0.855 g/cm³. The atomic weight of potassium is 39.09 g/mol. Calculate the atomic radius of potassium.",
"answer": "2.3103 × 10⁻⁸ cm"
}
]
```
处理第 14/832 条数据...
[
{
"question": "The density of thorium, which has the FCC structure and one atom per lattice point, is 11.72 g/cm³. The atomic weight of thorium is 232 g/mol. Calculate the lattice parameter.",
"answer": "5.0856 × 10⁻⁸ cm"
},
{
"question": "The density of thorium, which has the FCC structure and one atom per lattice point, is 11.72 g/cm³. The atomic weight of thorium is 232 g/mol. Calculate the atomic radius of thorium.",
"answer": "1.7980 × 10⁻⁸ cm"
}
]
处理第 15/832 条数据...
It's a single issue.
处理第 16/832 条数据...
It's a single issue.
处理第 17/832 条数据...
It's a single issue.
处理第 18/832 条数据...
```json
[
{
"question": "Bismuth has a hexagonal structure, with a0=0.4546 nm and c0=1.186 nm. The density is 9.808 g/cm3 and the atomic weight is 208.98 g/mol. Determine the volume of the unit cell.",
"answer": "0.21226 nm3 or 2.1226 x 10^-22 cm3"
},
{
"question": "Bismuth has a hexagonal structure, with a0=0.4546 nm and c0=1.186 nm. The density is 9.808 g/cm3 and the atomic weight is 208.98 g/mol. Determine the number of atoms in each unit cell.",
"answer": "6 atoms/cell"
}
]
```
处理第 19/832 条数据...
[
{
"question": "Gallium has an orthorhombic structure, with a0=0.45258 nm, b0=0.45186 nm, and c0=0.76570 nm. The atomic radius is 0.1218 nm. The density is 5.904 g/cm3 and the atomic weight is 69.72 g/mol. Determine the number of atoms in each unit cell.",
"answer": "8 atoms/cell."
},
{
"question": "Gallium has an orthorhombic structure, with a0=0.45258 nm, b0=0.45186 nm, and c0=0.76570 nm. The atomic radius is 0.1218 nm. The density is 5.904 g/cm3 and the atomic weight is 69.72 g/mol. Determine the packing factor in the unit cell.",
"answer": "0.387."
}
]
处理第 20/832 条数据...
```json
[
{
"question": "Beryllium has a hexagonal crystal structure, with a0=0.22858 nm and c0=0.35842 nm. The atomic radius is 0.1143 nm, the density is 1.848 g/cm3, and the atomic weight is 9.01 g/mol. Determine the number of atoms in each unit cell.",
"answer": "2 atoms/cell."
},
{
"question": "Beryllium has a hexagonal crystal structure, with a0=0.22858 nm and c0=0.35842 nm. The atomic radius is 0.1143 nm, the density is 1.848 g/cm3, and the atomic weight is 9.01 g/mol. Determine the packing factor in the unit cell.",
"answer": "0.77"
}
]
```
处理第 21/832 条数据...
```json
[
{
"question": "A typical paper clip weighs 0.59 g and consists of BCC iron. Calculate the number of unit cells in the paper clip.",
"answer": "3.185 x 10^21 cells"
},
{
"question": "A typical paper clip weighs 0.59 g and consists of BCC iron. Calculate the number of iron atoms in the paper clip.",
"answer": "6.37 x 10^21 atoms"
}
]
```
处理第 22/832 条数据...
```json
[
{
"question": "Determine the planar density for BCC lithium in the (100) plane.",
"answer": "planar density = 0.0812 x 10^16 points/cm^2"
},
{
"question": "Determine the packing fraction for BCC lithium in the (100) plane.",
"answer": "packing fraction = 0.589"
},
{
"question": "Determine the planar density for BCC lithium in the (110) plane.",
"answer": "planar density = 0.1149 x 10^16 points/cm^2"
},
{
"question": "Determine the packing fraction for BCC lithium in the (110) plane.",
"answer": "packing fraction = 0.833"
},
{
"question": "Determine the planar density for BCC lithium in the (111) plane.",
"answer": "planar density = 0.0469 x 10^16 points/cm^2"
},
{
"question": "Determine the packing fraction for BCC lithium in the (111) plane.",
"answer": "packing fraction = 1/2 / 0.866 a0^2 sqrt(3) a0 / 4"
},
{
"question": "Which, if any, of these planes is close packed in BCC lithium?",
"answer": "there is no close-packed plane in bcc structures."
}
]
```
处理第 23/832 条数据...
It's a single issue.
处理第 24/832 条数据...
It's a single issue.
处理第 25/832 条数据...
```json
[
{
"question": "Determine the minimum radius of an atom that will just fit into the tetrahedral interstitial site in FCC nickel.",
"answer": "0.2797 \aa"
},
{
"question": "Determine the minimum radius of an atom that will just fit into the octahedral interstitial site in BCC lithium.",
"answer": "0.629 \aa"
}
]
```
API调用错误: Invalid \escape: line 4 column 27 (char 170)
处理第 26/832 条数据...
It's a single issue.
处理第 27/832 条数据...
It's a single issue.
处理第 28/832 条数据...
It's a single issue.
处理第 29/832 条数据...
It's a single issue.
处理第 30/832 条数据...
It's a single issue.
处理第 31/832 条数据...
It's a single issue.
处理第 32/832 条数据...
It's a single issue.
处理第 33/832 条数据...
It's a single issue.
处理第 34/832 条数据...
It's a single issue.
处理第 35/832 条数据...
It's a single issue.
处理第 36/832 条数据...
It's a single issue.
处理第 37/832 条数据...
It's a single issue.
处理第 38/832 条数据...
It's a single issue.
处理第 39/832 条数据...
It's a single issue.
处理第 40/832 条数据...
It's a single issue.
处理第 41/832 条数据...
It's a single issue.
处理第 42/832 条数据...
It's a single issue.
处理第 43/832 条数据...
[
{
"question": "Would you expect BeO to have the sodium chloride, zinc blende, or fluorite structure?",
"answer": "BeO has the zinc blende structure."
},
{
"question": "Based on the zinc blende structure of BeO, determine the lattice parameter.",
"answer": "The lattice parameter is 3.8567 Å."
},
{
"question": "Based on the zinc blende structure of BeO, determine the density.",
"answer": "The density is 2.897 g/cm³."
},
{
"question": "Based on the zinc blende structure of BeO, determine the packing factor.",
"answer": "The packing factor is 0.684."
}
]
处理第 44/832 条数据...
```json
[
{
"question": "Would you expect CsBr to have the sodium chloride, zinc blende, fluorite, or cesium chloride structure?",
"answer": "CsBr is expected to have the cesium chloride structure."
},
{
"question": "Based on the cesium chloride structure, determine the lattice parameter for CsBr.",
"answer": "The lattice parameter a0 = 4.1916 Å."
},
{
"question": "Based on the cesium chloride structure, determine the density for CsBr.",
"answer": "The density ρ = 4.8 g/cm³."
},
{
"question": "Based on the cesium chloride structure, determine the packing factor for CsBr.",
"answer": "The packing factor pf = 0.693."
}
]
```
处理第 45/832 条数据...
[
{
"question": "Sketch the ion arrangement on the (110) plane of ZnS (with the zinc blende structure).",
"answer": ""
},
{
"question": "Sketch the ion arrangement on the (110) plane of CaF2 (with the fluorite structure).",
"answer": ""
},
{
"question": "Compare the ion arrangement on the (110) plane of ZnS (with the zinc blende structure) to that on the (110) plane of CaF2 (with the fluorite structure).",
"answer": ""
},
{
"question": "Calculate the planar packing fraction (ppf) on the (110) plane for ZnS (with the zinc blende structure).",
"answer": "for zns on the (110) plane, the planar packing fraction (ppf) is 0.492."
},
{
"question": "Calculate the planar packing fraction (ppf) on the (110) plane for CaF2 (with the fluorite structure).",
"answer": "for caf2 on the (110) plane, the planar packing fraction (ppf) is 0.699."
},
{
"question": "Compare the planar packing fraction on the (110) planes for ZnS (with the zinc blende structure) and CaF2 (with the fluorite structure).",
"answer": ""
}
]
处理第 46/832 条数据...
[
{
"question": "MgO, which has the sodium chloride structure, has a lattice parameter of 0.396 nm. Determine the planar density and the planar packing fraction for the (111) plane of MgO. What ions are present on this plane?",
"answer": "(111) plane:\n- planar density (p.d.): 0.1473 x 10^16 points/cm^2\n- planar packing fraction (ppf): 0.202\n- ions present: mg^2+ and o^2-"
},
{
"question": "MgO, which has the sodium chloride structure, has a lattice parameter of 0.396 nm. Determine the planar density and the planar packing fraction for the (222) plane of MgO. What ions are present on this plane?",
"answer": "(222) plane:\n- planar density (p.d.): 0.1473 x 10^16 points/cm^2\n- planar packing fraction (ppf): 0.806\n- ions present: mg^2+ and o^2-"
}
]
处理第 47/832 条数据...
It's a single issue.
处理第 48/832 条数据...
It's a single issue.
处理第 49/832 条数据...
It's a single issue.
处理第 50/832 条数据...
```json
[
{
"question": "The density of a sample of HCP beryllium is 1.844 g/cm^3 and the lattice parameters are a_0=0.22858 nm and c_0=0.35842 nm. Calculate the fraction of the lattice points that contain vacancies.",
"answer": "0.0008"
},
{
"question": "The density of a sample of HCP beryllium is 1.844 g/cm^3 and the lattice parameters are a_0=0.22858 nm and c_0=0.35842 nm. Calculate the total number of vacancies in a cubic centimeter.",
"answer": "0.986 x 10^20 vacancies/cm^3"
}
]
```
处理第 51/832 条数据...
[
{
"question": "BCC lithium has a lattice parameter of 3.5089 × 10^-8 cm and contains one vacancy per 200 unit cells. Calculate the number of vacancies per cubic centimeter.",
"answer": "1.157 × 10^20 vacancies/cm³"
},
{
"question": "BCC lithium has a lattice parameter of 3.5089 × 10^-8 cm and contains one vacancy per 200 unit cells. Calculate the density of Li.",
"answer": "0.532 g/cm³"
}
]
处理第 52/832 条数据...
[
{
"question": "FCC lead has a lattice parameter of 0.4949 nm and contains one vacancy per 500 Pb atoms. Calculate the density.",
"answer": "the density is 11.335 g/cm3."
},
{
"question": "FCC lead has a lattice parameter of 0.4949 nm and contains one vacancy per 500 Pb atoms. Calculate the number of vacancies per gram of Pb.",
"answer": "the number of vacancies per gram of pb is 5.82 × 10^18 vacancies/g."
}
]
处理第 53/832 条数据...
It's a single issue.
处理第 54/832 条数据...
It's a single issue.
处理第 55/832 条数据...
It's a single issue.
处理第 56/832 条数据...
[
{
"question": "Suppose we introduce one carbon atom for every 100 iron atoms in an interstitial position in BCC iron, giving a lattice parameter of 0.2867 nm. For the Fe-C alloy, find the density.",
"answer": "7.89 g/cm3"
},
{
"question": "Suppose we introduce one carbon atom for every 100 iron atoms in an interstitial position in BCC iron, giving a lattice parameter of 0.2867 nm. For the Fe-C alloy, find the packing factor.",
"answer": "0.681"
}
]
处理第 57/832 条数据...
```json
[
{
"question": "The density of BCC iron is 7.882 g/cm³ and the lattice parameter is 0.2886 nm when hydrogen atoms are introduced at interstitial positions. Calculate the atomic fraction of hydrogen atoms.",
"answer": "0.004"
},
{
"question": "The density of BCC iron is 7.882 g/cm³ and the lattice parameter is 0.2886 nm when hydrogen atoms are introduced at interstitial positions. Calculate the number of unit cells required on average that contain hydrogen atoms.",
"answer": "123.5"
}
]
```
处理第 58/832 条数据...
[
{
"question": "Suppose one Schottky defect is present in every tenth unit cell of MgO. MgO has the sodium chloride crystal structure and a lattice parameter of 0.396 nm. Calculate the number of anion vacancies per cm3.",
"answer": "the number of anion vacancies per cm3 is 1.61 × 10^21 vacancies/cm^3."
},
{
"question": "Suppose one Schottky defect is present in every tenth unit cell of MgO. MgO has the sodium chloride crystal structure and a lattice parameter of 0.396 nm. Calculate the density of the ceramic.",
"answer": "the density of the ceramic is 4.205 g/cm^3."
}
]
处理第 59/832 条数据...
[
{
"question": "ZnS has the zinc blende structure. If the density is 3.02 g/cm³ and the lattice parameter is 0.59583 nm, determine the number of Schottky defects per unit cell.",
"answer": "0.0535 defects per unit cell."
},
{
"question": "ZnS has the zinc blende structure. If the density is 3.02 g/cm³ and the lattice parameter is 0.59583 nm, determine the number of Schottky defects per cubic centimeter.",
"answer": "2.517 × 10²⁰ defects per cm³."
}
]
处理第 60/832 条数据...
[
{
"question": "Calculate the length of the Burgers vector in BCC niobium.",
"answer": "b = 2.853 \u00c5"
},
{
"question": "Calculate the length of the Burgers vector in FCC silver.",
"answer": "b = 2.889 \u00c5"
},
{
"question": "Calculate the length of the Burgers vector in diamond cubic silicon.",
"answer": "b = 3.840 \u00c5"
}
]
处理第 61/832 条数据...
[
{
"question": "A single crystal of an FCC metal is oriented so that the direction is parallel to an applied stress of 5000 psi. Calculate the resolved shear stress acting on the (111) slip plane in the [T slip direction.",
"answer": "the resolved shear stress acting on the (111) slip plane in the [T slip direction is 0."
},
{
"question": "A single crystal of an FCC metal is oriented so that the direction is parallel to an applied stress of 5000 psi. Calculate the resolved shear stress acting on the (111) slip plane in the [T] slip direction.",
"answer": "the resolved shear stress acting on the (111) slip plane in the [T] slip direction is 2040 psi (active)."
},
{
"question": "A single crystal of an FCC metal is oriented so that the direction is parallel to an applied stress of 5000 psi. Which slip system(s) will become active first?",
"answer": "the slip systems that will become active first are λ011 and λ101."
}
]
处理第 62/832 条数据...
```json
[
{
"question": "A single crystal of a BCC metal is oriented so that the direction is parallel to the applied stress. If the critical resolved shear stress required for slip is 12,000 psi, calculate the magnitude of the applied stress required to cause slip to begin in the [1,-1,1] direction on the (110) slip plane.",
"answer": "the magnitude of the applied stress required to cause slip to begin in the [1,-1,1] direction is: sigma = infinity"
},
{
"question": "A single crystal of a BCC metal is oriented so that the direction is parallel to the applied stress. If the critical resolved shear stress required for slip is 12,000 psi, calculate the magnitude of the applied stress required to cause slip to begin in the [1,-1,1] direction on the (011) slip plane.",
"answer": "the magnitude of the applied stress required to cause slip to begin in the [1,-1,1] direction is: sigma = 29,412 psi"
},
{
"question": "A single crystal of a BCC metal is oriented so that the direction is parallel to the applied stress. If the critical resolved shear stress required for slip is 12,000 psi, calculate the magnitude of the applied stress required to cause slip to begin in the [1,-1,1] direction on the (101) slip plane.",
"answer": "the magnitude of the applied stress required to cause slip to begin in the [1,-1,1] direction is: sigma = 29,412 psi"
}
]
```
处理第 63/832 条数据...
```json
[
{
"question": "The strength of titanium is found to be 65,000 psi when the grain size is 17 x 10^-6 m and 82,000 psi when the grain size is 0.8 x 10^-6 m. Determine the constants in the Hall-Petch equation.",
"answer": "the constants in the hall-petch equation are k = 19.4 psi/sqrt(d) and sigma_o = 60,290 psi."
},
{
"question": "The strength of titanium is found to be 65,000 psi when the grain size is 17 x 10^-6 m and 82,000 psi when the grain size is 0.8 x 10^-6 m. Determine the strength of the titanium when the grain size is reduced to 0.2 x 10^-6 m.",
"answer": "the strength of the titanium when the grain size is reduced to 0.2 x 10^-6 m is 103,670 psi."
}
]
```
处理第 64/832 条数据...
```json
[
{
"question": "For an ASTM grain size number of 8, calculate the number of grains per square inch at a magnification of 100.",
"answer": "128 grains/in.^2"
},
{
"question": "For an ASTM grain size number of 8, calculate the number of grains per square inch with no magnification.",
"answer": "1.28 × 10^6 grains/in.^2"
}
]
```
处理第 65/832 条数据...
It's a single issue.
处理第 66/832 条数据...
It's a single issue.
处理第 67/832 条数据...
It's a single issue.
处理第 68/832 条数据...
It's a single issue.
处理第 69/832 条数据...
It's a single issue.
处理第 70/832 条数据...
[
{
"question": "The diffusion coefficient for Cr+3 in Cr2O3 is 6x10-15 cm2/s at 727C and is 1x10-9 cm2/s at 1400C. Calculate (a) the activation energy.",
"answer": "The activation energy q is 59,230 cal/mol."
},
{
"question": "The diffusion coefficient for Cr+3 in Cr2O3 is 6x10-15 cm2/s at 727C and is 1x10-9 cm2/s at 1400C. Calculate (b) the constant D0.",
"answer": "The constant D0 is 0.055 cm2/s."
}
]
处理第 71/832 条数据...
[
{
"question": "A 0.2-mm thick wafer of silicon is treated so that a uniform concentration gradient of antimony is produced. One surface contains 1 Sb atom per 10^8 Si atoms and the other surface contains 500 Sb atoms per 10^8 Si atoms. The lattice parameter for Si is 5.407 A (Appendix A). Calculate the concentration gradient in (a) atomic percent Sb per cm.",
"answer": "-0.02495 at% sb/cm"
},
{
"question": "A 0.2-mm thick wafer of silicon is treated so that a uniform concentration gradient of antimony is produced. One surface contains 1 Sb atom per 10^8 Si atoms and the other surface contains 500 Sb atoms per 10^8 Si atoms. The lattice parameter for Si is 5.407 A (Appendix A). Calculate the concentration gradient in (b) Sb atoms/cm^3.cm.",
"answer": "-1.246 x 10^19 sb atoms/cm^3.cm"
}
]
处理第 72/832 条数据...
[
{
"question": "When a Cu-Zn alloy solidifies, one portion of the structure contains 25 atomic percent zinc and another portion 0.025 mm away contains 20 atomic percent zinc. The lattice parameter for the FCC alloy is 3.63 x 10^-8 cm. Determine the concentration gradient in (a) atomic percent Zn per cm.",
"answer": "-2000 at% zn/cm"
},
{
"question": "When a Cu-Zn alloy solidifies, one portion of the structure contains 25 atomic percent zinc and another portion 0.025 mm away contains 20 atomic percent zinc. The lattice parameter for the FCC alloy is 3.63 x 10^-8 cm. Determine the concentration gradient in (b) weight percent Zn per cm.",
"answer": "-2032 wt% zn/cm"
}
]
处理第 73/832 条数据...
[
{
"question": "A 0.001 -in. BCC iron foil is used to separate a high hydrogen gas from a low hydrogen gas at 650^{\circ} C. 5 x 10^8 H atoms/cm^3 are in equilibrium with the hot side of the foil, while 2 x 10^3 H atoms/cm^3 are in equilibrium with the cold side. Determine the concentration gradient of hydrogen.",
"answer": "-1969 x 10^8 h atoms/cm^3.cm"
},
{
"question": "A 0.001 -in. BCC iron foil is used to separate a high hydrogen gas from a low hydrogen gas at 650^{\circ} C. 5 x 10^8 H atoms/cm^3 are in equilibrium with the hot side of the foil, while 2 x 10^3 H atoms/cm^3 are in equilibrium with the cold side. Determine the flux of hydrogen through the foil.",
"answer": "0.33 x 10^8 h atoms/cm^2.s"
}
]
API调用错误: Invalid \escape: line 3 column 121 (char 128)
处理第 74/832 条数据...
It's a single issue.
处理第 75/832 条数据...
It's a single issue.
处理第 76/832 条数据...
It's a single issue.
处理第 77/832 条数据...
It's a single issue.
处理第 78/832 条数据...
It's a single issue.
处理第 79/832 条数据...
It's a single issue.
处理第 80/832 条数据...
[
{
"question": "Iron containing 0.05 % C is heated to 912 degrees C in an atmosphere that produces 1.20 % C at the surface and is held for 24 h. Calculate the carbon content at 0.05 cm beneath the surface if the iron is BCC.",
"answer": "for bcc iron, the carbon content at 0.05 cm beneath the surface is 0.95 % c."
},
{
"question": "Iron containing 0.05 % C is heated to 912 degrees C in an atmosphere that produces 1.20 % C at the surface and is held for 24 h. Calculate the carbon content at 0.05 cm beneath the surface if the iron is FCC.",
"answer": "for fcc iron, the carbon content at 0.05 cm beneath the surface is 0.95 % c."
},
{
"question": "Explain the difference in carbon content between BCC and FCC iron under the given conditions.",
"answer": "faster diffusion occurs in the looser packed bcc structure, leading to the higher carbon content at point x."
}
]
处理第 81/832 条数据...
It's a single issue.
处理第 82/832 条数据...
It's a single issue.
处理第 83/832 条数据...
It's a single issue.
处理第 84/832 条数据...
It's a single issue.
处理第 85/832 条数据...
It's a single issue.
处理第 86/832 条数据...
It's a single issue.
处理第 87/832 条数据...
It's a single issue.
处理第 88/832 条数据...
It's a single issue.
处理第 89/832 条数据...
It's a single issue.
处理第 90/832 条数据...
[
{
"question": "A ceramic part made of MgO is sintered successfully at 1700 degrees C in 90 minutes. To minimize thermal stresses during the process, we plan to reduce the temperature to 1500 degrees C. Which will limit the rate at which sintering can be done: diffusion of magnesium ions or diffusion of oxygen ions?",
"answer": "diffusion of oxygen is the slower of the two, due to the larger ionic radius of the oxygen."
},
{
"question": "A ceramic part made of MgO is sintered successfully at 1700 degrees C in 90 minutes. To minimize thermal stresses during the process, we plan to reduce the temperature to 1500 degrees C. What time will be required at the lower temperature?",
"answer": "the time required at the lower temperature is 955 min or 15.9 h."
}
]
处理第 91/832 条数据...
[
{
"question": "A 850-lb force is applied to a 0.15-in. diameter nickel wire having a yield strength of 45,000 psi and a tensile strength of 55,000 psi. Determine whether the wire will plastically deform.",
"answer": "the wire will plastically deform (48,100 psi)."
},
{
"question": "A 850-lb force is applied to a 0.15-in. diameter nickel wire having a yield strength of 45,000 psi and a tensile strength of 55,000 psi. Determine whether the wire will experience necking.",
"answer": "no necking will occur (48,100 psi)."
}
]
处理第 92/832 条数据...
[
{
"question": "A force of 100000 N is applied to a 10 mm x 20 mm iron bar having a yield strength of 400 MPa. Determine whether the bar will plastically deform.",
"answer": "the bar will plastically deform because the applied stress of 500 MPa exceeds the yield strength of 400 MPa."
},
{
"question": "A force of 100000 N is applied to a 10 mm x 20 mm iron bar having a tensile strength of 480 MPa. Determine whether the bar will experience necking.",
"answer": "the bar will experience necking because the applied stress of 500 MPa exceeds the tensile strength of 480 MPa."
}
]
处理第 93/832 条数据...
It's a single issue.
处理第 94/832 条数据...
[
{
"question": "A force of 20,000 N will cause a 1 cm × 1 cm bar of magnesium to stretch from 10 cm to 10.045 cm. Calculate the modulus of elasticity in GPa.",
"answer": "the modulus of elasticity is 44.4 GPa."
},
{
"question": "A force of 20,000 N will cause a 1 cm × 1 cm bar of magnesium to stretch from 10 cm to 10.045 cm. Calculate the modulus of elasticity in psi.",
"answer": "the modulus of elasticity is 6.44 × 10^6 psi."
}
]
处理第 95/832 条数据...
It's a single issue.
处理第 96/832 条数据...
It's a single issue.
处理第 97/832 条数据...
It's a single issue.
处理第 98/832 条数据...
[
{
"question": "A 0.4-in. diameter, 12-in-long titanium bar has a yield strength of 50,000 psi, a modulus of elasticity of 16x10^6 psi, and Poisson's ratio of 0.30. Determine the length of the bar when a 500-lb load is applied.",
"answer": "the final length of the bar is 12.00298 in."
},
{
"question": "A 0.4-in. diameter, 12-in-long titanium bar has a yield strength of 50,000 psi, a modulus of elasticity of 16x10^6 psi, and Poisson's ratio of 0.30. Determine the diameter of the bar when a 500-lb load is applied.",
"answer": "the final diameter of the bar is 0.39997 in."
}
]
处理第 99/832 条数据...
```json
[
{
"question": "A three-point bend test is performed on a block of ZrO2 that is 8 in. long, 0.50 in. wide, and 0.25 in. thick and is resting on two supports 4 in. apart. When a force of 400 lb is applied, the specimen deflects 0.037 in. and breaks. Calculate the flexural strength.",
"answer": "flexural strength = 76,800 psi"
},
{
"question": "A three-point bend test is performed on a block of ZrO2 that is 8 in. long, 0.50 in. wide, and 0.25 in. thick and is resting on two supports 4 in. apart. When a force of 400 lb is applied, the specimen deflects 0.037 in. and breaks. Calculate the flexural modulus, assuming that no plastic deformation occurs.",
"answer": "flexural modulus = 22.14 × 10^6 psi"
}
]
```
处理第 100/832 条数据...
```json
[
{
"question": "A three-point bend test is performed on a block of silicon carbide that is 10 cm long, 1.5 cm wide, and 0.6 cm thick and is resting on two supports 7.5 cm apart. The sample breaks when a deflection of 0.09 mm is recorded. Calculate the force that caused the fracture. The flexural modulus for silicon carbide is 480 GPa. Assume that no plastic deformation occurs.",
"answer": "the force that caused the fracture is 1327 n."
},
{
"question": "A three-point bend test is performed on a block of silicon carbide that is 10 cm long, 1.5 cm wide, and 0.6 cm thick and is resting on two supports 7.5 cm apart. The sample breaks when a deflection of 0.09 mm is recorded. Calculate the flexural strength. The flexural modulus for silicon carbide is 480 GPa. Assume that no plastic deformation occurs.",
"answer": "the flexural strength is 276 mpa."
}
]
```
处理第 101/832 条数据...
[
{
"question": "(a) A thermosetting polymer containing glass beads is required to deflect 0.5 mm when a force of 500 N is applied. The polymer part is 2 cm wide, 0.5 cm thick, and 10 cm long. If the flexural modulus is 6.9 GPa, determine the minimum distance between the supports.",
"answer": "the minimum distance between the supports is 41 mm."
},
{
"question": "(a) Will the polymer fracture if its flexural strength is 85 MPa? Assume that no plastic deformation occurs.",
"answer": "the applied stress is 61.5 MPa, which is less than the flexural strength of 85 MPa; the polymer is not expected to fracture."
}
]
处理第 102/832 条数据...
It's a single issue.
处理第 103/832 条数据...
It's a single issue.
处理第 104/832 条数据...
It's a single issue.
处理第 105/832 条数据...
It's a single issue.
处理第 106/832 条数据...
It's a single issue.
处理第 107/832 条数据...
It's a single issue.
处理第 108/832 条数据...
It's a single issue.
处理第 109/832 条数据...
[
{
"question": "To survive for one million cycles under conditions that provide for equal compressive and tensile stresses, what is the fatigue strength, or maximum stress amplitude, required?",
"answer": "the fatigue strength at one million cycles is 22 mpa."
},
{
"question": "What are the maximum stress, the minimum stress, and the mean stress on the part during its use under conditions that provide for equal compressive and tensile stresses?",
"answer": "the maximum stress is +22 mpa, the minimum stress is -22 mpa, and the mean stress is 0 mpa."
},
{
"question": "What effect would the frequency of the stress application have on the fatigue strength, maximum stress, minimum stress, and mean stress on the part during its use?",
"answer": "a high frequency will cause heating of the polymer. as the temperature of the polymer increases, the fatigue strength will decrease. if the applied stress is not reduced, then the polymer will fail in a shorter time."
}
]
处理第 110/832 条数据...
It's a single issue.
处理第 111/832 条数据...
It's a single issue.
处理第 112/832 条数据...
[
{
"question": "Using the data in Figure 7-27 for an iron-chromium-nickel alloy, determine the activation energy \(Q_{r}\) for rupture in the temperature range 980 to \(1090^{\circ} \mathrm{C}\).",
"answer": "the activation energy \(q_{r}\) is \(117,000 \text{ cal/mol}\)."
},
{
"question": "Using the data in Figure 7-27 for an iron-chromium-nickel alloy, determine the constant \(m\) for rupture in the temperature range 980 to \(1090^{\circ} \mathrm{C}\).",
"answer": "the constant \(m\) for rupture is \(3.9\)."
}
]
API调用错误: Invalid \escape: line 3 column 119 (char 126)
处理第 113/832 条数据...
It's a single issue.
处理第 114/832 条数据...
It's a single issue.
处理第 115/832 条数据...
[
{
"question": "A 2-in.-diameter copper rod is reduced to 1.5 in. diameter, then reduced again to a final diameter of 1 in. Calculate the % CW for this case.",
"answer": "75%"
},
{
"question": "A 2-in.-diameter copper rod is reduced in one step from 2 in. to a 1 in. diameter. Calculate the % CW for this case.",
"answer": "75%"
}
]
处理第 116/832 条数据...
[
{
"question": "Suppose that liquid nickel is undercooled until homogeneous nucleation occurs. Calculate the critical radius of the nucleus required. Assume that the lattice parameter of the solid FCC nickel is 0.356 nm.",
"answer": "the critical radius of the nucleus required is 6.65 x 10^-8 cm."
},
{
"question": "Suppose that liquid nickel is undercooled until homogeneous nucleation occurs. Calculate the number of nickel atoms in the nucleus. Assume that the lattice parameter of the solid FCC nickel is 0.356 nm.",
"answer": "the number of nickel atoms in the nucleus is 109 atoms."
}
]
处理第 117/832 条数据...
[
{
"question": "Suppose that liquid iron is undercooled until homogeneous nucleation occurs. Calculate the critical radius of the nucleus required. Assume that the lattice parameter of the solid BCC iron is 2.92 Å.",
"answer": "the critical radius of the nucleus required is 10.128 × 10^(-8) cm."
},
{
"question": "Suppose that liquid iron is undercooled until homogeneous nucleation occurs. Calculate the number of iron atoms in the nucleus. Assume that the lattice parameter of the solid BCC iron is 2.92 Å.",
"answer": "the number of iron atoms in the nucleus is 350 atoms."
}
]
处理第 118/832 条数据...
It's a single issue.
处理第 119/832 条数据...
It's a single issue.
处理第 120/832 条数据...
```json
[
{
"question": "Calculate the fraction of solidification that occurs dendritically when iron nucleates at 10 degrees Celsius undercooling. The specific heat of iron is 5.78 J/cm3·°C.",
"answer": "0.0333"
},
{
"question": "Calculate the fraction of solidification that occurs dendritically when iron nucleates at 100 degrees Celsius undercooling. The specific heat of iron is 5.78 J/cm3·°C.",
"answer": "0.333"
},
{
"question": "Calculate the fraction of solidification that occurs dendritically when iron nucleates homogeneously. The specific heat of iron is 5.78 J/cm3·°C.",
"answer": "all dendritically."
}
]
```
处理第 121/832 条数据...
[
{
"question": "Calculate the fraction of solidification that occurs dendritically when silver nucleates at 10 degrees Celsius undercooling. The specific heat of silver is 3.25 J/cm^3·°C.",
"answer": "0.0237"
},
{
"question": "Calculate the fraction of solidification that occurs dendritically when silver nucleates at 100 degrees Celsius undercooling. The specific heat of silver is 3.25 J/cm^3·°C.",
"answer": "0.337"
},
{
"question": "Calculate the fraction of solidification that occurs dendritically when silver nucleates homogeneously. The specific heat of silver is 3.25 J/cm^3·°C.",
"answer": "0.842"
}
]
处理第 122/832 条数据...
[
{
"question": "A 2-in. cube solidifies in 4.6 min. Calculate (a) the mold constant in Chvorinov's rule. Assume that n=2.",
"answer": "the mold constant in chvorinov's rule is 41.48 min/in^2."
},
{
"question": "A 0.5 in. x 0.5 in. x 6 in. bar cast under the same conditions. Calculate (b) the solidification time for the bar. Assume that n=2.",
"answer": "the solidification time for the bar is 0.60 min."
}
]
处理第 123/832 条数据...
It's a single issue.
处理第 124/832 条数据...
[
{
"question": "Calculate the diameter of the cylindrical riser required to prevent shrinkage in a 4 in. × 10 in. × 20 in. casting if the H / D of the riser is 1.5.",
"answer": "the diameter of the riser d ≥ 6.67 in."
},
{
"question": "Calculate the height of the cylindrical riser required to prevent shrinkage in a 4 in. × 10 in. × 20 in. casting if the H / D of the riser is 1.5.",
"answer": "the height of the riser h ≥ 10 in."
},
{
"question": "Calculate the volume of the cylindrical riser required to prevent shrinkage in a 4 in. × 10 in. × 20 in. casting if the H / D of the riser is 1.5.",
"answer": "the volume of the riser v ≥ 349 in.^3"
}
]
处理第 125/832 条数据...
[
{
"question": "Calculate the diameter of the cylindrical riser required to prevent shrinkage in a 1 in. x 6 in. x 6 in. casting if the H/D of the riser is 1.0.",
"answer": "the diameter of the riser must be at least 2.25 in."
},
{
"question": "Calculate the height of the cylindrical riser required to prevent shrinkage in a 1 in. x 6 in. x 6 in. casting if the H/D of the riser is 1.0.",
"answer": "the height of the riser must be at least 2.25 in."
},
{
"question": "Calculate the volume of the cylindrical riser required to prevent shrinkage in a 1 in. x 6 in. x 6 in. casting if the H/D of the riser is 1.0.",
"answer": "the volume of the riser must be at least 8.95 in^3."
}
]
处理第 126/832 条数据...
[
{
"question": "A 4-in-diameter sphere of liquid copper is allowed to solidify, producing a spherical shrinkage cavity in the center of the casting. What is the shrinkage volume and diameter of the shrinkage cavity in the copper casting?",
"answer": "for copper:\n- shrinkage volume: 1.709 in.^{3}\n- diameter of shrinkage cavity: 1.30 in."
},
{
"question": "A 4-in-diameter sphere of liquid iron is allowed to solidify, producing a spherical shrinkage cavity in the center of the casting. What is the shrinkage volume and diameter of the shrinkage cavity in the iron casting?",
"answer": "for iron:\n- shrinkage volume: 1.139 in.^{3}\n- diameter of shrinkage cavity: 1.30 in."
}
]
处理第 127/832 条数据...
It's a single issue.
处理第 128/832 条数据...
[
{
"question": "A 2 cm × 4 cm × 6 cm magnesium casting is produced. After cooling to room temperature, what is the volume of the shrinkage cavity at the center of the casting?",
"answer": "the volume of the shrinkage cavity at the center of the casting is 46.03 cm³."
},
{
"question": "A 2 cm × 4 cm × 6 cm magnesium casting is produced. After cooling to room temperature, what is the percent shrinkage that must have occurred during solidification?",
"answer": "the percent shrinkage that must have occurred during solidification is 4.1%."
}
]
处理第 129/832 条数据...
[
{
"question": "A 2 in. x 8 in. x 10 in. iron casting is produced and, after cooling to room temperature, is found to weigh 43.9 lb. Determine the percent shrinkage that must have occurred during solidification.",
"answer": "The percent shrinkage that must have occurred during solidification is 3.4%."
},
{
"question": "A 2 in. x 8 in. x 10 in. iron casting is produced and, after cooling to room temperature, is found to weigh 43.9 lb. Determine the number of shrinkage pores in the casting if all of the shrinkage occurs as pores with a diameter of 0.05 in.",
"answer": "The number of shrinkage pores in the casting is 83,354 pores."
}
]
处理第 130/832 条数据...
It's a single issue.
处理第 131/832 条数据...
It's a single issue.
处理第 132/832 条数据...
It's a single issue.
处理第 133/832 条数据...

View File

@@ -0,0 +1,42 @@
# Split each raw text segment into a question part and an answer part.
import json

def split_segment(segment):
    """Split *segment* at the first "Solution" or "Answer" marker.

    "Solution" takes priority over "Answer" (matching the original
    branch order). Returns (question, answer), both stripped; returns
    ("000", "000") when neither marker is present — sentinel kept for
    downstream compatibility.
    """
    for marker in ("Solution", "Answer"):
        if marker in segment:
            question, answer = segment.split(marker, 1)
            return question.strip(), answer.strip()
    return "000", "000"

if __name__ == "__main__":
    input_file_path = '/home/ubuntu/50T/fsy/benchmark/dataset-ours/[Solution]qa_segment_all.json'  # 修改为文件的实际路径
    with open(input_file_path, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    # Walk every record and split its segment into QA parts.
    processed_data = []
    for item in data:
        question, answer = split_segment(item.get("segment", ""))
        processed_data.append({
            "idx": item.get("idx"),
            "question": question,
            "answer": answer,
        })

    output_file_path = '[Solution]qa_segment.json'  # 输出文件路径
    with open(output_file_path, 'w', encoding='utf-8') as outfile:
        json.dump(processed_data, outfile, ensure_ascii=False, indent=4)

91
layer2/rubbish/1.py Normal file
View File

@@ -0,0 +1,91 @@
import json
import time
from openai import OpenAI
import os

# SECURITY: the API key was previously hard-coded here and has leaked into
# version control; read it from the environment instead
# (export OPENAI_API_KEY=... before running). Rotate the leaked key.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", ""),
    base_url="https://vip.apiyi.com/v1"
)
def load_qa_data(file_path):
    """Read a UTF-8 JSON file and return the parsed object."""
    with open(file_path, encoding='utf-8') as fh:
        return json.load(fh)
# Classify each QA pair into one of four question types via the LLM.
def classify_qa_type(question, answer):
    """Return 1 (Calculation), 2 (Multiple choice), 3 (True/False) or
    4 (Other) for the given QA pair; 0 signals an API failure so callers
    can tell "unclassified" apart from a real category.
    """
    # NOTE(review): the original prompt mislabelled its lists ("3. Other",
    # "2. For Calculation"); numbering fixed so the model sees a consistent
    # 1-4 scheme.
    prompt = f"""
Please analyze the following question and its answer, and classify the question type into one of the following four categories:
1. Calculation: A question that requires mathematical operations to derive the result.
2. Multiple choice: A question that provides multiple options (e.g., A/B/C/D) for the respondent to choose from.
3. True/False: A question that only requires answering true/false, yes/no, or correct/incorrect.
4. Other: A question that does not fall under the above three categories.
Question:
{question}
Answer:
{answer}
Please respond with the corresponding numeric code directly (without any explanation):
1. For Calculation, respond: 1
2. For Multiple choice, respond: 2
3. For True/False, respond: 3
4. For Other, respond: 4
"""
    try:
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": prompt},
            ],
            stream=False,
        )
        result = response.choices[0].message.content.strip().lower()
        print(result)
        # First matching digit wins; the model is asked to reply with a
        # single digit, so substring checks are sufficient here.
        if "1" in result:
            return 1
        elif "2" in result:
            return 2
        elif "3" in result:
            return 3
        else:
            return 4
    except Exception as e:
        print(f"API调用错误: {e}")
        # 0 = API failure, distinguishable from the four real labels.
        return 0
# Walk the dataset and attach a question-type label to every item.
def process_dataset(data):
    """Label each item in place with its question type; returns the list."""
    count = len(data)
    for pos, record in enumerate(data, start=1):
        print(f"处理第 {pos}/{count} 条数据...")
        record["type"] = classify_qa_type(record["question"], record["answer"])
        # Throttle: short pause after every 10 API calls.
        if pos % 10 == 0:
            time.sleep(2)
    return data
# Persist the processed dataset to disk.
def save_processed_data(data, output_file):
    """Write *data* as pretty-printed (indent=2) UTF-8 JSON."""
    with open(output_file, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, ensure_ascii=False, indent=2)
def main():
    """Entry point: load the dataset, label every item, save the result."""
    input_file = "/home/ubuntu/50T/fsy/benchmark/3single_select.json"
    output_file = "4is_type.json"
    dataset = load_qa_data(input_file)
    labelled = process_dataset(dataset)
    save_processed_data(labelled, output_file)
    print(f"处理完成,结果已保存到 {output_file}")

if __name__ == "__main__":
    main()

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

1874
layer2/rubbish/2.log Normal file

File diff suppressed because it is too large Load Diff

91
layer2/rubbish/2.py Normal file
View File

@@ -0,0 +1,91 @@
import json
import time
from openai import OpenAI
import os

# SECURITY: never commit API keys; the previous hard-coded key is leaked
# in version control and must be rotated. Read the key from the
# environment (export OPENAI_API_KEY=...) instead.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", ""),
    base_url="https://vip.apiyi.com/v1"
)
def load_qa_data(file_path):
    """Parse and return the JSON document stored at *file_path* (UTF-8)."""
    with open(file_path, 'r', encoding='utf-8') as source:
        parsed = json.load(source)
    return parsed
# Process calculation questions: keep only the final result in the answer.
def classify_qa_type(question, answer):
    """Ask the LLM to strip derivation steps from *answer*.

    Despite its (historical) name this does not classify; it returns the
    cleaned answer lower-cased, or 0 when the API call fails.
    """
    # NOTE(review): backslashes/braces below are escaped so the prompt
    # really contains "\(6.02 \times 10^{23}\)" and "\mathrm"; the original
    # f-string turned "\times" into a TAB and rendered "{23}" as "23".
    prompt = f"""
Process the given `question` and `answer` data, retaining the question and its corresponding answer while removing the calculation steps.
Question:
{question}
Original Answer:
{answer}
Requirements:
1. In the answer section, keep only the final result and its corresponding unit, removing any calculation steps.
2. If the answer involves multiple parts, use clear paragraph breaks or numbering to distinguish them.
Note:
- If the original answer contains LaTeX formulas (e.g., `\\(6.02 \\times 10^{{23}}\\)`), preserve the formula format but remove irrelevant derivation symbols (e.g., `\\mathrm`).
- Output only the processed answer content.
"""
    try:
        response = client.chat.completions.create(
            model="deepseek-chat",  # DeepSeek-v3 model
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": prompt},
            ],
            stream=False,
        )
        return response.choices[0].message.content.strip().lower()
    except Exception as e:
        print(f"API调用错误: {e}")
        # Sentinel: 0 signals an API failure to the caller.
        return 0
# Walk the whole dataset and rewrite the answers of selected items.
def process_dataset(data):
    """For every item with is_select == 1, replace its answer with the
    steps-stripped version from classify_qa_type. Mutates *data* in place
    and returns it.
    """
    count = len(data)
    for pos, record in enumerate(data, start=1):
        print(f"处理第 {pos}/{count} 条数据...")
        question, answer, selected = record["question"], record["answer"], record["is_select"]
        if selected == 1:
            cleaned = classify_qa_type(question, answer)
            print(cleaned)
            record["answer"] = cleaned
        # Throttle: short pause after every 10 processed items.
        if pos % 10 == 0:
            time.sleep(2)
    return data
# Write the processed records back to a JSON file.
def save_processed_data(data, output_file):
    """Dump *data* to *output_file* as UTF-8 JSON with 2-space indent."""
    with open(output_file, 'w', encoding='utf-8') as out:
        json.dump(data, out, ensure_ascii=False, indent=2)
def main():
    """Pipeline entry point: read, rewrite selected answers, persist."""
    input_file = "/home/ubuntu/50T/fsy/benchmark/is_select.json"
    output_file = "only_answer.json"
    save_processed_data(process_dataset(load_qa_data(input_file)), output_file)
    print(f"处理完成,结果已保存到 {output_file}")

if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

36
layer2/rubbish/4_1.py Normal file
View File

@@ -0,0 +1,36 @@
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def process_json(file_path):
    """Attach the three most similar answers as distractors to type-4 items.

    For every item with type == 4, computes TF-IDF cosine similarity
    between all type-4 answers and stores the three nearest other answers
    in wrong_answers_1..3, then writes the whole dataset to 5_type4.json.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    type_4_items = [item for item in data if item.get("type") == 4]
    # Guard: TfidfVectorizer raises on an empty corpus and argsort()[-3:]
    # yields fewer than 3 usable neighbours for < 4 items; skip in that case.
    if len(type_4_items) >= 4:
        answers = [item["answer"] for item in type_4_items]
        # TF-IDF vectors + pairwise cosine similarity between all answers.
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform(answers)
        cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
        for i, item in enumerate(type_4_items):
            similarities = cosine_sim[i]
            similarities[i] = -1  # exclude the item itself
            # Indices of the three most similar other answers, best first.
            most_similar_indices = similarities.argsort()[-3:][::-1]
            item["wrong_answers_1"] = type_4_items[most_similar_indices[0]]["answer"]
            item["wrong_answers_2"] = type_4_items[most_similar_indices[1]]["answer"]
            item["wrong_answers_3"] = type_4_items[most_similar_indices[2]]["answer"]
    # type_4_items hold references into data, so the distractors are
    # already reflected in the full dataset written here.
    with open('5_type4.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

if __name__ == "__main__":
    process_json('/home/ubuntu/50T/fsy/benchmark/4is_type_with_wrong_answers.json')

8210
layer2/rubbish/4is_type.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

11438
layer2/rubbish/5_type4.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,129 @@
import json
import time
from openai import OpenAI
import re
import os

# SECURITY: the hard-coded API key previously here is exposed in version
# control; load it from the environment (export OPENAI_API_KEY=...)
# and rotate the leaked key.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", ""),
    base_url="https://vip.apiyi.com/v1"
)
def load_qa_data(file_path):
    """Load the QA dataset from a UTF-8 encoded JSON file."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)
# Split a compound question into independent sub-questions via the LLM.
def split_complex_question(question, answer):
    """Ask the LLM to decompose a compound QA pair.

    Returns 1 when the model judges it a single issue, a list of
    {"question", "answer"} dicts when it decomposes, and the original
    pair wrapped in a one-element list when the API call fails.
    """
    # NOTE(review): "\\times"/"\\mathrm" are escaped so the prompt really
    # shows "\times"/"\mathrm"; the original f-string turned "\times"
    # into a TAB character.
    prompt = f"""
Follow these instructions strictly to perform question decomposition:
Input requirements:
- Question text: {question}
- Answer text: {answer}
Output rules:
1. Single issue determination criteria:
- Question contains only one clear technical inquiry point
- Answer content cannot be divided into independent parts
→ Return: "It's a single issue."
2. Compound question decomposition criteria (must satisfy all):
a) Question contains multiple technically independent sub-questions
b) Answer contains independent solution paragraphs corresponding to sub-questions
c) Each sub-question's answer does not depend on context from other sub-questions
3. Decomposition format standards:
[
{{
"question": "[Complete sub-question 1] (including necessary shared parameters)",
"answer": "[Corresponding complete answer]"
}},
{{
"question": "[Complete sub-question 2] (including necessary shared parameters)",
"answer": "[Corresponding complete answer]"
}},
......
]
Key control points:
1. Context integrity:
- Each sub-question must include shared parameters from the original question
2. Answer integrity:
- Preserve final calculation results
- Maintain original units and precision (e.g., 6.02×10²³ cannot be simplified to 6.02e23)
3. Format prohibitions:
- No explanatory text additions
- No modifications to original technical terminology
- Return data must not use Markdown and Latex formats (like \\times, \\mathrm)
- Use scientific notation for data representation
"""
    try:
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": prompt},
            ],
            stream=False,
        )
        result = response.choices[0].message.content.strip()
        print(result)
        if "It's a single issue." in result:
            return 1
        cleaned = process_response(result)
        try:
            return json.loads(cleaned)
        except json.JSONDecodeError:
            # LaTeX such as "\(" in model output is an invalid JSON escape
            # (the logged "Invalid \escape" failures). Escape every
            # backslash that does not start a legal JSON escape and retry.
            repaired = re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', cleaned)
            return json.loads(repaired)
    except Exception as e:
        print(f"API调用错误: {e}")
        # Fall back to the original pair so the item is not lost.
        return [{"question": question, "answer": answer}]
def process_response(response_text):
    """Strip a surrounding Markdown code fence from an LLM reply.

    Handles both ```json ... ``` (as before) and, as a generalization,
    bare ``` ... ``` fences; any other input is returned unchanged.
    """
    text = response_text.strip()
    if text.startswith("```json") and text.endswith("```"):
        return text[7:-3].strip()
    # Tolerate a fence with no language tag; require len > 6 so a lone
    # "```" does not match itself at both ends.
    if len(text) > 6 and text.startswith("```") and text.endswith("```"):
        return text[3:-3].strip()
    return response_text
def process_dataset(data):
    """Flatten every (possibly compound) QA item into re-indexed sub-questions."""
    flattened = []
    next_idx = 1
    total = len(data)
    for pos, item in enumerate(data, start=1):
        print(f"处理第 {pos}/{total} 条数据...")
        question, answer = item["question"], item["answer"]
        pieces = split_complex_question(question, answer)
        if not isinstance(pieces, list):
            # Single issue (or unexpected scalar): keep the item as-is.
            pieces = [{"question": question, "answer": answer}]
        for piece in pieces:
            flattened.append({
                "idx": next_idx,
                "question": piece["question"],
                "answer": piece["answer"],
            })
            next_idx += 1
        # Throttle: short pause after every 10 processed items.
        if pos % 10 == 0:
            time.sleep(2)
    return flattened
def save_processed_data(data, output_file):
    """Serialize *data* to *output_file* as readable UTF-8 JSON (indent=2)."""
    with open(output_file, 'w', encoding='utf-8') as destination:
        json.dump(data, destination, ensure_ascii=False, indent=2)
def main():
    """Entry point: load QA data, split compound questions, save the result."""
    source = "/home/ubuntu/50T/fsy/benchmark/only_answer.json"
    target = "single_select.json"
    records = load_qa_data(source)
    expanded = process_dataset(records)
    save_processed_data(expanded, target)
    print(f"处理完成,结果已保存到 {target}")

if __name__ == "__main__":
    main()