layer2 commit

2025-05-28 11:00:24 +08:00
parent 6a6b09ae20
commit 9f5318c23d
66 changed files with 286574 additions and 0 deletions
--- a/layer2/rubbish/1.py
+++ b/layer2/rubbish/1.py
@@ -0,0 +1,91 @@
+import json
+import time
+from openai import OpenAI
+
+# SECURITY: the API key must not live in source control. When `api_key` is
+# omitted, the OpenAI client reads it from the OPENAI_API_KEY environment
+# variable, so export that variable before running this script.
+# NOTE(review): any key that was ever committed in this file should be
+# treated as leaked and revoked.
+client = OpenAI(
+    base_url="https://vip.apiyi.com/v1"
+)
+
+def load_qa_data(file_path):
+    """Read the UTF-8 JSON file at ``file_path`` and return its parsed content."""
+    with open(file_path, mode='r', encoding='utf-8') as source:
+        return json.load(source)
+
+# Classify a question/answer pair into one of four question types.
+def classify_qa_type(question, answer):
+    """Ask the chat model which type of question this is.
+
+    Args:
+        question: Question text, inserted verbatim into the prompt.
+        answer: Reference answer text, inserted verbatim into the prompt.
+
+    Returns:
+        1 for calculation, 2 for multiple choice, 3 for true/false, 4 for
+        other (also used when the reply contains none of the digits 1-4),
+        or 0 when the API call or reply handling raises an exception.
+    """
+    prompt = f"""
+                Please analyze the following question and its answer, and classify the question type into one of the following four categories:
+
+                1. Calculation: A question that requires mathematical operations to derive the result.
+                2. Multiple choice: A question that provides multiple options (e.g., A/B/C/D) for the respondent to choose from.
+                3. True/False: A question that only requires answering true/false, yes/no, or correct/incorrect.
+                4. Other: A question that does not fall under the above three categories.
+
+                Question:
+                {question}
+                Answer:
+                {answer}
+
+                Please respond with the corresponding numeric code directly (without any explanation):
+                1. For Calculation, respond: 1
+                2. For Multiple choice, respond: 2
+                3. For True/False, respond: 3
+                4. For Other, respond: 4
+                """
+
+    try:
+        response = client.chat.completions.create(
+            model="deepseek-chat",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant"},
+                {"role": "user", "content": prompt}
+            ],
+            stream=False
+        )
+        result = response.choices[0].message.content.strip().lower()
+        print(result)
+        # Use the first digit 1-4 that appears in the reply. Checking for "1"
+        # anywhere first, then "2", and so on, would let a digit mentioned
+        # later in the text (e.g. "2 (not 1)") override the actual answer.
+        for char in result:
+            if char in "1234":
+                return int(char)
+        return 4
+    except Exception as e:
+        print(f"API调用错误: {e}")
+        # 0 is outside the 1-4 label range, so failed records stay
+        # distinguishable from successfully classified ones.
+        return 0
+
+# Walk the whole dataset and attach a type label to every record.
+def process_dataset(data):
+    """Label each record of ``data`` in place and return ``data``.
+
+    Each record's "question" and "answer" values are passed to
+    classify_qa_type and the returned code is stored under "type".
+    """
+    total = len(data)
+    for i, record in enumerate(data):
+        print(f"处理第 {i+1}/{total} 条数据...")
+        q_text, a_text = record["question"], record["answer"]
+        record["type"] = classify_qa_type(q_text, a_text)
+
+        # Pause for two seconds after every tenth record
+        # (presumably to stay under the API rate limit; confirm).
+        handled = i + 1
+        if handled % 10:
+            continue
+        time.sleep(2)
+
+    return data
+
+# Persist the labelled data.
+def save_processed_data(data, output_file):
+    """Write ``data`` to ``output_file`` as UTF-8 JSON indented by two spaces.
+
+    Non-ASCII characters are written as-is instead of being escaped.
+    """
+    with open(output_file, mode='w', encoding='utf-8') as sink:
+        json.dump(data, sink, indent=2, ensure_ascii=False)
+
+def main(input_file="/home/ubuntu/50T/fsy/benchmark/3single_select.json",
+         output_file="4is_type.json"):
+    """Load the QA dataset, label every record, and save the result.
+
+    Args:
+        input_file: Path of the JSON dataset to read. The default is the
+            path this script was originally hard-wired to.
+        output_file: Path the labelled JSON is written to.
+    """
+    data = load_qa_data(input_file)
+    processed_data = process_dataset(data)
+    save_processed_data(processed_data, output_file)
+    print(f"处理完成，结果已保存到 {output_file}")
+
+# Run the labelling pipeline only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()