"""Tag every QA record in a JSON dataset with a question-type code.

Each record's question and answer are sent to a chat-completion model, which
is asked to reply with a numeric code:

    1 = calculation, 2 = multiple choice, 3 = true/false, 4 = other

The code is stored under the record's ``"type"`` key (0 means the API call
failed) and the annotated dataset is written back out as JSON.
"""

import json
import os
import re
import time

# Endpoint and model used for classification.
API_BASE_URL = "https://vip.apiyi.com/v1"
MODEL_NAME = "deepseek-chat"

# The API key is read from the environment instead of being embedded in source.
# NOTE(review): an earlier revision of this file hard-coded a key; treat that
# key as compromised and revoke/rotate it.
API_KEY_ENV_VAR = "OPENAI_API_KEY"

DEFAULT_INPUT_FILE = "/home/ubuntu/50T/fsy/benchmark/3single_select.json"
DEFAULT_OUTPUT_FILE = "4is_type.json"

# Label stored when the API call fails, and label used when the reply contains
# no recognisable code.
TYPE_FAILED = 0
TYPE_OTHER = 4

# Filled in with str.format(question=..., answer=...). The list numbering must
# agree with the codes the model is asked to return.
PROMPT_TEMPLATE = """
Please analyze the following question and its answer, and classify the question type into one of the following four categories:
1. Calculation: A question that requires mathematical operations to derive the result.
2. Multiple choice: A question that provides multiple options (e.g., A/B/C/D) for the respondent to choose from.
3. True/False: A question that only requires answering true/false, yes/no, or correct/incorrect.
4. Other: A question that does not fall under the above three categories.

Question: {question}
Answer: {answer}

Please respond with the corresponding numeric code directly (without any explanation):
1. For Calculation, respond: 1
2. For Multiple choice, respond: 2
3. For True/False, respond: 3
4. For Other, respond: 4
"""

# A digit 1-4 that is not part of a longer number (so "10" or "42" never match).
_TYPE_CODE_RE = re.compile(r"(?<!\d)[1-4](?!\d)")

# Shared API client; created on first use by _get_client() so that importing
# this module needs neither the SDK nor a configured key.
client = None


def _get_client():
    """Return the shared API client, creating it on first use.

    Raises:
        RuntimeError: if the API key environment variable is unset or empty.
        ImportError: if the ``openai`` package is not installed.
    """
    global client
    if client is None:
        api_key = os.environ.get(API_KEY_ENV_VAR)
        if not api_key:
            raise RuntimeError(
                f"Environment variable {API_KEY_ENV_VAR} must hold the API key"
            )
        # Deferred import: only classification itself needs the SDK.
        from openai import OpenAI

        client = OpenAI(api_key=api_key, base_url=API_BASE_URL)
    return client


def load_qa_data(file_path):
    """Load and return the JSON document stored at ``file_path`` (UTF-8)."""
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def parse_type_code(text: str) -> int:
    """Extract the question-type code from a model reply.

    Returns the first stand-alone digit 1-4 found in ``text``. Digits that are
    part of a longer number are ignored. When no such digit exists the reply
    is treated as "Other" and 4 is returned.
    """
    match = _TYPE_CODE_RE.search(text)
    if match is None:
        return TYPE_OTHER
    return int(match.group())


def classify_qa_type(question, answer):
    """Ask the model which type of question this is.

    Args:
        question: The question text.
        answer: The reference answer for the question.

    Returns:
        1 (calculation), 2 (multiple choice), 3 (true/false) or 4 (other);
        0 if the API call failed or returned no text.

    Raises:
        RuntimeError / ImportError: if the client cannot be configured. These
        are deliberately not swallowed, so a misconfigured run fails at once
        instead of labelling every record 0.
    """
    api = _get_client()
    prompt = PROMPT_TEMPLATE.format(question=question, answer=answer)

    try:
        response = api.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": prompt},
            ],
            stream=False,
        )
        # A missing message body (content is None) raises here and is reported
        # as a failed call, like any other API error.
        result = response.choices[0].message.content.strip().lower()
    except Exception as e:
        # Best effort: one failed request must not abort the whole run.
        print(f"API调用错误: {e}")
        return TYPE_FAILED

    print(result)
    return parse_type_code(result)


def process_dataset(data, batch_size=10, pause_seconds=2):
    """Classify every record in ``data`` and store the code under ``"type"``.

    Args:
        data: List of dicts, each with ``"question"`` and ``"answer"`` keys.
            The records are modified in place.
        batch_size: Pause after this many records to throttle API usage.
            A falsy value disables pausing.
        pause_seconds: Length of each pause, in seconds.

    Returns:
        The same ``data`` list, for convenience.

    Raises:
        KeyError: if a record lacks ``"question"`` or ``"answer"``.
    """
    total = len(data)
    for index, item in enumerate(data, start=1):
        print(f"处理第 {index}/{total} 条数据...")
        item["type"] = classify_qa_type(item["question"], item["answer"])

        # Throttle between batches; no need to wait after the final record.
        if batch_size and index % batch_size == 0 and index < total:
            time.sleep(pause_seconds)

    return data


def save_processed_data(data, output_file):
    """Write ``data`` to ``output_file`` as indented UTF-8 JSON.

    Non-ASCII characters are written as-is rather than escaped.
    """
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)


def main(input_file=DEFAULT_INPUT_FILE, output_file=DEFAULT_OUTPUT_FILE):
    """Load the dataset, label every record, and save the result."""
    # Surface a missing key or SDK before any data is loaded.
    _get_client()

    data = load_qa_data(input_file)
    processed_data = process_dataset(data)
    save_processed_data(processed_data, output_file)
    print(f"处理完成,结果已保存到 {output_file}")


if __name__ == "__main__":
    main()