""" 对821道英文问题进行处理 1. 判断是否包含多个子问题,将问题拆分为完整子问题(去掉推理过程,只保留最后结果) 2. 判断题目类型 3. 将题目做成选择题 对计算题,在数值附近随机生成三个相似答案作为错误选项 对简答题,与标准答案最相近的其他问题的答案作为三个错误选项 4. 将正确和错误选项随机打乱,生成ABCD选择题的模型 5. 添加prompt,并将选择题包裹在[ANSWER][/ANSWER]标签中 6. 模型打分 """ import json import random from typing import List, Dict def process_json_file(file_path: str) -> List[Dict]: with open(file_path, 'r', encoding='utf-8') as f: data = json.load(f) for item in data: # 收集所有选项 options = [ item['answer'], item.get('wrong_answers_1', ''), item.get('wrong_answers_2', ''), item.get('wrong_answers_3', '') ] # 过滤掉空选项 options = [opt for opt in options if opt] # 打乱选项 random.shuffle(options) # 找出正确答案的位置 correct_answer_index = options.index(item['answer']) correct_answer_letter = chr(65 + correct_answer_index) # A, B, C, or D # 构建选项文本 options_text = "" for i, option in enumerate(options): letter = chr(65 + i) # A, B, C, or D options_text += f"({letter}){option}" if i < len(options) - 1: options_text += " " # 更新问题和答案 item['question'] = f"{"The following is a question about Fundamentals of Materials Science"}{item['question']} {options_text}{"You MUST include the letter(s) of the correct answer (separated by comma if there are many) within the following tags: [ANSWER] and [/ANSWER].\nFor example, '[ANSWER][/ANSWER]', where is comma- or space-separated list of the correct letters. Always answer in exactly this format of comma-separated letters between the two tags, even if you are unsure. We require this because we use automatic parsing."}" item['answer'] = f"[ANSWER]{correct_answer_letter}[/ANSWER]" # 删除原始的错误选项 if 'wrong_answers_1' in item: del item['wrong_answers_1'] if 'wrong_answers_2' in item: del item['wrong_answers_2'] if 'wrong_answers_3' in item: del item['wrong_answers_3'] return data def save_processed_data(data: List[Dict], output_path: str) -> None: """ 保存处理后的数据到新的JSON文件 """ with open(output_path, 'w', encoding='utf-8') as f: json.dump(data, f, ensure_ascii=False, indent=2) # 使用示例 if __name__ == "__main__": input_file = "/home/ubuntu/50T/fsy/5_1.json" # 替换为你的输入文件路径 output_file = "output.json" # 替换为你想要的输出文件路径 try: processed_data = process_json_file(input_file) save_processed_data(processed_data, output_file) print(f"处理完成!结果已保存到 {output_file}") except Exception as e: print(f"处理过程中出现错误: {e}")