"""Convert an OpenBookQA-style JSON file into a question/choices/answer format.

Each input record is expected to carry the keys ``question_stem``,
``choices`` (a dict that contains a ``label`` list) and ``answerKey``.
Numeric string labels ``"1"``-``"4"`` are rewritten as ``"A"``-``"D"``.
"""

import json

# Numeric choice labels (as strings) and the letters that replace them.
_LABEL_TO_LETTER = {"1": "A", "2": "B", "3": "C", "4": "D"}

# Instruction stored verbatim in the "prompt" field of every output record.
_ANSWER_PROMPT = (
    "You MUST include the letter(s) of the correct answer "
    "(separated by comma if there are many) within the following tags: "
    "[ANSWER] and [/ANSWER]. No explanations and other information. "
    "Only return the '[ANSWER][/ANSWER]'. "
    "We require this because we use automatic parsing."
)

input_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/openbookqa-test-mat.json'
output_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/9-openbookqa.json'


def convert_label_to_letter(label):
    """Return the letter for a numeric string label.

    ``"1"``-``"4"`` map to ``"A"``-``"D"``; any other value (including one
    that is already a letter) is returned unchanged.
    """
    return _LABEL_TO_LETTER.get(label, label)


def _transform_item(item):
    """Build one output record from one input record without mutating it."""
    source_choices = item["choices"]
    # Copy the choices dict so the caller's data is left untouched; replacing
    # an existing key keeps its original position in the dict.
    choices = {
        **source_choices,
        "label": [
            convert_label_to_letter(label) for label in source_choices["label"]
        ],
    }
    answer_letter = convert_label_to_letter(item["answerKey"])
    return {
        "question": item["question_stem"],
        "choices": choices,
        "answer": f"[ANSWER]{answer_letter}[/ANSWER]",
        "prompt": _ANSWER_PROMPT,
    }


def transform_json(file_path):
    """Read the JSON file at *file_path* and return the transformed records.

    Args:
        file_path: Path to a UTF-8 JSON file holding a list of records.

    Returns:
        A list of dicts with the keys ``question``, ``choices``, ``answer``
        and ``prompt``, one per input record and in the same order.

    Raises:
        KeyError: If a record lacks ``question_stem``, ``choices``,
            ``answerKey`` or ``choices["label"]``.
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Read the JSON file.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Build the new JSON data.
    return [_transform_item(item) for item in data]


def main():
    """Transform ``input_path`` and write the result to ``output_path``."""
    transformed_data = transform_json(input_path)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(transformed_data, f, ensure_ascii=False, indent=2)


# Guarded so that importing this module performs no file I/O.
if __name__ == "__main__":
    main()