"""Convert a question/choices/answer JSON file into a labelled benchmark format.

The input JSON is expected to be an object whose values are records with the
keys ``"question"``, ``"choices"`` and ``"answer"``.  Each record is rewritten
so that choices carry letter labels and the answer is a tagged letter.
"""

import json

# Instruction attached verbatim to every transformed record.
ANSWER_PROMPT = (
    "You MUST include the letter(s) of the correct answer "
    "(separated by comma if there are many) within the following tags: "
    "[ANSWER] and [/ANSWER]. No explanations and other information. "
    "Only return the '[ANSWER][/ANSWER]'. "
    "We require this because we use automatic parsing."
)

# Default locations used when the file is executed as a script.
input_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/ScienceQA-mat-noimage.json'
output_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/11-ScienceQA.json'


def generate_labels(choice_count: int) -> list:
    """Return the first *choice_count* labels starting at ``'A'``.

    Labels are consecutive code points after ``'A'``; no upper bound is
    enforced, so counts above 26 run past ``'Z'``.
    """
    return [chr(ord('A') + i) for i in range(choice_count)]


def convert_answer_to_letter(answer: int) -> str:
    """Convert a zero-based numeric answer index to its letter (0 -> ``'A'``)."""
    return chr(ord('A') + answer)


def transform_json(file_path: str) -> list:
    """Load *file_path* and return its records in the labelled format.

    Args:
        file_path: Path to a UTF-8 JSON file containing an object whose
            values each have ``"question"``, ``"choices"`` and ``"answer"``.

    Returns:
        A list with one dict per input record, in input order, holding
        ``"question"``, ``"choices"`` (``{"text": ..., "label": ...}``),
        ``"answer"`` (``"[ANSWER]<letter>[/ANSWER]"``) and ``"prompt"``.

    Raises:
        KeyError: If a record lacks one of the required keys.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_json = []
    # Only the record bodies are needed; the top-level keys are not used.
    for item in data.values():
        choices = item["choices"]
        answer_letter = convert_answer_to_letter(item["answer"])

        # Build the new JSON record.
        new_json.append({
            "question": item["question"],
            "choices": {
                "text": choices,
                "label": generate_labels(len(choices)),
            },
            "answer": f"[ANSWER]{answer_letter}[/ANSWER]",
            "prompt": ANSWER_PROMPT,
        })
    return new_json


def main() -> None:
    """Transform ``input_path`` and write the result to ``output_path``."""
    converted = transform_json(input_path)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(converted, f, ensure_ascii=False, indent=2)


# Guarded so that importing this module does not touch the filesystem.
if __name__ == "__main__":
    main()