"""Convert a multiple-choice question JSON file into a flat prompt/answer format.

Each input record is expected to provide ``question.stem``,
``question.choices`` (a list of ``{"text": ..., "label": ...}`` objects) and
``answerKey``.  Each output record carries the question text, the choices
regrouped into parallel ``text`` / ``label`` lists, the answer wrapped in
``[ANSWER]...[/ANSWER]`` tags, and a fixed instruction prompt.
"""
import json

# Instruction attached verbatim to every transformed record.
ANSWER_PROMPT = (
    "You MUST include the letter(s) of the correct answer (separated by comma "
    "if there are many) within the following tags: [ANSWER] and [/ANSWER]. "
    "No explanations and other information. Only return the "
    "'[ANSWER][/ANSWER]'. We require this because we use automatic parsing."
)


def transform_choices(old_choices):
    """Regroup a list of choice dicts into parallel ``text`` and ``label`` lists.

    Args:
        old_choices: Iterable of mappings, each with a ``"text"`` and a
            ``"label"`` key.

    Returns:
        A dict ``{"text": [...], "label": [...]}`` whose lists keep the order
        of ``old_choices``.

    Raises:
        KeyError: If a choice lacks ``"text"`` or ``"label"``.
    """
    # Materialize first so a one-shot iterable can be traversed twice.
    choices = list(old_choices)
    return {
        # Collect every "text" value.
        "text": [choice["text"] for choice in choices],
        # Collect every "label" value.
        "label": [choice["label"] for choice in choices],
    }


def transform_json(file_path):
    """Load the JSON file at ``file_path`` and return the transformed records.

    Args:
        file_path: Path to a UTF-8 JSON file whose top-level value is a
            sequence of records with ``question.stem``, ``question.choices``
            and ``answerKey``.

    Returns:
        A list with one dict per input record, with the keys ``question``,
        ``choices``, ``answer`` and ``prompt``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a record is missing one of the expected keys.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_json = []
    for item in data:
        question = item["question"]["stem"]
        choices = item["question"]["choices"]
        answer_index = item["answerKey"]

        # Build the new JSON record.
        new_json.append({
            "question": question,
            "choices": transform_choices(choices),
            "answer": f"[ANSWER]{answer_index}[/ANSWER]",
            "prompt": ANSWER_PROMPT,
        })
    return new_json


input_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/QASC-dev-mat.json'
output_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/10-QASC.json'

# Only touch the filesystem when run as a script, so the functions above can
# be imported without requiring the hard-coded dataset paths to exist.
if __name__ == "__main__":
    transformed_data = transform_json(input_path)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(transformed_data, f, ensure_ascii=False, indent=2)