import json
import random

# Instruction attached verbatim to every generated record; downstream tooling
# parses the model reply for the [ANSWER]...[/ANSWER] tags.
_ANSWER_PROMPT = (
    "You MUST include the letter(s) of the correct answer (separated by "
    "comma if there are many) within the following tags: [ANSWER] and "
    "[/ANSWER]. No explanations and other information. Only return the "
    "'[ANSWER][/ANSWER]'. We require this because we use automatic parsing."
)


def _build_record(item, rng):
    """Turn one source item into a shuffled multiple-choice record.

    Args:
        item: Mapping with the keys ``question``, ``correct_answer``,
            ``distractor1``, ``distractor2`` and ``distractor3``.
        rng: Object exposing ``shuffle`` (the ``random`` module or a
            ``random.Random`` instance).

    Returns:
        A dict with the keys ``question``, ``choices`` (``text`` and
        ``label`` lists), ``answer`` and ``prompt``.

    Raises:
        KeyError: If ``item`` lacks one of the required keys.
    """
    options = [
        item["correct_answer"],
        item["distractor1"],
        item["distractor2"],
        item["distractor3"],
    ]

    # Shuffle so the correct answer does not always sit in the first slot.
    rng.shuffle(options)

    # Locate the correct answer after shuffling and map its position to a
    # letter (0 -> "A", 1 -> "B", ...).
    correct_letter = chr(ord("A") + options.index(item["correct_answer"]))

    # One label per option: A, B, C, D.
    labels = [chr(ord("A") + position) for position in range(len(options))]

    return {
        "question": item["question"],
        "choices": {"text": options, "label": labels},
        "answer": f"[ANSWER]{correct_letter}[/ANSWER]",
        "prompt": _ANSWER_PROMPT,
    }


def process_json_file(input_file, output_file, *, seed=None):
    """Convert a question/distractor JSON file into multiple-choice records.

    Reads ``input_file`` (UTF-8 JSON whose top-level value is iterated item
    by item), shuffles the four options of every item, and writes the
    resulting list to ``output_file`` as indented UTF-8 JSON with non-ASCII
    characters left unescaped.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
        seed: Optional seed for the option shuffle. When given, a dedicated
            ``random.Random(seed)`` is used so the output is reproducible;
            when ``None`` (the default) the module-level ``random`` generator
            is used.

    Returns:
        The list of transformed records that was written to ``output_file``.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If ``input_file`` does not contain valid JSON.
        KeyError: If an item lacks one of the required keys.
    """
    # Keep using the global generator unless the caller asks for a seed, so
    # existing callers that seed ``random`` themselves are unaffected.
    rng = random if seed is None else random.Random(seed)

    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_json = [_build_record(item, rng) for item in data]

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(new_json, f, indent=2, ensure_ascii=False)

    return new_json


# Example usage
if __name__ == "__main__":
    input_file = "/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/sciq-val-mat.json"
    output_file = "/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/15-sciq-val.json"
    processed_data = process_json_file(input_file, output_file)