53 lines
1.8 KiB
Python
53 lines
1.8 KiB
Python
import json
|
|
import random
|
|
|
|
def process_json_file(input_file, output_file, *, seed=None):
    """Convert a multiple-choice question file into a shuffled A-D format.

    ``input_file`` is expected to hold a JSON array of objects, each with the
    keys ``question``, ``correct_answer``, ``distractor1``, ``distractor2``
    and ``distractor3``. For every object the four options are shuffled,
    labelled ``A``..``D`` in their shuffled order, and the letter of the
    correct answer is wrapped in ``[ANSWER]...[/ANSWER]`` tags. The resulting
    list is written to ``output_file`` as indented UTF-8 JSON and returned.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write (overwritten if present).
        seed: Optional seed for the option shuffle. When given, a private
            ``random.Random(seed)`` is used so the output is reproducible.
            When ``None`` (the default) the module-level ``random`` generator
            is used, exactly as before this parameter existed.

    Returns:
        The list of transformed records; each is a dict with the keys
        ``question``, ``choices`` (``{"text": [...], "label": [...]}``),
        ``answer`` and ``prompt``.

    Raises:
        KeyError: If a record lacks one of the required keys.
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If ``input_file`` is not valid JSON.
    """
    # The instruction text is identical for every record, so build it once.
    prompt = (
        "You MUST include the letter(s) of the correct answer "
        "(separated by comma if there are many) within the following tags: "
        "[ANSWER] and [/ANSWER]. No explanations and other information. "
        "Only return the '[ANSWER]<answer>[/ANSWER]'. "
        "We require this because we use automatic parsing."
    )

    # Fall back to the global generator when no seed is supplied so callers
    # that seeded ``random`` themselves keep getting the same results.
    rng = random if seed is None else random.Random(seed)
    first_label = ord("A")

    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_json = []
    for item in data:
        correct_answer = item['correct_answer']
        options = [
            correct_answer,
            item['distractor1'],
            item['distractor2'],
            item['distractor3'],
        ]

        # Shuffle the option order in place.
        rng.shuffle(options)

        # Locate the correct answer after shuffling and map it to a letter.
        correct_letter = chr(first_label + options.index(correct_answer))

        # One label per option, in shuffled order: A, B, C, D.
        labels = [chr(first_label + i) for i in range(len(options))]

        new_json.append({
            "question": item["question"],
            "choices": {"text": options, "label": labels},
            "answer": f"[ANSWER]{correct_letter}[/ANSWER]",
            "prompt": prompt,
        })

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(new_json, f, indent=2, ensure_ascii=False)

    return new_json
|
|
|
|
# Example usage: python <this_script> [input_file [output_file]]
if __name__ == "__main__":
    import sys

    # The original dataset locations remain the defaults, so running the
    # script with no arguments behaves exactly as before; optional positional
    # arguments let it be used on other machines without editing the source.
    default_input = "/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/sciq-val-mat.json"
    default_output = "/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/15-sciq-val.json"

    cli_args = sys.argv[1:]
    input_file = cli_args[0] if len(cli_args) > 0 else default_input
    output_file = cli_args[1] if len(cli_args) > 1 else default_output

    processed_data = process_json_file(input_file, output_file)