39 lines
1.5 KiB
Python
39 lines
1.5 KiB
Python
import json
|
|
|
|
# Read the JSON file
|
|
def convert_label_to_letter(label):
    """Map a numeric choice label ("1"-"4") to its letter ("A"-"D").

    The lookup key is the label converted to ``str`` with surrounding
    whitespace removed, so an integer ``1`` or a padded ``" 1 "`` is
    converted the same way as ``"1"``.

    Args:
        label: The choice label or answer key to convert.

    Returns:
        The corresponding letter when the label is one of 1-4; otherwise
        the original ``label`` object, unchanged (for example a label that
        is already a letter).
    """
    mapping = {"1": "A", "2": "B", "3": "C", "4": "D"}
    # Normalise only the lookup key; the fallback returns the caller's
    # original value so unmapped labels pass through untouched.
    key = str(label).strip()
    return mapping.get(key, label)
|
|
|
|
def transform_json(file_path):
    """Load a JSON file of question records and convert them to prompt format.

    Each input record is read through its "question", "choices" and
    "answerKey" keys, and "choices" is read through its "label" key.
    Choice labels and the answer key are passed through
    ``convert_label_to_letter``.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A list with one dict per input record, holding the keys
        "question", "choices", "answer" and "prompt".

    Raises:
        KeyError: If a record lacks one of the required keys; the message
            names the record index and the missing key.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The instruction text is identical for every record, so build it once.
    prompt = (
        "You MUST include the letter(s) of the correct answer "
        "(separated by comma if there are many) within the following tags: "
        "[ANSWER] and [/ANSWER]. No explanations and other information. "
        "Only return the '[ANSWER]<answer>[/ANSWER]'. "
        "We require this because we use automatic parsing."
    )

    new_json = []
    for index, item in enumerate(data):
        try:
            question = item["question"]
            choices = item["choices"]
            answer_key = item["answerKey"]
            labels = choices["label"]
        except KeyError as err:
            raise KeyError(
                f"item {index} in {file_path!r} is missing required key {err}"
            ) from err

        # Copy instead of overwriting the parsed record in place; assigning
        # to the existing "label" key keeps the original key order.
        converted_choices = dict(choices)
        converted_choices["label"] = [
            convert_label_to_letter(label) for label in labels
        ]
        transformed_answer = convert_label_to_letter(answer_key)

        # Build the new JSON record.
        new_json.append({
            "question": question,
            "choices": converted_choices,
            "answer": f"[ANSWER]{transformed_answer}[/ANSWER]",
            "prompt": prompt,
        })

    return new_json
|
|
|
|
# Hard-coded source file and destination file for the conversion.
input_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/ARC-validation-mat.json'
output_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/4-ARC-validation.json'

transformed_data = transform_json(input_path)

# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(transformed_data, f, ensure_ascii=False, indent=2)