second commit

This commit is contained in:
lzy
2025-05-28 10:55:34 +08:00
parent 0f80316f8b
commit ef9355f2f5
73 changed files with 485583 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
import json
def generate_labels(choice_count):
    """Return ``choice_count`` consecutive option labels starting at 'A'.

    Labels are produced from successive code points, so a count above 26
    continues past 'Z' into the characters that follow it. A count of zero
    (or a negative count) yields an empty list.
    """
    start = ord('A')
    return [chr(code) for code in range(start, start + choice_count)]
# Convert a numeric answer into its letter form.
def convert_answer_to_letter(answer):
    """Map an integer answer offset to a letter: 0 -> 'A', 1 -> 'B', and so on.

    No range check is performed; the result is simply the character whose
    code point lies ``answer`` positions after 'A'.
    """
    base_code = ord('A')
    return chr(base_code + answer)
def transform_json(file_path):
    """Load a question file and convert each entry to a lettered-choice record.

    The file at ``file_path`` is read as UTF-8 JSON and must contain an object
    (mapping); its keys are ignored and only its values are used. Every value
    must provide the keys "question", "choices" and "answer", where "answer"
    is the integer offset of the correct choice (0 maps to 'A').

    Returns a list with one dict per entry, holding:
      * "question": the original question, unchanged;
      * "choices":  {"text": original choices, "label": one letter per choice};
      * "answer":   the correct letter wrapped in [ANSWER]...[/ANSWER] tags;
      * "prompt":   the fixed answer-format instruction.
    """
    with open(file_path, 'r', encoding='utf-8') as src:
        records = json.load(src)

    # The instruction text is the same for every record, so define it once.
    answer_prompt = "You MUST include the letter(s) of the correct answer (separated by comma if there are many) within the following tags: [ANSWER] and [/ANSWER]. No explanations and other information. Only return the '[ANSWER]<answer>[/ANSWER]'. We require this because we use automatic parsing."

    converted = []
    for record in records.values():
        # Read all three fields up front so a missing key fails before any
        # conversion work is attempted.
        question_text, options, answer_idx = (
            record["question"],
            record["choices"],
            record["answer"],
        )
        labelled_options = {
            "text": options,
            "label": generate_labels(len(options)),
        }
        letter = convert_answer_to_letter(answer_idx)
        # Assemble the record in the output format.
        converted.append({
            "question": question_text,
            "choices": labelled_options,
            "answer": f"[ANSWER]{letter}[/ANSWER]",
            "prompt": answer_prompt,
        })
    return converted
# Source dataset and destination for the converted benchmark file.
input_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/ScienceQA-mat-noimage.json'
output_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/11-ScienceQA.json'

# Convert every question in the source file to the lettered-choice format.
transformed_data = transform_json(input_path)

# Write pretty-printed UTF-8 JSON, leaving non-ASCII characters unescaped.
with open(output_path, mode='w', encoding='utf-8') as out_file:
    json.dump(transformed_data, out_file, indent=2, ensure_ascii=False)