second commit
This commit is contained in:
45
layer1/ALL/QASC-process.py
Normal file
45
layer1/ALL/QASC-process.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import json
|
||||
|
||||
# Read the JSON file (the actual loading is done in transform_json).
|
||||
def transform_choices(old_choices):
    """Split a sequence of choice records into parallel text/label lists.

    Args:
        old_choices: Iterable of mappings, each providing a "text" and a
            "label" key.

    Returns:
        A dict with two lists, "text" and "label", holding the respective
        values of every choice in input order. An empty input yields two
        empty lists.

    Raises:
        KeyError: If a choice lacks the "text" or "label" key.
    """
    text_list = []
    label_list = []
    # Single pass so that one-shot iterables (e.g. generators) are handled
    # correctly; iterating twice would leave the second list empty.
    for choice in old_choices:
        text_list.append(choice["text"])
        label_list.append(choice["label"])

    return {
        "text": text_list,
        "label": label_list,
    }
|
||||
|
||||
def transform_json(file_path):
    """Load question records from a JSON file and reshape them.

    Each input record is read via item["question"]["stem"],
    item["question"]["choices"] and item["answerKey"].

    Args:
        file_path: Path to a UTF-8 encoded JSON file whose top-level value
            is an iterable of records (presumably a list -- confirm against
            the input file).

    Returns:
        A list of dicts, one per input record, with the keys "question",
        "choices" (as produced by transform_choices), "answer" (the answer
        key wrapped in [ANSWER]...[/ANSWER] tags) and "prompt".

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a record lacks one of the keys listed above.
    """
    # The instruction text is the same for every record, so build it once
    # instead of on every loop iteration.
    prompt = "You MUST include the letter(s) of the correct answer (separated by comma if there are many) within the following tags: [ANSWER] and [/ANSWER]. No explanations and other information. Only return the '[ANSWER]<answer>[/ANSWER]'. We require this because we use automatic parsing."

    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_json = []

    for item in data:
        question = item["question"]["stem"]
        choices = item["question"]["choices"]
        # "answerKey" is interpolated verbatim into the answer tag below.
        answer_key = item["answerKey"]

        new_choices = transform_choices(choices)

        # Build the record in the output format.
        new_json.append({
            "question": question,
            "choices": new_choices,
            "answer": f"[ANSWER]{answer_key}[/ANSWER]",
            "prompt": prompt,
        })

    return new_json
|
||||
|
||||
# Hard-coded input and output locations for this conversion run.
input_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/QASC-dev-mat.json'
output_path = '/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/10-QASC.json'

# Convert every record of the input file into the output format.
transformed_data = transform_json(input_path)

# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(transformed_data, f, ensure_ascii=False, indent=2)
|
||||
Reference in New Issue
Block a user