second commit
This commit is contained in:
53
layer1/ALL/sciq-process.py
Normal file
53
layer1/ALL/sciq-process.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import json
|
||||
import random
|
||||
|
||||
def process_json_file(input_file, output_file, *, seed=None):
    """Convert question records into shuffled four-option multiple-choice items.

    The JSON document in ``input_file`` is expected to be a sequence of
    records, each providing the keys ``question``, ``correct_answer``,
    ``distractor1``, ``distractor2`` and ``distractor3``. For every record the
    four answer texts are shuffled, labelled ``A``-``D``, and the letter of
    the correct answer is wrapped in ``[ANSWER]...[/ANSWER]`` tags. The
    transformed records are written to ``output_file`` as indented UTF-8 JSON.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
        seed: Optional seed for the option shuffle. When given, a private
            ``random.Random(seed)`` is used so repeated runs produce the same
            option order. When ``None`` (the default) the module-level
            ``random`` generator is used.

    Returns:
        The list of transformed records that was written to ``output_file``.

    Raises:
        KeyError: If a record lacks one of the required keys.
    """
    # Instruction text attached to every item; split across lines for
    # readability only -- the concatenated value is a single string.
    prompt = (
        "You MUST include the letter(s) of the correct answer "
        "(separated by comma if there are many) within the following tags: "
        "[ANSWER] and [/ANSWER]. No explanations and other information. "
        "Only return the '[ANSWER]<answer>[/ANSWER]'. "
        "We require this because we use automatic parsing."
    )

    # A dedicated generator keeps seeded runs reproducible without touching
    # the global random state; unseeded calls keep using the global one.
    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle

    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_json = []
    for item in data:
        correct_answer = item['correct_answer']
        options = [
            correct_answer,
            item['distractor1'],
            item['distractor2'],
            item['distractor3'],
        ]
        # Randomise the option order so the correct answer is not always first.
        shuffle(options)

        # Labels are consecutive capital letters starting at 'A'.
        labels = [chr(ord('A') + i) for i in range(len(options))]
        # Letter of the (first) option whose text equals the correct answer.
        correct_letter = labels[options.index(correct_answer)]

        new_json.append({
            "question": item["question"],
            "choices": {"text": options, "label": labels},
            "answer": f"[ANSWER]{correct_letter}[/ANSWER]",
            "prompt": prompt,
        })

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(new_json, f, indent=2, ensure_ascii=False)

    return new_json
|
||||
|
||||
# Example usage
if __name__ == "__main__":
    # Source file to read and destination file to write.
    input_file = "/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/sciq-val-mat.json"
    output_file = "/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL/15-sciq-val.json"

    processed_data = process_json_file(
        input_file=input_file,
        output_file=output_file,
    )
|
||||
Reference in New Issue
Block a user