"""Split each QA segment into a question part and an answer part.

Reads a JSON list of records (each carrying an ``idx`` and a ``segment``
string), cuts every segment at the first "Solution" marker (or, when that is
absent, at the first "Answer" marker), and writes the resulting
``idx`` / ``question`` / ``answer`` records to a new JSON file.
"""
import json

# Change this to the actual location of the input file.
input_file_path = '/home/ubuntu/50T/fsy/benchmark/dataset-ours/[Solution]qa_segment_all.json'
# Path of the output file (relative to the current working directory).
output_file_path = '[Solution]qa_segment.json'

# Markers tried in priority order: "Solution" wins over "Answer" even when
# "Answer" appears earlier in the text.
SEPARATORS = ("Solution", "Answer")
# Value stored for both fields when a segment contains no marker.
PLACEHOLDER = "000"


def split_segment(segment):
    """Split *segment* into ``(question, answer)`` at the first marker found.

    Args:
        segment: The raw text holding a question followed by its answer.

    Returns:
        A ``(question, answer)`` tuple of whitespace-stripped strings. The
        marker itself is dropped. When *segment* is not a string or contains
        none of the markers, both elements are ``PLACEHOLDER``.
    """
    # A record may carry an explicit null segment; treat it like "no marker"
    # instead of failing on the membership test.
    if isinstance(segment, str):
        for marker in SEPARATORS:
            question, found, answer = segment.partition(marker)
            if found:
                return question.strip(), answer.strip()
    return PLACEHOLDER, PLACEHOLDER


def process_items(items):
    """Convert raw records into ``idx`` / ``question`` / ``answer`` records.

    Args:
        items: Iterable of dicts; ``idx`` and ``segment`` are read with
            ``dict.get``, so either key may be missing.

    Returns:
        A list with one output dict per input record, in the same order.
    """
    processed = []
    for item in items:
        question, answer = split_segment(item.get("segment", ""))
        processed.append({
            "idx": item.get("idx"),
            "question": question,
            "answer": answer,
        })
    return processed


def main(input_path=input_file_path, output_path=output_file_path):
    """Load the input JSON, split every segment, and write the result.

    Args:
        input_path: JSON file containing the list of raw records.
        output_path: Destination JSON file for the processed records.

    Returns:
        The path the processed data was written to.
    """
    with open(input_path, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    processed_data = process_items(data)

    with open(output_path, 'w', encoding='utf-8') as outfile:
        json.dump(processed_data, outfile, ensure_ascii=False, indent=4)
    return output_path


if __name__ == "__main__":
    main()