""" 0. 将问题从xls提取为json 1. 将问题进行拆分 2. 翻译成英文 3. 去重 4. 使用大模型进行难度评估和筛选 """ import pandas as pd import json import os def process_excel_files(directory): all_data = [] # 获取目录下所有xlsx文件 excel_files = [f for f in os.listdir(directory) if f.endswith('.xlsx')] for excel_file in excel_files: file_path = os.path.join(directory, excel_file) df = pd.read_excel(file_path) if 'Question' in df.columns and 'Answer' in df.columns: # 将每行转换为字典并添加到列表中 for _, row in df.iterrows(): data_item = { 'question': str(row['Question']).strip(), 'answer': str(row['Answer']).strip() } all_data.append(data_item) else: print(f"警告: {excel_file} 缺少必要的列 (question/answer)") # 将数据保存为JSON文件 output_file = os.path.join(directory, 'qa_data.json') with open(output_file, 'w', encoding='utf-8') as f: json.dump(all_data, f, ensure_ascii=False, indent=2) print(f"处理完成!共处理了 {len(all_data)} 条数据") print(f"数据已保存到: {output_file}") if __name__ == '__main__': # 指定Excel文件所在的目录 directory = os.path.dirname(os.path.abspath(__file__)) process_excel_files(directory)