import os
import json
import glob


def _is_qa_record(item):
    """Return True when *item* is a dict that has both 'question' and 'answer' keys."""
    return isinstance(item, dict) and 'question' in item and 'answer' in item


def merge_json_files(input_directory, output_file):
    """Merge question/answer records from every ``*.json`` file in a directory.

    Each input file may contain either a single record (a dict with
    'question' and 'answer' keys) or a list of such records. Records are
    appended unchanged, so any extra keys they carry are preserved. List
    elements that are not valid records are skipped with a warning; files
    that cannot be read or parsed are reported and skipped.

    Args:
        input_directory: Directory whose top-level ``*.json`` files are merged.
        output_file: Path of the JSON file that receives the merged list.
            It may live inside ``input_directory``; it is never read as input.

    Returns:
        None. Progress and errors are reported on stdout, and the merged
        list is written to ``output_file`` as UTF-8, indented JSON.
    """
    output_path = os.path.realpath(output_file)
    # Escape the directory so characters such as '[' or '*' in its name are
    # matched literally instead of being interpreted as glob syntax.
    pattern = os.path.join(glob.escape(os.fspath(input_directory)), "*.json")
    # Sort for a reproducible merge order, and drop the output file itself:
    # otherwise a second run would re-read the previous result and duplicate
    # every record.
    json_files = sorted(
        path for path in glob.glob(pattern)
        if os.path.realpath(path) != output_path
    )

    all_data = []
    for json_file in json_files:
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except json.JSONDecodeError:
            print(f"错误: 文件 {json_file} 不是有效的JSON格式")
            continue
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole merge.
            print(f"处理文件 {json_file} 时出错: {str(e)}")
            continue

        if isinstance(data, list):
            # Keep only the elements that look like question/answer records.
            for item in data:
                if _is_qa_record(item):
                    all_data.append(item)
                else:
                    print(f"警告: 跳过不符合格式的项 {item}")
        elif _is_qa_record(data):
            all_data.append(data)
        else:
            print(f"警告: 无法处理文件 {json_file} 中的数据,格式不符合预期")
            # Nothing was taken from this file, so do not report success.
            continue

        print(f"成功处理: {json_file}")

    # Write the merged records to the output file.
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_data, f, indent=2, ensure_ascii=False)
        print(f"成功将 {len(json_files)} 个JSON文件中的 {len(all_data)} 个问答对合并到 {output_file}")
    except Exception as e:
        print(f"写入输出文件时出错: {str(e)}")


# Usage example
if __name__ == "__main__":
    # Replace with your actual input directory and output file path.
    input_dir = "/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL-merge"
    output_file = "/home/ubuntu/50T/fsy/benchmark-dataset-third/ALL-merge/merged.json"

    merge_json_files(input_dir, output_file)