"""Load every ``*.bib`` file of a folder into a MySQL table.

Each BibTeX entry is enriched with journal metrics (impact factors, index,
JCR quartile) looked up by journal name in an Excel sheet, then inserted into
``TABLE_NAME`` unless a row with the same DOI already exists. Entries without
a DOI are skipped because the DOI is the table's primary key.
"""
import glob
import os

import bibtexparser
import mysql.connector
import pandas as pd
import tqdm

TABLE_NAME = 'phosphorus_synthesis_info'
# The prompts below only pause the run so the operator can abort (Ctrl+C);
# whatever is typed is ignored.
input('你确定TABLE_NAME是{}吗?'.format(TABLE_NAME))  # phosphorus_synthesis

bibs_dir = os.path.join(os.path.dirname(__file__), 'synthesis23-25')
if_file_path = os.path.join(os.path.dirname(__file__), '2023JCR.xlsx')
input('你确定导入文件夹是{}吗?'.format(bibs_dir))

# MySQL connection setup.
# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration or environment variables.
connection = mysql.connector.connect(
    host='localhost',
    user='metadata_mat_papers',
    password='siat-mic',
    database='metadata_mat_papers'
)
cursor = connection.cursor()

# (table column, lower-cased BibTeX/entry key) pairs, in INSERT order.
_COLUMN_SOURCES = (
    ('doi', 'doi'),
    ('unique_id', 'unique-id'),
    ('author', 'author'),
    ('title', 'title'),
    ('journal', 'journal'),
    ('year', 'year'),
    ('volume', 'volume'),
    ('number', 'number'),
    ('pages', 'pages'),
    ('month', 'month'),
    ('issn', 'issn'),
    ('eissn', 'eissn'),
    ('researcher_id', 'researcherid-numbers'),
    ('if2023', 'if2023'),
    ('if5', 'if5'),
    ('journal_index', 'journal_index'),
    ('jcr_quartile', 'jcr_quartile'),
    ('orcid', 'orcid-numbers'),
    ('early_access_date', 'earlyaccessdate'),
    ('scihub_downlowded', 'scihub_downlowded'),
    ('convert2md', 'convert2md'),
    ('pdf_url', 'pdf_url'),
    ('md_url', 'md_url'),
    ('abstract', 'abstract'),
    ('image_url', 'image_url'),
    ('text_content', 'text_content'),
)

# (entry key, Excel column) pairs copied from the journal sheet into an entry.
_METRIC_SOURCES = (
    ('if2023', 'IF2023'),
    ('if5', 'IF5'),
    ('journal_index', 'INDEX'),
    ('jcr_quartile', 'Quartile'),
)


def check_table_exists(table_name):
    """Return True if *table_name* exists in the currently selected database."""
    # The name is compared as a value here, so it can be bound as a parameter
    # instead of being spliced into the SQL text.
    cursor.execute(
        """
        SELECT COUNT(*)
        FROM information_schema.tables
        WHERE table_schema = DATABASE() AND table_name = %s
        """,
        (table_name,),
    )
    return cursor.fetchone()[0] == 1


def create_table(table_name):
    """Create *table_name* with the paper-metadata schema if it is missing."""
    if check_table_exists(table_name):
        return
    # Identifiers cannot be bound as parameters; table_name is expected to be
    # a trusted constant (it is backtick-quoted, not escaped).
    query = f"""
    CREATE TABLE IF NOT EXISTS `{table_name}` (
        doi VARCHAR(255) PRIMARY KEY,
        unique_id VARCHAR(255),
        author TEXT,
        title TEXT,
        journal VARCHAR(255),
        year INT,
        volume VARCHAR(50),
        number VARCHAR(50),
        pages VARCHAR(50),
        month VARCHAR(50),
        issn VARCHAR(50),
        eissn VARCHAR(50),
        researcher_id TEXT,
        if2023 VARCHAR(50),
        if5 VARCHAR(50),
        journal_index VARCHAR(50),
        jcr_quartile VARCHAR(50),
        orcid TEXT,
        early_access_date VARCHAR(50),
        scihub_downlowded VARCHAR(50),
        convert2md VARCHAR(50),
        pdf_url TEXT,
        md_url TEXT,
        abstract TEXT,
        image_url JSON,
        text_content LONGTEXT
    );
    """
    cursor.execute(query)


def record_exists(doi, table_name):
    """Return True if a row with the given *doi* is already in *table_name*."""
    query = f"SELECT COUNT(*) FROM `{table_name}` WHERE doi = %s"
    cursor.execute(query, (doi,))
    return cursor.fetchone()[0] > 0


def insert_record(entry, table_name):
    """Insert one parsed BibTeX *entry* (dict with lower-cased keys) as a row.

    Keys missing from *entry* are stored as NULL. The transaction is not
    committed here; the caller commits once at the end of the import.
    """
    columns = [column for column, _ in _COLUMN_SOURCES]
    placeholders = ', '.join(['%s'] * len(columns))
    query = f"""
    INSERT INTO `{table_name}` ({', '.join(columns)})
    VALUES ({placeholders})
    """

    values = []
    for column, key in _COLUMN_SOURCES:
        value = entry.get(key)
        if column == 'orcid' and value is None:
            # Earlier versions read the misspelled key 'ocrid-numbers', which
            # left this column empty; still honour it if a caller supplies it.
            value = entry.get('ocrid-numbers')
        values.append(value)
    cursor.execute(query, tuple(values))


def _to_native(value):
    """Convert a spreadsheet cell to a plain Python value; missing -> None."""
    if value is None:
        return None
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    # NumPy scalars expose .item(), which yields the built-in equivalent the
    # database driver knows how to serialise.
    if hasattr(value, 'item'):
        return value.item()
    return value


def _build_journal_lookup(frame):
    """Map lower-cased journal name -> metric values for entry enrichment.

    When a name occurs more than once the first row wins, matching the
    previous ``matching_rows.values[0]`` behaviour.
    """
    needed = ['JournalName'] + [excel_col for _, excel_col in _METRIC_SOURCES]
    lookup = {}
    # Selecting the columns up front raises KeyError early if one is missing.
    for name, *metrics in frame[needed].itertuples(index=False, name=None):
        if not isinstance(name, str):
            continue  # blank / non-text journal names can never match
        key = name.lower()
        if key in lookup:
            continue
        lookup[key] = {
            entry_key: _to_native(cell)
            for (entry_key, _), cell in zip(_METRIC_SOURCES, metrics)
        }
    return lookup


try:
    # Journal metrics sheet; built into a dict once so each entry is an O(1)
    # case-insensitive lookup instead of a scan over the whole DataFrame.
    df = pd.read_excel(if_file_path)
    journal_metrics = _build_journal_lookup(df)

    # Create the table if it doesn't exist.
    create_table(TABLE_NAME)

    bib_files = sorted(glob.glob(os.path.join(bibs_dir, '*.bib')))
    for bib_file in tqdm.tqdm(bib_files):
        # Explicit encoding so parsing does not depend on the platform default.
        with open(bib_file, 'r', encoding='utf-8') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)

        for entry in bib_database.entries:
            entry = {k.lower(): v for k, v in entry.items()}

            journal = entry.get('journal')
            if journal is not None:
                metrics = journal_metrics.get(journal.lower())
                if metrics is not None:
                    entry.update(metrics)

            doi = entry.get('doi')
            # DOI is the primary key: skip entries without one before
            # touching the database at all.
            if doi is None:
                continue
            if not record_exists(doi, TABLE_NAME):
                insert_record(entry, TABLE_NAME)

    # Commit once, after every file has been processed.
    connection.commit()
finally:
    # Always release the cursor and connection, even if the import failed.
    cursor.close()
    connection.close()

print("Data has been inserted into the database!")