# One-off maintenance script: for every row of TABLE_NAME whose
# `scihub_downloaded` column equals 'success', rewrite `pdf_url` to the
# relative path 'mp_cif/pdfs/<doi>.pdf', skipping rows that already point
# into 'mp_cif/pdfs/'.
import os
import sqlite3

import mysql.connector
import tqdm

TABLE_NAME = 'mp_cif_info'
# Pause so the operator can see which table is about to be modified. The
# typed answer is ignored; pressing Enter continues, Ctrl-C aborts.
input('TABLE_NAME = {} ?'.format(TABLE_NAME))

# Directory containing this script (not used further in this script).
cur_dir = os.path.dirname(os.path.abspath(__file__))

# MySQL connection setup.
# NOTE(review): host and credentials are hard-coded in source; consider
# loading them from the environment or a config file kept out of VCS.
mysql_connection = mysql.connector.connect(
    host='100.84.94.73',
    user='metadata_mat_papers',
    password='siat-mic',
    database='metadata_mat_papers'
)

# Bound before the `try` so the `finally` clause can tell whether a cursor
# was actually created; otherwise a failure in cursor() would surface as a
# NameError and the connection would never be closed.
mysql_cursor = None
try:
    mysql_cursor = mysql_connection.cursor()

    # Fetch (doi, pdf_url) for every successfully downloaded paper.
    # TABLE_NAME is a constant defined above; SQL identifiers cannot be
    # bound as parameters, so it is interpolated directly.
    select_query = f"SELECT doi, pdf_url FROM {TABLE_NAME} WHERE scihub_downloaded = 'success';"
    mysql_cursor.execute(select_query)
    results = mysql_cursor.fetchall()

    # Values are passed as bound parameters so DOIs containing quotes or
    # backslashes cannot break or alter the statement.
    update_query = f"UPDATE {TABLE_NAME} SET pdf_url = %s WHERE doi = %s;"

    for doi, pdf_url in tqdm.tqdm(results, total=len(results)):
        # Rows already rewritten by an earlier run are skipped. Comparing a
        # slice (rather than indexing [1]) avoids an IndexError when the
        # stored value contains no '/'.
        if pdf_url is not None and pdf_url.split('/')[:2] == ['mp_cif', 'pdfs']:
            continue

        # The DOI is used verbatim as the file stem, so any '/' inside it
        # becomes a path separator in the stored value. (An earlier variant
        # replaced '/', '<', '>' and ':' with '_'; it is not applied here.)
        pdf_path = os.path.join('mp_cif/pdfs', doi + '.pdf')

        mysql_cursor.execute(update_query, (pdf_path, doi))
        # Commit per row so an interrupted run keeps its progress and can be
        # resumed thanks to the skip check above.
        mysql_connection.commit()

    # Final commit of anything still pending.
    mysql_connection.commit()

except mysql.connector.Error as error:
    print("Failed to insert record into MySQL table: {}".format(error))
    # Undo whatever part of the current transaction was not yet committed.
    mysql_connection.rollback()

finally:
    # Release the cursor (if one was created) and the connection.
    if mysql_cursor is not None:
        mysql_cursor.close()
    mysql_connection.close()