# File-viewer header captured with the source: 52 lines, 1.6 KiB, Python.
import os
import posixpath
import sqlite3

import mysql.connector
import tqdm

# One-off maintenance script: for every row of TABLE_NAME whose
# scihub_downloaded column is 'success', rewrite pdf_url so that it points at
# 'mp_cif/pdfs/<doi>.pdf'. Rows whose pdf_url already starts with
# 'mp_cif/pdfs' are left untouched, so the script can be re-run safely.
TABLE_NAME = 'mp_cif_info'
# Pause so the operator can confirm the target table before anything is
# touched (interrupt with Ctrl+C to abort).
input('TABLE_NAME = {} ?'.format(TABLE_NAME))

# Directory containing this script (not used below in this section).
cur_dir = os.path.dirname(os.path.abspath(__file__))

# MySQL connection setup
# NOTE(review): host and credentials are hard-coded here; consider loading
# them from the environment or a config file.
mysql_connection = mysql.connector.connect(
    host='100.84.94.73',
    user='metadata_mat_papers',
    password='siat-mic',
    database='metadata_mat_papers'
)

# Pre-bind the cursor so the `finally` clause can tell whether it was ever
# opened; otherwise a failure in cursor() would surface as a NameError and
# the connection would never be closed.
mysql_cursor = None
try:
    mysql_cursor = mysql_connection.cursor()

    # Fetch every DOI whose download is marked as 'success'.
    query = f"SELECT doi, pdf_url FROM {TABLE_NAME} WHERE scihub_downloaded = 'success';"
    mysql_cursor.execute(query)
    results = mysql_cursor.fetchall()

    # Values are passed as parameters rather than interpolated into the SQL
    # text, so DOIs containing quotes or backslashes cannot break (or inject
    # into) the statement. The table name is a module constant and cannot be
    # a bound parameter, so it is still formatted in.
    update_query = f"UPDATE {TABLE_NAME} SET pdf_url = %s WHERE doi = %s;"

    for doi, pdf_url in tqdm.tqdm(results):
        # Skip rows that were already rewritten. Comparing a slice avoids the
        # IndexError that indexing [1] would raise for a value with no '/'.
        if pdf_url is not None and pdf_url.split('/')[:2] == ['mp_cif', 'pdfs']:
            continue

        # The DOI is used verbatim as the file name (no character
        # sanitisation), so a '/' inside the DOI becomes a path separator.
        pdf = doi + '.pdf'

        # New path. posixpath keeps the separator '/' on every platform,
        # matching the '/'-based "already rewritten" check above.
        pdf_path = posixpath.join('mp_cif/pdfs', pdf)

        mysql_cursor.execute(update_query, (pdf_path, doi))
        # Commit per row so that progress survives an interruption; together
        # with the skip check above this makes the script resumable.
        mysql_connection.commit()

    # Commit changes to the database (a no-op when every row was already
    # committed inside the loop).
    mysql_connection.commit()

except mysql.connector.Error as error:
    print("Failed to insert record into MySQL table: {}".format(error))
    # On error, roll back the current (uncommitted) transaction.
    mysql_connection.rollback()

finally:
    # Close the cursor (if it was opened) and the connection.
    if mysql_cursor is not None:
        mysql_cursor.close()
    mysql_connection.close()