Files
datapipe/clean/step4.2_modify_md_with_database.py
2025-01-18 17:09:51 +08:00

51 lines
1.6 KiB
Python

import mysql.connector
import tqdm
import os
# Table whose md_url / convert2md columns this script rewrites.
TABLE_NAME = 'phosphorus_synthesis_info_new'
# Pause so the operator can check the target table before anything is written.
# The return value is ignored, so any input (even an empty line) continues.
input('TABLE_NAME = {} ?'.format(TABLE_NAME))
cur_dir = os.path.dirname(os.path.abspath(__file__))
# MySQL connection setup.
# Each setting can be overridden through an environment variable; the
# fallbacks are the values this script has always used, so existing
# invocations behave exactly as before.
# NOTE(review): the fallback password is still stored in source control --
# rotate it and drop the default once every caller sets MAT_PAPERS_DB_PASSWORD.
mysql_connection = mysql.connector.connect(
    host=os.environ.get('MAT_PAPERS_DB_HOST', '100.84.94.73'),
    user=os.environ.get('MAT_PAPERS_DB_USER', 'metadata_mat_papers'),
    password=os.environ.get('MAT_PAPERS_DB_PASSWORD', 'siat-mic'),
    database=os.environ.get('MAT_PAPERS_DB_NAME', 'metadata_mat_papers'),
)
# Local import: paths stored in the database must use '/' on every platform,
# because the "already migrated" check below splits md_url on '/'.
import posixpath

# Characters replaced with '_' when a DOI is turned into a file name.
_DOI_TO_FILENAME = str.maketrans('/<>:', '____')
dir_name = 'phosphorus'

# Bound before the try so the finally clause can tell whether a cursor was
# ever created (cursor() itself may raise).
mysql_cursor = None
try:
    mysql_cursor = mysql_connection.cursor()
    # Fetch every DOI whose text content has already been converted.
    # The table name cannot be a bound parameter; TABLE_NAME is a constant
    # defined in this script, not external input.
    query = f"SELECT doi, md_url FROM {TABLE_NAME} WHERE en_text_content IS NOT NULL;"
    mysql_cursor.execute(query)
    results = mysql_cursor.fetchall()

    # Values are passed as bound parameters so DOIs containing quotes or
    # backslashes cannot break (or inject into) the statement.
    update_query = (
        f"UPDATE {TABLE_NAME} SET md_url = %s, convert2md = 'success' WHERE doi = %s;"
    )
    for doi, md_url in tqdm.tqdm(results, total=len(results)):
        # Skip rows that were already rewritten to '<dir_name>/mds/...'.
        # Comparing a slice avoids an IndexError when md_url has no '/'.
        if md_url is not None and md_url.split('/')[:2] == [dir_name, 'mds']:
            continue
        md_name = doi.translate(_DOI_TO_FILENAME)
        md = md_name + '.md'
        md_path = posixpath.join(dir_name + '/mds', md_name, md)
        mysql_cursor.execute(update_query, (md_path, doi))
        # Commit per row so progress survives an interruption mid-run.
        mysql_connection.commit()
    # Commit the changes to the database (redundant after the per-row
    # commits above; kept to mirror the original script).
    mysql_connection.commit()
except mysql.connector.Error as error:
    print("Failed to insert record into MySQL table: {}".format(error))
    # On error, roll back the current (uncommitted) transaction.
    mysql_connection.rollback()
finally:
    # Close the cursor (if one was opened) and the connection.
    if mysql_cursor is not None:
        mysql_cursor.close()
    mysql_connection.close()