Files
datapipe/clean/step2_reserve_success_pdf_with_database.py
2025-01-18 17:09:51 +08:00

47 lines
1.4 KiB
Python

import sqlite3
import mysql.connector
import tqdm
import os
# Name of the MySQL table this script operates on.
TABLE_NAME = 'mp_synthesis_papers_info'

# Pause so the operator can check the target table before anything runs.
# The typed reply is discarded; it is not inspected by the script.
input(f'TABLE_NAME = {TABLE_NAME} ?')

# Absolute path of the directory that contains this script.
cur_dir = os.path.dirname(os.path.abspath(__file__))

# MySQL connection setup.
# NOTE(review): host and credentials are hard-coded in source; consider
# loading them from configuration or the environment instead.
_mysql_settings = {
    'host': '100.84.94.73',
    'user': 'metadata_mat_papers',
    'password': 'siat-mic',
    'database': 'metadata_mat_papers',
}
mysql_connection = mysql.connector.connect(**_mysql_settings)
# Clear pdf_url on every row whose pdf_url matches one found on a row where
# scihub_downlowded IS NULL, then release the cursor and the connection.
#
# The cursor name is bound before ``try`` so the cleanup in ``finally`` stays
# safe even if creating the cursor itself raises.
mysql_cursor = None
try:
    mysql_cursor = mysql_connection.cursor()

    # Build the selection query.
    # Alternative filter kept for reference:
    # query = f"SELECT pdf_url FROM {TABLE_NAME} WHERE scihub_downlowded IN ('broken', 'timeout', 'failed') and pdf_url IS NOT NULL;"
    select_query = (
        f"SELECT pdf_url FROM {TABLE_NAME} "
        "WHERE scihub_downlowded IS NULL AND pdf_url IS NOT NULL;"
    )
    mysql_cursor.execute(select_query)
    records = mysql_cursor.fetchall()

    # The table name is a module constant and cannot be sent as a bound
    # parameter, so it is formatted in. The URL value IS sent as a bound
    # parameter so that quotes or backslashes inside it can neither break the
    # statement nor inject SQL.
    update_query = f"UPDATE {TABLE_NAME} SET pdf_url = NULL WHERE pdf_url = %s;"

    for (pdf_url,) in tqdm.tqdm(records):
        # Disabled: removal of the file the URL points at.
        # pdf_path = os.path.join(cur_dir, pdf_url)
        # if os.path.exists(pdf_path):
        #     os.remove(pdf_path)
        mysql_cursor.execute(update_query, (pdf_url,))
        # Commit after every record so updates already applied are persisted
        # even if a later record fails or the run is interrupted.
        mysql_connection.commit()
except mysql.connector.Error as error:
    print("Failed to update record in MySQL table: {}".format(error))
    # On error, roll back whatever part of the transaction is uncommitted.
    mysql_connection.rollback()
finally:
    # Close the cursor (if it was created) and the connection.
    if mysql_cursor is not None:
        mysql_cursor.close()
    mysql_connection.close()