第一次合并clean代码
This commit is contained in:
28
clean/stp2.2_remove_broken_pdf.py
Normal file
28
clean/stp2.2_remove_broken_pdf.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import sqlite3
|
||||
import mysql.connector
|
||||
import tqdm
|
||||
import os
|
||||
|
||||
# Delete from disk every PDF whose row in TABLE_NAME is flagged as 'broken'.
#
# Flow:
#   1. Ask the operator to confirm the target table (any input continues;
#      Ctrl-C aborts).
#   2. Read the `pdf_url` of every row marked 'broken' from MySQL.
#   3. Remove each of those files, resolving `pdf_url` against the directory
#      that contains this script.

TABLE_NAME = 'phosphorus_synthesis_info'
# Pause so the operator can double-check the table before anything is deleted.
input('TABLE_NAME = {} ?'.format(TABLE_NAME))

# Directory of this script; the pdf_url values are joined onto it below.
cur_dir = os.path.dirname(os.path.abspath(__file__))

# MySQL connection setup
# NOTE(review): host and credentials are hard-coded in source control;
# consider loading them from the environment or a config file instead.
mysql_connection = mysql.connector.connect(
    host='100.84.94.73',
    user='metadata_mat_papers',
    password='siat-mic',
    database='metadata_mat_papers'
)
try:
    mysql_cursor = mysql_connection.cursor()
    try:
        # Build the query. TABLE_NAME is a constant defined above, not user
        # input; table identifiers cannot be bound as query parameters.
        # NOTE(review): the column is spelled 'scihub_downlowded' here --
        # presumably this matches the actual schema; verify before renaming.
        query = f"SELECT pdf_url FROM {TABLE_NAME} WHERE scihub_downlowded = 'broken'"
        mysql_cursor.execute(query)
        records = mysql_cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        mysql_cursor.close()
finally:
    # The rows are fully fetched, so the connection is not needed while
    # the files are being deleted.
    mysql_connection.close()

missing_paths = []
for record in tqdm.tqdm(records):
    relative_path = record[0]
    if not relative_path:
        # NULL/empty pdf_url: nothing to delete, and joining it would either
        # raise TypeError or point at the script directory itself.
        continue
    pdf_path = os.path.join(cur_dir, relative_path)
    try:
        os.remove(pdf_path)
    except FileNotFoundError:
        # Already removed (e.g. by a previous run); keep going so one
        # missing file does not abort the rest of the batch.
        missing_paths.append(pdf_path)

if missing_paths:
    print('{} file(s) were already missing:'.format(len(missing_paths)))
    for missing_path in missing_paths:
        print('  ' + missing_path)
|
||||
Reference in New Issue
Block a user