Files
datapipe/clean/step0_pdfs2sql.py
2025-01-18 17:09:51 +08:00

33 lines
792 B
Python

import os
import tqdm
import sqlite3
import mysql.connector
def main():
    """Register every PDF found under ``mp_cif/pdfs`` as a DOI row in MySQL.

    The ``mp_cif/pdfs`` directory next to this script is scanned, and for
    each ``*.pdf`` entry the file name without its extension is inserted
    into the ``doi`` column of the ``mp_cif_info`` table. A single commit is
    issued after all inserts have been executed, so nothing is committed if
    any insert raises.

    Raises:
        OSError: if the ``mp_cif/pdfs`` directory cannot be listed.
        mysql.connector.Error: if connecting, inserting or committing fails.
    """
    TABLE_NAME = 'mp_cif_info'
    cur_path = os.path.dirname(os.path.abspath(__file__))
    pdf_dir = os.path.join(cur_path, 'mp_cif/pdfs')

    # Scan the directory *before* opening the connection so that a missing
    # or unreadable directory cannot leak an open MySQL connection.
    # Sorting makes the insertion order reproducible between runs.
    doi_list = []
    for file_name in sorted(os.listdir(pdf_dir)):
        # Strip only the trailing extension; a blanket str.replace would
        # also remove any '.pdf' occurring in the middle of the name.
        stem, ext = os.path.splitext(file_name)
        # Skip stray non-PDF entries (e.g. '.DS_Store') instead of
        # inserting their names as DOIs.
        if ext.lower() == '.pdf':
            doi_list.append(stem)

    if not doi_list:
        # Nothing to insert, so there is no reason to open a connection.
        return

    # The statement is loop-invariant, so build it once. The table name
    # cannot be bound as a query parameter; it is a local constant here,
    # never user input. The DOI value itself is passed as a parameter.
    sql = f"INSERT INTO {TABLE_NAME} (doi) VALUES (%s)"

    # NOTE(review): connection credentials are hard-coded in source;
    # consider loading them from configuration or the environment.
    mysql_connection = mysql.connector.connect(
        host='100.84.94.73',
        user='metadata_mat_papers',
        password='siat-mic',
        database='metadata_mat_papers'
    )
    try:
        mysql_cursor = mysql_connection.cursor()
        try:
            for doi in doi_list:
                mysql_cursor.execute(sql, (doi,))
            # Commit once, after every insert has succeeded.
            mysql_connection.commit()
        finally:
            mysql_cursor.close()
    finally:
        # Always release the connection, whether or not the inserts worked.
        mysql_connection.close()
# Script entry point: run the import only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()