layer2
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -2,3 +2,6 @@ MatBench/layer3/articles_fsy
|
|||||||
*.zip
|
*.zip
|
||||||
*.temp
|
*.temp
|
||||||
*.pyc
|
*.pyc
|
||||||
|
*.pdf
|
||||||
|
*.md
|
||||||
|
layer3/articles/*
|
||||||
12286
layer3/downloaded_failed_papers.json
Normal file
12286
layer3/downloaded_failed_papers.json
Normal file
File diff suppressed because it is too large
Load Diff
547824
layer3/paper-mat.json
547824
layer3/paper-mat.json
File diff suppressed because it is too large
Load Diff
56
layer3/src/rename.py
Normal file
56
layer3/src/rename.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
import glob
|
||||||
|
|
||||||
|
# Locations this one-off maintenance script operates on.
DOI_JSON_PATH = '/home/ubuntu/50T/LYT/MatBench/layer3/downloaded_failed_papers.json'
PDF_DIR = '/home/ubuntu/50T/LYT/MatBench/layer3/articles/pdfs'
MD_DIR = '/home/ubuntu/50T/LYT/MatBench/layer3/articles/mds'

# Inclusive range of numbered PDF files to look for (the original comment
# states [1-170]).
PDF_FIRST, PDF_LAST = 1, 170

# Inclusive range of numbered markdown directories handled by this run.
MD_FIRST, MD_LAST = 16, 109


def collect_doi_names(records):
    """Return filesystem-safe names for the unique DOIs found in *records*.

    Each record is expected to be a mapping; records without a ``'DOI'`` key
    are ignored.  Slashes in a DOI are replaced by underscores so the result
    can be used as a file or directory name.  Order of first appearance is
    preserved.

    Duplicates are detected on the sanitised name.  The previous version
    compared the raw DOI against the list of already-sanitised names, so any
    DOI containing ``/`` was never recognised as a duplicate.
    """
    seen = set()
    names = []
    for record in records:
        if 'DOI' not in record:
            continue
        doi = record['DOI']
        safe_name = doi.replace('/', '_')  # avoid path separators in names
        if safe_name in seen:
            print(f"Duplicate DOI found: {doi}")
            continue
        seen.add(safe_name)
        names.append(safe_name)
    return names


def numbered_paths(directory, first, last, suffix=''):
    """Return ``directory/<n><suffix>`` for every n in [first, last] inclusive."""
    return [f'{directory}/{number}{suffix}' for number in range(first, last + 1)]


def rename_md_dirs(md_dirs, dois, target_dir=MD_DIR):
    """Rename each path in *md_dirs* to ``target_dir/<dois[i]>``.

    Entries are paired by position.  A target that already exists is left
    untouched (and reported), which makes re-running the script safe.  If
    *md_dirs* is longer than *dois*, the surplus directories are reported and
    skipped.

    Returns the number of directories actually renamed.
    Raises whatever ``os.rename`` raises, e.g. when a source path is missing.
    """
    renamed = 0
    for index, md_dir in enumerate(md_dirs):
        if index >= len(dois):
            print(f'No DOI available for directory: {md_dir}')
            continue
        new_path = os.path.join(target_dir, dois[index])
        if os.path.exists(new_path):
            print(f'Directory already exists: {new_path}')
            continue
        os.rename(md_dir, new_path)
        print(f'Renamed {md_dir} to {new_path}')
        renamed += 1
    return renamed


def main():
    """Rename the numbered markdown directories after their DOIs."""
    with open(DOI_JSON_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)
    dois = collect_doi_names(data)

    # Collect the numbered PDF files that actually exist.  Renaming PDFs is
    # currently disabled, so this list is gathered but not acted upon.
    pdf_files = []
    for pdf_path in numbered_paths(PDF_DIR, PDF_FIRST, PDF_LAST, '.pdf'):
        pdf_files.extend(glob.glob(pdf_path))

    # Numbered markdown directories handled by this run.
    md_dirs = numbered_paths(MD_DIR, MD_FIRST, MD_LAST)
    print(len(md_dirs))
    print()

    # Keep only the DOIs that line up with the selected directories.
    # NOTE(review): directory N is paired with dois[N] (0-based), whereas the
    # disabled PDF step paired the first file (1.pdf) with dois[0].  Confirm
    # which numbering the on-disk directories really follow.
    dois = dois[MD_FIRST:MD_LAST + 1]
    assert len(md_dirs) == len(dois), f"Number of MD directories ({len(md_dirs)}) does not match number of DOIs ({len(dois)})"

    rename_md_dirs(md_dirs, dois)


if __name__ == '__main__':
    main()
|
||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Reference in New Issue
Block a user