This commit is contained in:
lzy
2025-06-12 20:02:48 +08:00
parent 1786688911
commit 65248c1e04
7 changed files with 12347 additions and 602292 deletions

5
.gitignore vendored
View File

@@ -1,4 +1,7 @@
MatBench/layer3/articles_fsy
*.zip
*.temp
*.pyc
*.pyc
*.pdf
*.md
layer3/articles/*

View File

@@ -518,7 +518,7 @@ def main():
# 难度选择比例配置
SELECTION_RATIOS = {
"hard_early_stop": 1.0, # 困难题选择10%
"easy_all_correct": 0.0, # 简单题选择3.5%
"easy_all_correct": 0.0, # 简单题选择3.5%
"mixed": 0.0, # 混合题选择0%
"unknown": 0.0 # 未知难度不选择
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

56
layer3/src/rename.py Normal file
View File

@@ -0,0 +1,56 @@
import os
import json
import glob
# One-off maintenance script: give the numbered article folders/files under
# layer3/articles DOI-based names, using the DOIs listed in
# downloaded_failed_papers.json.

FAILED_PAPERS_JSON = '/home/ubuntu/50T/LYT/MatBench/layer3/downloaded_failed_papers.json'
PDF_DIR = '/home/ubuntu/50T/LYT/MatBench/layer3/articles/pdfs'
MD_DIR = '/home/ubuntu/50T/LYT/MatBench/layer3/articles/mds'

# PDFs are named 1.pdf .. 170.pdf; both ends are inclusive.
PDF_FIRST = 1
PDF_LAST = 170

# Only the MD directories named 16 .. 109 are renamed (half-open range, like
# range()/slicing). Directory ``i`` receives ``dois[i]``.
# NOTE(review): the PDF step pairs 1.pdf with dois[0], whereas this pairs
# directory 16 with dois[16] -- confirm the MD directories are really
# numbered from 0, otherwise every MD name is shifted by one DOI.
MD_START = 16
MD_STOP = 110


def load_dois(json_path=FAILED_PAPERS_JSON):
    """Return the unique DOIs from *json_path* as filename-safe strings.

    The JSON file must contain a list of items; items without a ``'DOI'``
    key are ignored. Order of first appearance is preserved. Every ``/`` in
    a DOI is replaced by ``_`` so the result can be used as a file name.

    Duplicates are detected on the raw DOI *before* sanitising. (The
    original compared the raw DOI against the already-sanitised list, so a
    DOI containing ``/`` was never recognised as a duplicate.)
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    dois = []
    seen = set()
    for item in data:
        if 'DOI' not in item:
            continue
        doi = item['DOI']
        if doi in seen:
            print(f"Duplicate DOI found: {doi}")
            continue
        seen.add(doi)
        dois.append(doi.replace('/', '_'))  # slashes are not valid in file names
    return dois


def collect_numbered_pdfs(pdf_dir=PDF_DIR, first=PDF_FIRST, last=PDF_LAST):
    """Return the existing ``<n>.pdf`` paths in *pdf_dir* for n in [first, last].

    Paths are returned in numeric order; numbers with no file are skipped.
    """
    pdf_files = []
    for i in range(first, last + 1):  # +1: ``last`` itself is included
        pattern = os.path.join(glob.escape(pdf_dir), f'{i}.pdf')
        pdf_files.extend(glob.glob(pattern))
    return pdf_files


def rename_pdfs(dois, pdf_dir=PDF_DIR, first=PDF_FIRST, last=PDF_LAST):
    """Rename the numbered PDFs to ``<doi>.pdf``; return how many were renamed.

    The k-th existing PDF (numeric order) is paired with ``dois[k]``.
    This step is not run by ``main()``: it was disabled in the original
    script and is kept here as an opt-in helper.

    Raises:
        ValueError: if the number of PDFs found differs from ``len(dois)``.
    """
    pdf_files = collect_numbered_pdfs(pdf_dir, first, last)
    if len(pdf_files) != len(dois):
        raise ValueError(
            f"Number of PDF files ({len(pdf_files)}) does not match number of DOIs ({len(dois)})"
        )

    renamed = 0
    for pdf_file, doi in zip(pdf_files, dois):
        new_path = os.path.join(pdf_dir, doi + '.pdf')
        if os.path.exists(new_path):
            # os.rename would silently overwrite an existing file on POSIX.
            print(f'File already exists: {new_path}')
            continue
        os.rename(pdf_file, new_path)
        print(f'Renamed {pdf_file} to {new_path}')
        renamed += 1
    return renamed


def rename_md_dirs(dois, md_dir=MD_DIR, start=MD_START, stop=MD_STOP):
    """Rename MD directories ``start .. stop-1`` to DOI names; return the count.

    Directory ``<md_dir>/<i>`` is renamed to ``<md_dir>/<dois[i]>``. A target
    that already exists is left alone, which makes the script safe to re-run
    after a partial run. A missing source directory is reported and skipped
    instead of aborting the batch half-way.

    Raises:
        ValueError: if ``dois[start:stop]`` is shorter than the directory
            range, i.e. there are not enough DOIs for the requested range.
    """
    md_dirs = [os.path.join(md_dir, str(i)) for i in range(start, stop)]
    print(len(md_dirs))
    print()

    selected = dois[start:stop]  # keep the DOI list aligned with the MD directories
    if len(md_dirs) != len(selected):
        # Explicit raise rather than ``assert`` so the check survives ``python -O``.
        raise ValueError(
            f"Number of MD directories ({len(md_dirs)}) does not match number of DOIs ({len(selected)})"
        )

    renamed = 0
    for src, new_name in zip(md_dirs, selected):
        new_path = os.path.join(md_dir, new_name)
        if os.path.exists(new_path):
            print(f'Directory already exists: {new_path}')
        elif not os.path.exists(src):
            print(f'Source directory not found: {src}')
        else:
            os.rename(src, new_path)
            print(f'Renamed {src} to {new_path}')
            renamed += 1
    return renamed


def main():
    """Load the DOI list and rename the MD directories."""
    dois = load_dois()
    # The PDF step was disabled in the original script; call
    # rename_pdfs(dois) here if the PDFs still need renaming.
    rename_md_dirs(dois)


if __name__ == '__main__':
    main()

File diff suppressed because it is too large Load Diff

Binary file not shown.