118 lines
3.8 KiB
Python
Executable File
118 lines
3.8 KiB
Python
Executable File
"""
|
||
CIF Utilities Module
|
||
|
||
This module provides basic functions for handling CIF (Crystallographic Information File) files,
|
||
which are commonly used in materials science for representing crystal structures.
|
||
"""
|
||
|
||
import json
|
||
import logging
|
||
|
||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
|
||
def read_cif_txt_file(file_path):
    """
    Load a CIF file from disk and hand back its text.

    The file is opened in text mode and decoded as UTF-8. Any exception
    raised while opening or reading it is logged via the module logger and
    reported to the caller as ``None`` rather than being propagated.

    Args:
        file_path: Path to the CIF file

    Returns:
        The complete file content as a string, or None if reading failed
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as cif_file:
            content = cif_file.read()
    except Exception as err:
        # Best-effort contract: report the problem and signal failure with None.
        logger.error(f"Error reading file {file_path}: {err}")
        return None
    return content
|
||
|
||
def extract_cif_info(path: str, fields_name: list) -> dict:
    """
    Extract specific fields from the CIF description JSON file.

    Args:
        path: Path to the JSON file containing CIF information
        fields_name: List of field categories to extract. Recognised
            categories are 'basic_fields', 'energy_electronic_fields' and
            'metal_magentic_fields' (historical spelling, kept so existing
            callers keep working; 'metal_magnetic_fields' is accepted as an
            alias). If 'all_fields' appears in the list, every category is
            selected. Unknown category names are ignored, and an empty list
            selects nothing.

    Returns:
        Dictionary mapping each selected field name to its value in the
        JSON document. Fields absent from the document map to ''.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit category table. Looking names up here (instead of in locals())
    # guarantees that only real categories can ever be selected.
    field_groups = {
        'basic_fields': [
            'formula_pretty', 'chemsys', 'composition', 'elements',
            'symmetry', 'nsites', 'volume', 'density',
        ],
        'energy_electronic_fields': [
            'formation_energy_per_atom', 'energy_above_hull', 'is_stable',
            'efermi', 'cbm', 'vbm', 'band_gap', 'is_gap_direct',
        ],
        'metal_magentic_fields': [
            'is_metal', 'is_magnetic', "ordering", 'total_magnetization',
            'num_magnetic_sites',
        ],
    }
    all_groups = list(field_groups)
    # Correctly spelled alias for the misspelled historical category name.
    field_groups['metal_magnetic_fields'] = field_groups['metal_magentic_fields']

    requested_groups = all_groups if 'all_fields' in fields_name else fields_name

    selected_fields = []
    for group in requested_groups:
        selected_fields.extend(field_groups.get(group, ()))

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        docs = json.load(f)

    return {field_name: docs.get(field_name, '') for field_name in selected_fields}
|
||
|
||
def remove_symmetry_equiv_xyz(cif_content):
    """
    Remove symmetry operations section from CIF file content.

    This is often useful when working with CIF files in certain visualization tools
    or when focusing on the basic structure without symmetry operations.

    A ``loop_`` whose header contains a ``_symmetry_equiv_pos_as_xyz`` tag is
    dropped in full: the ``loop_`` line, its header tags and its value rows.
    Removal stops at the first following line that starts a new data item
    (``_...``), a new ``loop_`` or a new ``data_`` block, so items that come
    after the symmetry loop are preserved. Blank lines directly after the
    value rows are removed together with the loop. Keyword and tag matching
    is case-insensitive.

    Args:
        cif_content: CIF file content string

    Returns:
        Cleaned CIF content string with symmetry operations removed
    """
    symmetry_tag = '_symmetry_equiv_pos_as_xyz'
    # Line prefixes that can only begin a new CIF construct, i.e. that end
    # the value rows of the loop being skipped.
    loop_terminators = ('_', 'loop_', 'data_')

    lines = cif_content.split('\n')
    total = len(lines)
    output_lines = []

    i = 0
    while i < total:
        # Anything that is not the start of a loop is kept verbatim.
        if lines[i].strip().lower() != 'loop_':
            output_lines.append(lines[i])
            i += 1
            continue

        # Collect the loop header: the run of tag lines right after `loop_`.
        body_start = i + 1
        while body_start < total and lines[body_start].strip().startswith('_'):
            body_start += 1
        header_tags = [tag.strip().lower() for tag in lines[i + 1:body_start]]

        if not any(symmetry_tag in tag for tag in header_tags):
            # Not the symmetry loop: keep the `loop_` line; its header and
            # rows are handled as ordinary lines by later iterations.
            output_lines.append(lines[i])
            i += 1
            continue

        # Symmetry loop: skip its value rows, stopping at the next construct.
        i = body_start
        while i < total and not lines[i].strip().lower().startswith(loop_terminators):
            i += 1

    return '\n'.join(output_lines)
|