Files
mars-mcp/mars_toolkit/core/cif_utils.py
2025-04-02 16:24:50 +08:00

118 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
CIF Utilities Module
This module provides basic functions for handling CIF (Crystallographic Information File) files,
which are commonly used in materials science for representing crystal structures.
"""
import json
import logging
logger = logging.getLogger(__name__)
def read_cif_txt_file(file_path):
    """
    Load a CIF file from disk and hand back its text.

    Args:
        file_path: Location of the CIF file; it is opened in text mode and
            decoded as UTF-8.

    Returns:
        The complete file content as a string, or None if anything goes
        wrong while opening or reading (the failure is logged as an error).
    """
    content = None
    try:
        with open(file_path, mode='r', encoding='utf-8') as cif_file:
            content = cif_file.read()
    except Exception as e:
        # Best-effort read: report the problem and fall through to return None.
        logger.error(f"Error reading file {file_path}: {e}")
    return content
def extract_cif_info(path: str, fields_name: list):
    """
    Extract specific fields from the CIF description JSON file.

    Args:
        path: Path to the JSON file containing CIF information. The file is
            read as UTF-8 and its top level must be a JSON object.
        fields_name: List of field categories to extract. Recognised
            categories are 'basic_fields', 'energy_electronic_fields' and
            'metal_magentic_fields' (the correctly spelled
            'metal_magnetic_fields' is accepted as an alias). If
            'all_fields' appears anywhere in the list, every category is
            extracted. A single category may also be passed as a plain
            string. Unknown category names are ignored.

    Returns:
        Dictionary mapping each selected field name to its value in the JSON
        document, or to '' when the document has no such key. An empty
        selection yields an empty dictionary.
    """
    basic_fields = ['formula_pretty', 'chemsys', 'composition', 'elements', 'symmetry', 'nsites', 'volume', 'density']
    energy_electronic_fields = ['formation_energy_per_atom', 'energy_above_hull', 'is_stable', 'efermi', 'cbm', 'vbm', 'band_gap', 'is_gap_direct']
    metal_magentic_fields = ['is_metal', 'is_magnetic', "ordering", 'total_magnetization', 'num_magnetic_sites']

    # Explicit lookup table instead of locals(): only real category names can
    # match, so a value such as 'path' can no longer pull in an unrelated
    # local variable.
    categories = {
        'basic_fields': basic_fields,
        'energy_electronic_fields': energy_electronic_fields,
        'metal_magentic_fields': metal_magentic_fields,  # historical spelling
        'metal_magnetic_fields': metal_magentic_fields,
    }

    # A bare string would otherwise be iterated character by character.
    if isinstance(fields_name, str):
        fields_name = [fields_name]

    if 'all_fields' in fields_name:
        selected_fields = basic_fields + energy_electronic_fields + metal_magentic_fields
    else:
        selected_fields = []
        for category in fields_name:
            selected_fields.extend(categories.get(category, []))

    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        docs = json.load(f)

    return {field_name: docs.get(field_name, '') for field_name in selected_fields}
def remove_symmetry_equiv_xyz(cif_content):
    """
    Remove the symmetry operations loop from CIF file content.

    A loop is removed when one of its header tags contains
    '_symmetry_equiv_pos_as_xyz'. The 'loop_' line, the header tags and the
    data rows of that loop are dropped; everything else is kept verbatim.
    The loop's data rows end at the next line that is 'loop_' or that starts
    with '_' or 'data_' (compared case-insensitively), so data items that
    follow the symmetry loop are preserved. Blank lines directly after the
    loop's rows are treated as part of the loop and removed with it.

    Args:
        cif_content: CIF file content string

    Returns:
        Cleaned CIF content string with symmetry operations removed
    """
    lines = cif_content.split('\n')
    total = len(lines)
    output_lines = []
    i = 0
    while i < total:
        # Anything that is not the start of a loop is kept as-is.
        if lines[i].strip().lower() != 'loop_':
            output_lines.append(lines[i])
            i += 1
            continue

        # Scan the loop header (consecutive tag lines starting with '_') and
        # note whether it declares the symmetry-operation column.
        j = i + 1
        is_symmetry_loop = False
        while j < total and lines[j].strip().startswith('_'):
            if '_symmetry_equiv_pos_as_xyz' in lines[j].lower():
                is_symmetry_loop = True
            j += 1

        if not is_symmetry_loop:
            # Keep the 'loop_' line; its tags and rows are handled by the
            # generic branch above on the following iterations.
            output_lines.append(lines[i])
            i += 1
            continue

        # Skip the data rows of the symmetry loop. Stop at the first line
        # that begins a new loop, a new data block, or a new data item, so
        # that items following the loop are not swallowed.
        while j < total:
            candidate = lines[j].strip().lower()
            if candidate == 'loop_' or candidate.startswith(('_', 'data_')):
                break
            j += 1
        i = j

    return '\n'.join(output_lines)