Files
sci-gui-agent-benchmark/desktop_env/evaluators/metrics/docs.py

126 lines
3.9 KiB
Python

import xml.etree.ElementTree as ET
from docx import Document
def find_default_font(expected, config_file_path):
    """Check whether LibreOffice Writer's default font equals ``expected``.

    The file at ``config_file_path`` is parsed as XML and searched for
    ``item`` elements whose ``oor:path`` attribute is
    ``/org.openoffice.Office.Writer/DefaultFont``.  Inside those, the text of
    the ``value`` child of the ``prop`` named ``Standard`` is taken as the
    default font.  If several values match, the last one wins.

    Args:
        expected: Font name the configuration is expected to contain.
        config_file_path: Path to the XML registry file to inspect.

    Returns:
        1 if the configured font equals ``expected``, otherwise 0.  A file
        that cannot be read or parsed always yields 0.
    """
    # XML namespace used for the ``oor:`` attributes in the registry file.
    namespace = {'oor': 'http://openoffice.org/2001/registry'}
    item_query = './/item[@oor:path="/org.openoffice.Office.Writer/DefaultFont"]'
    prop_query = './/prop[@oor:name="Standard"]'

    try:
        root = ET.parse(config_file_path).getroot()
        fonts = [
            value.text
            for item in root.findall(item_query, namespace)
            for prop in item.findall(prop_query, namespace)
            for value in prop.findall('value', namespace)
        ]
    except Exception as e:
        # Best-effort evaluator: report the problem and fail the check.
        # Returning here (instead of falling through) prevents an unreadable
        # file from "matching" when ``expected`` is None.
        print(f"Error: {e}")
        return 0

    default_font = fonts[-1] if fonts else None
    return 1 if default_font == expected else 0
def contains_page_break(docx_file):
    """Report whether a run in the document carries an explicit page break.

    The document is opened with ``Document`` and every run of every
    paragraph in ``doc.paragraphs`` is searched for ``w:br`` elements.  A
    break counts only when its ``w:type`` attribute equals ``page``.

    Args:
        docx_file: The .docx file to open (passed straight to ``Document``).

    Returns:
        1 as soon as a page-type break is found, otherwise 0.
    """
    document = Document(docx_file)
    ns_map = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
    # Fully-qualified (Clark notation) name of the ``w:type`` attribute.
    type_key = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type'

    for para in document.paragraphs:
        for text_run in para.runs:
            for line_break in text_run.element.findall('.//w:br', ns_map):
                # A missing attribute yields None, which never equals 'page'.
                if line_break.attrib.get(type_key) == 'page':
                    return 1
    return 0
def compare_docx_files(file1, file2):
    """Compare two .docx files by the text of their paragraphs.

    Both files are opened with ``Document``; the ``text`` of each entry in
    ``paragraphs`` is collected in order and the two sequences are compared.

    Args:
        file1: First .docx file.
        file2: Second .docx file.

    Returns:
        1 if both documents have the same number of paragraphs and every
        paragraph text matches position by position, otherwise 0.
    """
    first, second = Document(file1), Document(file2)
    first_texts = [para.text for para in first.paragraphs]
    second_texts = [para.text for para in second.paragraphs]
    # List equality covers both the length check and the pairwise comparison.
    return 1 if first_texts == second_texts else 0
def compare_docx_tables(docx_file1, docx_file2):
    """Compare the tables of two .docx files cell by cell.

    Args:
        docx_file1: First .docx file.
        docx_file2: Second .docx file.

    Returns:
        1 if both documents contain the same number of tables, each pair of
        tables has the same row and column counts, and every cell text
        matches; otherwise 0.
    """
    first, second = Document(docx_file1), Document(docx_file2)
    left_tables = first.tables
    right_tables = second.tables

    if len(left_tables) != len(right_tables):
        return 0

    for left, right in zip(left_tables, right_tables):
        # Shape check first: row count, then column count.
        if len(left.rows) != len(right.rows) or len(left.columns) != len(right.columns):
            return 0

        row_count = len(left.rows)
        col_count = len(left.columns)
        # Walk the grid row-major and stop at the first differing cell.
        any_cell_differs = any(
            left.cell(r, c).text != right.cell(r, c).text
            for r in range(row_count)
            for c in range(col_count)
        )
        if any_cell_differs:
            return 0

    return 1
def compare_line_spacing(docx_file1, docx_file2):
    """Compare the line spacing of corresponding paragraphs in two files.

    Args:
        docx_file1: First .docx file.
        docx_file2: Second .docx file.

    Returns:
        1 if both documents have the same number of paragraphs and each
        pair of paragraphs reports the same
        ``paragraph_format.line_spacing``; otherwise 0.
    """
    first, second = Document(docx_file1), Document(docx_file2)

    if len(first.paragraphs) != len(second.paragraphs):
        return 0

    spacing_differs = any(
        left.paragraph_format.line_spacing != right.paragraph_format.line_spacing
        for left, right in zip(first.paragraphs, second.paragraphs)
    )
    return 0 if spacing_differs else 1
def compare_insert_equation(docx_file1, docx_file2):
    """Check that two files share their text and both embed an object.

    The paragraph texts must first match according to
    ``compare_docx_files``.  Then paragraphs and their runs are paired up
    positionally, and the check succeeds once a pair of runs is found in
    which both contain a ``w:object`` element (used here as the marker of
    an inserted equation).

    Args:
        docx_file1: First .docx file.
        docx_file2: Second .docx file.

    Returns:
        True if the texts match and such a pair of runs exists, otherwise
        False.
    """
    texts_match = compare_docx_files(docx_file1, docx_file2)
    if not texts_match:
        return False

    first, second = Document(docx_file1), Document(docx_file2)
    object_query = './/w:object'

    # any() stops at the first pair of runs that both hold an object.
    return any(
        left_run.element.xpath(object_query) and right_run.element.xpath(object_query)
        for left_para, right_para in zip(first.paragraphs, second.paragraphs)
        for left_run, right_run in zip(left_para.runs, right_para.runs)
    )
# file1 = 'path/to/file1.docx'
# file2 = 'path/to/file2.docx'
# print(compare_docx_files(file1, file2))
# Replace 'your_document.docx' with the path to your document
# result = contains_page_break('your_document.docx')
# print(result)
#config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
#print(find_default_font("Ani", config_path))