Loaded the LibreOffice Writer examples and found a few problems; will do another round tomorrow for the rest.

This commit is contained in:
Timothyxxx
2024-01-02 17:50:05 +08:00
parent 8ac88e9617
commit 03e99a68fb
22 changed files with 191 additions and 84 deletions

View File

@@ -1,15 +1,20 @@
import xml.etree.ElementTree as ET
import os
from typing import List, Dict, Any
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
def find_default_font(expected, config_file_path):
def find_default_font(config_file_path, rules):
"""Find the default font in LibreOffice Writer."""
default_font = None
expected_font = rules["font_name"]
try:
tree = ET.parse(config_file_path)
root = tree.getroot()
# Define the XML namespace used in the file
# Define the XML namespace used in the file
namespace = {'oor': 'http://openoffice.org/2001/registry'}
# Search for the node containing the default font setting for LibreOffice Writer
@@ -19,24 +24,26 @@ def find_default_font(expected, config_file_path):
default_font = value.text
except Exception as e:
print(f"Error: {e}")
return 1 if default_font == expected else 0
return 1 if default_font == expected_font else 0
def contains_page_break(docx_file):
    """Report whether a .docx document contains an explicit page break.

    Every run of every paragraph returned by ``doc.paragraphs`` is searched
    for ``w:br`` elements whose ``w:type`` attribute equals ``"page"``.

    Args:
        docx_file: Value passed straight to ``Document``; presumably the
            path of the .docx file to inspect.

    Returns:
        1 if at least one page-break element is found, otherwise 0.
    """
    w_ns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
    namespaces = {'w': w_ns}
    # Attribute keys are stored in Clark notation: "{namespace-uri}localname".
    type_attr = f'{{{w_ns}}}type'

    doc = Document(docx_file)
    for paragraph in doc.paragraphs:
        for run in paragraph.runs:
            for br in run.element.findall('.//w:br', namespaces):
                # A missing attribute yields None, which never equals 'page'.
                if br.attrib.get(type_attr) == 'page':
                    return 1
    return 0
def compare_docx_files(file1, file2):
def compare_docx_files(file1, file2):
doc1 = Document(file1)
doc2 = Document(file2)
@@ -53,6 +60,7 @@ def compare_docx_files(file1, file2):
return 1
def compare_docx_tables(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -78,8 +86,8 @@ def compare_docx_tables(docx_file1, docx_file2):
return 1
def compare_line_spacing(docx_file1, docx_file2):
def compare_line_spacing(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -96,12 +104,12 @@ def compare_line_spacing(docx_file1, docx_file2):
return 0
return 1
def compare_insert_equation(docx_file1, docx_file2):
if not compare_docx_files(docx_file1, docx_file2):
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -112,8 +120,10 @@ def compare_insert_equation(docx_file1, docx_file2):
return 1
return 0
def compare_font_names(expected_font, docx_file):
def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
doc = Document(docx_file)
expected_font = rules["font_name"]
for paragraph in doc.paragraphs:
for run in paragraph.runs:
@@ -122,6 +132,7 @@ def compare_font_names(expected_font, docx_file):
return 0
return 1
def compare_subscript_contains(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -133,6 +144,7 @@ def compare_subscript_contains(docx_file1, docx_file2):
return 1
return 0
def has_page_numbers_in_footers(docx_file):
doc = Document(docx_file)
@@ -146,7 +158,6 @@ def has_page_numbers_in_footers(docx_file):
return 0
return 1
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
def is_first_line_centered(docx_file):
doc = Document(docx_file)
@@ -155,19 +166,20 @@ def is_first_line_centered(docx_file):
# check if the first line is center justified
return 1 if first_paragraph.paragraph_format.alignment == WD_PARAGRAPH_ALIGNMENT.CENTER else 0
import os
def check_file_exists(directory, filename):
    """Check whether ``filename`` exists as a regular file inside ``directory``.

    Args:
        directory: Directory path that is joined with ``filename``.
        filename: Name (or relative path) of the file to look for.

    Returns:
        1 if the joined path is an existing regular file, otherwise 0
        (including when the path exists but is a directory).
    """
    file_path = os.path.join(directory, filename)
    return 1 if os.path.isfile(file_path) else 0
def compare_contains_image(docx_file1, docx_file2):
    """Check that two documents carry images in the same runs.

    Paragraphs, and the runs inside them, are paired positionally with
    ``zip``. A run counts as containing an image when the string
    ``'graphicData'`` appears in its XML.

    Note: ``zip`` stops at the shorter sequence, so surplus paragraphs or
    runs in either document are not compared.

    Args:
        docx_file1: Value passed to ``Document`` for the first document.
        docx_file2: Value passed to ``Document`` for the second document.

    Returns:
        0 as soon as a paired run has an image in one document but not in
        the other; 1 if no such mismatch is found.
    """
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)

    for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
        for run1, run2 in zip(para1.runs, para2.runs):
            has_image1 = 'graphicData' in run1._element.xml
            has_image2 = 'graphicData' in run2._element.xml
            # Exactly one side having an image is a mismatch.
            if has_image1 != has_image2:
                return 0
    return 1
@@ -178,6 +190,6 @@ def compare_contains_image(docx_file1, docx_file2):
# Replace 'your_document.docx' with the path to your document
# result = contains_page_break('your_document.docx')
# print(result)
#config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
#print(find_default_font("Ani", config_path))
# config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
# print(find_default_font(config_path, {"font_name": "Ani"}))