add eval libreoffice write compare content

This commit is contained in:
tsuky_chen
2023-12-30 18:21:39 +08:00
parent 24f33dc9bf
commit 2d493759e3
6 changed files with 110 additions and 11 deletions

View File

@@ -1,3 +1,3 @@
from .table import compare_table, compare_with_sparklines, compare_with_charts
from .table import check_sheet_list, check_xlsx_freeze
from .docs import find_default_font, contains_page_break
from .docs import find_default_font, contains_page_break, compare_docx_files

View File

@@ -2,8 +2,6 @@ import xml.etree.ElementTree as ET
from docx import Document
from lxml import etree
def find_default_font(expected, config_file_path):
"""Find the default font in LibreOffice Writer."""
default_font = None
@@ -36,6 +34,29 @@ def contains_page_break(docx_file):
if br is not None and '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type' in br.attrib and br.attrib['{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type'] == 'page':
return 1
return 0
def compare_docx_files(file1, file2):
    """Report whether two .docx files carry the same paragraph text.

    Each file is opened with ``Document`` and the ``text`` of every entry
    in its ``paragraphs`` is collected in order. Only those text strings
    are compared; nothing else from the documents is inspected here.

    Args:
        file1: First document, passed straight to ``Document``.
        file2: Second document, passed straight to ``Document``.

    Returns:
        1 if both documents yield the same number of paragraphs and every
        pair of paragraph texts is equal, otherwise 0.
    """
    first_doc = Document(file1)
    second_doc = Document(file2)

    first_texts = [para.text for para in first_doc.paragraphs]
    second_texts = [para.text for para in second_doc.paragraphs]

    # List equality already covers both the length check and the
    # position-by-position text comparison.
    return 1 if first_texts == second_texts else 0
# file1 = 'path/to/file1.docx'
# file2 = 'path/to/file2.docx'
# print(compare_docx_files(file1, file2))
# Replace 'your_document.docx' with the path to your document
# result = contains_page_break('your_document.docx')
# print(result)