Merge remote-tracking branch 'origin/main'

This commit is contained in:
Timothyxxx
2024-01-29 20:15:01 +08:00
10 changed files with 228 additions and 534 deletions

View File

@@ -50,24 +50,30 @@ def contains_page_break(docx_file):
return 0
def compare_docx_files(file1, file2, ignore_blanks=True):
    """Compare the paragraph text of two .docx documents.

    Args:
        file1: First document; passed directly to ``Document``.
        file2: Second document; passed directly to ``Document``.
        ignore_blanks: When true (the default), the paragraphs of each
            document are joined with newlines, every run of whitespace is
            collapsed to a single space, and the stripped results are
            compared as one string. Blank paragraphs, extra spacing and
            paragraph breaks that fall on whitespace therefore do not
            count as differences. When false, both documents must have
            the same number of paragraphs and each pair must be equal.

    Returns:
        1 if the documents match under the selected mode, otherwise 0.
    """
    doc1 = Document(file1)
    doc2 = Document(file2)

    doc1_paragraphs = [p.text for p in doc1.paragraphs]
    doc2_paragraphs = [p.text for p in doc2.paragraphs]

    if ignore_blanks:
        # Normalise whitespace over the whole document rather than per
        # paragraph so that empty paragraphs vanish from the comparison.
        text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
        text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
        if text1 != text2:
            return 0
    else:
        # List equality covers both the paragraph-count check and the
        # paragraph-by-paragraph text comparison.
        if doc1_paragraphs != doc2_paragraphs:
            return 0

    return 1