Merge remote-tracking branch 'origin/main'
This commit is contained in:
@@ -50,24 +50,30 @@ def contains_page_break(docx_file):
|
||||
return 0
|
||||
|
||||
|
||||
def compare_docx_files(file1, file2, ignore_blanks=True):
    """Compare the paragraph text of two .docx files.

    Both files are opened with ``Document`` (presumably python-docx's
    ``Document`` -- confirm against the file's imports) and only the
    ``text`` of each entry in ``paragraphs`` is compared.

    Args:
        file1: First document, passed straight to ``Document``.
        file2: Second document, passed straight to ``Document``.
        ignore_blanks: When true (the default), the paragraphs of each
            document are joined with newlines, every run of whitespace
            is collapsed to a single space, and leading/trailing
            whitespace is stripped before comparing. Differences that
            consist only of whitespace or of how text is split into
            paragraphs are therefore ignored. When false, the documents
            must have the same number of paragraphs and each pair of
            paragraphs must be exactly equal.

    Returns:
        1 if the documents are considered equal, 0 otherwise.
    """
    doc1 = Document(file1)
    doc2 = Document(file2)

    doc1_paragraphs = [p.text for p in doc1.paragraphs]
    doc2_paragraphs = [p.text for p in doc2.paragraphs]

    if ignore_blanks:
        # Normalise all whitespace (including the paragraph separators
        # introduced by the join) so only the visible text matters.
        text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
        text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
        if text1 != text2:
            return 0
    else:
        # List equality is exactly "same length and every paragraph
        # pair equal", i.e. the strict paragraph-by-paragraph check.
        if doc1_paragraphs != doc2_paragraphs:
            return 0

    return 1
|
||||
|
||||
|
||||
Reference in New Issue
Block a user