add multi-apps 5 examples by ruisheng 2024-03-06

This commit is contained in:
rhythmcao
2024-03-06 21:20:26 +08:00
parent 69ef653a7c
commit da0dafc32c
14 changed files with 645 additions and 15 deletions

View File

@@ -58,6 +58,8 @@ def contains_page_break(docx_file):
def compare_docx_files(file1, file2, **options):
ignore_blanks = options.get('ignore_blanks', True)
ignore_case = options.get('ignore_case', False)
ignore_order = options.get('ignore_order', False)
content_only = options.get('content_only', False)
def get_paragraph_texts_odt(document):
@@ -82,11 +84,17 @@ def compare_docx_files(file1, file2, **options):
doc2 = Document(file2)
doc1_paragraphs = [p.text for p in doc1.paragraphs]
doc2_paragraphs = [p.text for p in doc2.paragraphs]
if ignore_order:
doc1_paragraphs = sorted(doc1_paragraphs)
doc2_paragraphs = sorted(doc2_paragraphs)
elif file1.endswith('.odt') and file2.endswith('.odt'):
doc1 = load(file1)
doc2 = load(file2)
doc1_paragraphs = get_paragraph_texts_odt(doc1)
doc2_paragraphs = get_paragraph_texts_odt(doc2)
if ignore_order:
doc1_paragraphs = sorted(doc1_paragraphs)
doc2_paragraphs = sorted(doc2_paragraphs)
else:
# Unsupported file types or mismatch
print("Unsupported file types or mismatch between file types.")
@@ -96,6 +104,8 @@ def compare_docx_files(file1, file2, **options):
# Compare the content of the documents
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
if ignore_case:
text1, text2 = text1.lower(), text2.lower()
similarity = fuzz.ratio(text1, text2) / 100.0
return similarity
@@ -103,6 +113,8 @@ def compare_docx_files(file1, file2, **options):
if ignore_blanks:
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
if ignore_case:
text1, text2 = text1.lower(), text2.lower()
if text1 != text2:
return 0
else:
@@ -111,6 +123,8 @@ def compare_docx_files(file1, file2, **options):
# Compare each paragraph
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
if ignore_case:
p1, p2 = p1.lower(), p2.lower()
if p1 != p2:
return 0