Update multiple-apps examples and eval (WIP)

This commit is contained in:
Timothyxxx
2024-03-02 20:41:01 +08:00
parent 0554703c01
commit 9c5269be3a
7 changed files with 316 additions and 16 deletions

View File

@@ -13,6 +13,7 @@ from odf.text import P
from odf.text import Span
from skimage.color import deltaE_ciede2000
from skimage.color import rgb2lab
from fuzzywuzzy import fuzz
logger = logging.getLogger("desktopenv.metric.docs")
@@ -57,6 +58,8 @@ def contains_page_break(docx_file):
def compare_docx_files(file1, file2, **options):
ignore_blanks = options.get('ignore_blanks', True)
content_only = options.get('content_only', False)
def get_paragraph_texts_odt(document):
paragraphs = document.getElementsByType(P)
paragraph_texts = []
@@ -89,6 +92,13 @@ def compare_docx_files(file1, file2, **options):
print("Unsupported file types or mismatch between file types.")
return 0
if content_only:
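# Content-only mode: collapse all whitespace runs to single spaces, then return the fuzzy similarity of the two texts scaled to 0.0-1.0 (not a binary match)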
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
similarity = fuzz.ratio(text1, text2) / 100.0
return similarity
# Process and compare documents
if ignore_blanks:
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()