Update multiple-apps examples and eval (WIP)
This commit is contained in:
@@ -13,6 +13,7 @@ from odf.text import P
|
||||
from odf.text import Span
|
||||
from skimage.color import deltaE_ciede2000
|
||||
from skimage.color import rgb2lab
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
logger = logging.getLogger("desktopenv.metric.docs")
|
||||
|
||||
@@ -57,6 +58,8 @@ def contains_page_break(docx_file):
|
||||
|
||||
def compare_docx_files(file1, file2, **options):
|
||||
ignore_blanks = options.get('ignore_blanks', True)
|
||||
content_only = options.get('content_only', False)
|
||||
|
||||
def get_paragraph_texts_odt(document):
|
||||
paragraphs = document.getElementsByType(P)
|
||||
paragraph_texts = []
|
||||
@@ -89,6 +92,13 @@ def compare_docx_files(file1, file2, **options):
|
||||
print("Unsupported file types or mismatch between file types.")
|
||||
return 0
|
||||
|
||||
if content_only:
|
||||
# Compare the content of the documents
|
||||
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
|
||||
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
|
||||
similarity = fuzz.ratio(text1, text2) / 100.0
|
||||
return similarity
|
||||
|
||||
# Process and compare documents
|
||||
if ignore_blanks:
|
||||
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
|
||||
|
||||
Reference in New Issue
Block a user