fix: fix multiapps tasks (#231)
* Update JSON example for multi_apps: change snapshot name and specify presenter in instructions for clarity. * Enhance PDF image comparison in chrome.py by adding existence checks for input files and improving image extraction logic. Introduce image hashing for similarity scoring with a configurable threshold. Update docs.py to support fuzzy matching in DOCX file comparisons, allowing for similarity scoring based on text content. Modify example JSON to enable fuzzy matching option. --------- Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
This commit is contained in:
@@ -86,6 +86,7 @@ def compare_docx_files(file1, file2, **options):
|
||||
ignore_case = options.get('ignore_case', False)
|
||||
ignore_order = options.get('ignore_order', False)
|
||||
content_only = options.get('content_only', False)
|
||||
fuzzy_match = options.get('fuzzy_match', False)
|
||||
|
||||
if not file1 or not file2:
|
||||
return 0
|
||||
@@ -151,29 +152,48 @@ def compare_docx_files(file1, file2, **options):
|
||||
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
|
||||
if ignore_case:
|
||||
text1, text2 = text1.lower(), text2.lower()
|
||||
if text1 != text2:
|
||||
return 0
|
||||
|
||||
if fuzzy_match:
|
||||
similarity = fuzz.ratio(text1, text2) / 100.0
|
||||
return similarity
|
||||
else:
|
||||
if text1 != text2:
|
||||
return 0
|
||||
else:
|
||||
print("ignore_blanks=false")
|
||||
if len(doc1_paragraphs) != len(doc2_paragraphs):
|
||||
print(doc1_paragraphs)
|
||||
print(doc2_paragraphs)
|
||||
print(len(doc1_paragraphs))
|
||||
print(len(doc2_paragraphs))
|
||||
return 0
|
||||
print("in compare")
|
||||
# Compare each paragraph
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if ignore_case:
|
||||
p1, p2 = p1.lower(), p2.lower()
|
||||
if p1 != p2:
|
||||
# show the difference
|
||||
print("=== First Paragraph ===")
|
||||
print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars
|
||||
print("=== Second Paragraph ===")
|
||||
print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars
|
||||
print("=" * 50) # Clear boundary
|
||||
return 0
|
||||
|
||||
if fuzzy_match:
|
||||
total_similarity = 0
|
||||
if not doc1_paragraphs:
|
||||
return 1.0
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if ignore_case:
|
||||
p1, p2 = p1.lower(), p2.lower()
|
||||
total_similarity += fuzz.ratio(p1, p2) / 100.0
|
||||
|
||||
if len(doc1_paragraphs) == 0:
|
||||
return 1.0 if len(doc2_paragraphs) == 0 else 0.0
|
||||
|
||||
avg_similarity = total_similarity / len(doc1_paragraphs)
|
||||
return avg_similarity
|
||||
else:
|
||||
# Compare each paragraph
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if ignore_case:
|
||||
p1, p2 = p1.lower(), p2.lower()
|
||||
if p1 != p2:
|
||||
# show the difference
|
||||
print("=== First Paragraph ===")
|
||||
print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars
|
||||
print("=== Second Paragraph ===")
|
||||
print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars
|
||||
print("=" * 50) # Clear boundary
|
||||
return 0
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
Reference in New Issue
Block a user