fix: fix multiapps tasks (#231)

* Update JSON example for multi_apps: change the snapshot name and specify the presenter in the instructions for clarity.
* Enhance PDF image comparison in chrome.py by adding existence checks for input files and improving image extraction logic. Introduce image hashing for similarity scoring with a configurable threshold. Update docs.py to support fuzzy matching in DOCX file comparisons, allowing for similarity scoring based on text content. Modify the example JSON to enable the fuzzy matching option.

---------

Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
2025-07-03 16:58:43 +08:00
parent e9c657b714
commit bba367b8bc
4 changed files with 91 additions and 37 deletions
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -86,6 +86,7 @@ def compare_docx_files(file1, file2, **options):
    ignore_case = options.get('ignore_case', False)
    ignore_order = options.get('ignore_order', False)
    content_only = options.get('content_only', False)
+    fuzzy_match = options.get('fuzzy_match', False)

    if not file1 or not file2:
        return 0
@@ -151,29 +152,48 @@ def compare_docx_files(file1, file2, **options):
        text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
        if ignore_case:
            text1, text2 = text1.lower(), text2.lower()
-        if text1 != text2:
-            return 0
+
+        if fuzzy_match:
+            similarity = fuzz.ratio(text1, text2) / 100.0
+            return similarity
+        else:
+            if text1 != text2:
+                return 0
    else:
-        print("ignore_blanks=false")
        if len(doc1_paragraphs) != len(doc2_paragraphs):
            print(doc1_paragraphs)
            print(doc2_paragraphs)
            print(len(doc1_paragraphs))
            print(len(doc2_paragraphs))
            return 0
-        print("in compare")
-        # Compare each paragraph
-        for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
-            if ignore_case:
-                p1, p2 = p1.lower(), p2.lower()
-            if p1 != p2:
-                # show the difference
-                print("=== First Paragraph ===")
-                print(f"\033[92m{repr(p1)}\033[0m")  # Green color for p1, repr() shows hidden chars
-                print("=== Second Paragraph ===") 
-                print(f"\033[91m{repr(p2)}\033[0m")  # Red color for p2, repr() shows hidden chars
-                print("=" * 50)  # Clear boundary
-                return 0
+        
+        if fuzzy_match:
+            total_similarity = 0
+            if not doc1_paragraphs:
+                return 1.0
+            for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
+                if ignore_case:
+                    p1, p2 = p1.lower(), p2.lower()
+                total_similarity += fuzz.ratio(p1, p2) / 100.0
+            
+            if len(doc1_paragraphs) == 0:
+                return 1.0 if len(doc2_paragraphs) == 0 else 0.0
+
+            avg_similarity = total_similarity / len(doc1_paragraphs)
+            return avg_similarity
+        else:
+            # Compare each paragraph
+            for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
+                if ignore_case:
+                    p1, p2 = p1.lower(), p2.lower()
+                if p1 != p2:
+                    # show the difference
+                    print("=== First Paragraph ===")
+                    print(f"\033[92m{repr(p1)}\033[0m")  # Green color for p1, repr() shows hidden chars
+                    print("=== Second Paragraph ===")
+                    print(f"\033[91m{repr(p2)}\033[0m")  # Red color for p2, repr() shows hidden chars
+                    print("=" * 50)  # Clear boundary
+                    return 0

    return 1