Enhance image text comparison functionality with detailed logging

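- Added logging of the OCR results and the text-matching outcome in the compare_image_text function.
- Updated the JSON examples to support multiple expected results and restructured their evaluator entries (lists of functions, results, and expected rules combined with "or").
- Expanded the expected-text rules to include multiple name variations for better matching accuracy.
- Extended check_direct_json_object so that a string result equal to one of the expected list values also counts as a match.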
2025-07-10 22:32:53 +00:00
parent 4e3446d6fe
commit 6897e5320d
4 changed files with 161 additions and 18 deletions
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -297,8 +297,24 @@ def compare_image_text(image_path, rule):
    reader = easyocr.Reader(['en'])
    result = reader.readtext(image_path)
    extracted_text = ' '.join([entry[1] for entry in result])
+    
+    # Log OCR results
+    logger.info(f"OCR extracted texts: {[entry[1] for entry in result]}")
+    logger.info(f"Combined extracted text: {extracted_text}")
+    
    if rule['type'] == 'text':
-        return 1 if rule['text'] in extracted_text else 0
+        target_text = rule['text']
+        match_found = target_text in extracted_text
+        
+        # Log matching results
+        logger.info(f"Target text: '{target_text}'")
+        logger.info(f"Match found: {match_found}")
+        if match_found:
+            logger.info("✅ Text matching successful!")
+        else:
+            logger.info("❌ Text matching failed!")
+        
+        return 1 if match_found else 0
    else:
        raise ValueError("Unsupported rule type")

@@ -986,3 +1002,10 @@ def compare_unique_train_records(processed_file, expected_files, **kwargs):
        return 0

    return 1
+
+if __name__ == "__main__":
+    image_path = "/home/ubuntu/OSWorld/cache/02ce9a50-7af2-47ed-8596-af0c230501f8/ls.png"
+    print(compare_image_text(image_path, {
+        "type": "text",
+        "text": "ls"
+      }))
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -402,12 +402,17 @@ def check_direct_json_object(result, rules) -> float:
                    expected_value_list = expected_json.get(key)
                    logger.info(f"[DEBUG] Checking list key '{key}': expected_list={expected_value_list}, actual='{result.get(key)}'")
                    for each_expected_value in expected_value_list:
+                        # Handle both list and string cases
                        if isinstance(result.get(key), list) and each_expected_value in result.get(key):
                            flag = 1
                            logger.info(f"[DEBUG] Found expected value '{each_expected_value}' in result list for key '{key}'")
                            break
+                        elif isinstance(result.get(key), str) and each_expected_value == result.get(key):
+                            flag = 1
+                            logger.info(f"[DEBUG] Found expected value '{each_expected_value}' matches result string for key '{key}'")
+                            break
                    if flag == 0:
-                        logger.info(f"[DEBUG] No expected values found in result list for key '{key}', returning 0.0")
+                        logger.info(f"[DEBUG] No expected values found in result for key '{key}', returning 0.0")
                        return 0.
                elif isinstance(expected_json.get(key), str):
                    expected_str = expected_json.get(key)
--- a/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json
+++ b/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json
@@ -44,19 +44,52 @@
    "os"
  ],
  "evaluator": {
-    "func": "compare_image_text",
-    "result": {
-      "type": "vm_file",
-      "path": "/home/user/Desktop/ls.png",
-      "dest": "ls.png"
-    },
-    "expected": {
-      "type": "rule",
-      "rules": {
-        "type": "text",
-        "text": "ls"
+    "func": [
+      "compare_image_text",
+      "compare_image_text",
+      "compare_image_text"
+    ],
+    "conj": "or",
+    "result": [
+      {
+        "type": "vm_file",
+        "path": "/home/user/Desktop/ls.png",
+        "dest": "ls.png"
+      },
+      {
+        "type": "vm_file",
+        "path": "/home/user/Desktop/ls.png",
+        "dest": "ls.png"
+      },
+      {
+        "type": "vm_file",
+        "path": "/home/user/Desktop/ls.png",
+        "dest": "ls.png"
      }
-    }
+    ],
+    "expected": [
+      {
+        "type": "rule",
+        "rules": {
+          "type": "text",
+          "text": "ls"
+        }
+      },
+      {
+        "type": "rule",
+        "rules": {
+          "type": "text",
+          "text": "1s"
+        }
+      },
+      {
+        "type": "rule",
+        "rules": {
+          "type": "text",
+          "text": "1s"
+        }
+      }
+    ]
  },
  "proxy": false
 }
--- a/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json
+++ b/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json
@@ -69,16 +69,98 @@
      "rules": {
        "expected": {
          "ec076282f61ba74642e94b5a6a1250c6988204d59d9b02936606b6b8ef1e4433": [
-            "Kilimanjaro"
+            "Kili",
+            "kili",
+            "Kili.jpg",
+            "kili.jpg",
+            "Kilimanjaro",
+            "kilimanjaro",
+            "Kilimanjaro.jpg",
+            "kilimanjaro.jpg",
+            "Mount Kilimanjaro",
+            "mount kilimanjaro",
+            "Mount Kilimanjaro.jpg",
+            "mount kilimanjaro.jpg",
+            "Kilimanjaro Mountain",
+            "kilimanjaro mountain",
+            "Kilimanjaro Mountain.jpg",
+            "kilimanjaro mountain.jpg"
          ],
          "6ed4239ecc2be3ec15ad65a78c5c823b9004d640b8cc83a6a7af5930f354de91": [
-            "Himalayas",
            "Everest",
-            "Sagarmatha"
+            "everest",
+            "Everest.jpg",
+            "everest.jpg",
+            "Mount Everest",
+            "mount everest",
+            "Mount Everest.jpg",
+            "mount everest.jpg",
+            "Everest Mountain",
+            "everest mountain",
+            "Everest Mountain.jpg",
+            "everest mountain.jpg",
+            "Sagarmatha",
+            "sagarmatha",
+            "Sagarmatha.jpg",
+            "sagarmatha.jpg",
+            "Sagarmatha Mountain",
+            "sagarmatha mountain",
+            "Sagarmatha Mountain.jpg",
+            "sagarmatha mountain.jpg",
+            "Chomolungma",
+            "chomolungma",
+            "Chomolungma.jpg",
+            "chomolungma.jpg",
+            "Qomolangma",
+            "qomolangma",
+            "Qomolangma.jpg",
+            "qomolangma.jpg",
+            "Himalayas",
+            "himalayas",
+            "Himalayas.jpg",
+            "himalayas.jpg",
+            "Himalayas Mountain",
+            "himalayas mountain",
+            "Himalayas Mountain.jpg",
+            "himalayas mountain.jpg",
+            "Himalaya",
+            "himalaya",
+            "Himalaya.jpg",
+            "himalaya.jpg",
+            "Himalaya Mountain",
+            "himalaya mountain",
+            "Himalaya Mountain.jpg",
+            "himalaya mountain.jpg"
          ],
          "79f45d40d8413d4e81f1b9734ea39e58622cafd79e12bab32959643fc245147c": [
            "Hua",
-            "hua"
+            "hua",
+            "Hua.jpg",
+            "hua.jpg",
+            "Mount Hua",
+            "mount hua",
+            "Mount Hua.jpg",
+            "mount hua.jpg",
+            "Hua Mountain",
+            "hua mountain",
+            "Hua Mountain.jpg",
+            "hua mountain.jpg",
+            "Huashan",
+            "huashan",
+            "Huashan.jpg",
+            "huashan.jpg",
+            "Hua Shan",
+            "hua shan",
+            "Hua Shan.jpg",
+            "hua shan.jpg",
+            "Huashan Mountain",
+            "huashan mountain",
+            "Huashan Mountain.jpg",
+            "huashan mountain.jpg",
+            "Hua Shan Mountain",
+            "hua shan mountain",
+            "Hua Shan Mountain.jpg",
+            "hua shan mountain.jpg"
          ]
        },
        "expect_in_result": true