Enhance image text comparison functionality with detailed logging

- Added logging of OCR results and text-matching outcomes to the compare_image_text function.
- Updated JSON examples to support multiple expected results and improved the structure for evaluator functions.
- Enhanced handling of expected-text rules to include multiple variations for better matching accuracy.
2025-07-10 22:32:53 +00:00
parent 4e3446d6fe
commit 6897e5320d
4 changed files with 161 additions and 18 deletions
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -297,8 +297,24 @@ def compare_image_text(image_path, rule):
    reader = easyocr.Reader(['en'])
    result = reader.readtext(image_path)
    extracted_text = ' '.join([entry[1] for entry in result])
+    
+    # Log OCR results
+    logger.info(f"OCR extracted texts: {[entry[1] for entry in result]}")
+    logger.info(f"Combined extracted text: {extracted_text}")
+    
    if rule['type'] == 'text':
-        return 1 if rule['text'] in extracted_text else 0
+        target_text = rule['text']
+        match_found = target_text in extracted_text
+        
+        # Log matching results
+        logger.info(f"Target text: '{target_text}'")
+        logger.info(f"Match found: {match_found}")
+        if match_found:
+            logger.info("✅ Text matching successful!")
+        else:
+            logger.info("❌ Text matching failed!")
+        
+        return 1 if match_found else 0
    else:
        raise ValueError("Unsupported rule type")

@@ -986,3 +1002,10 @@ def compare_unique_train_records(processed_file, expected_files, **kwargs):
        return 0

    return 1
+
+if __name__ == "__main__":
+    image_path = "/home/ubuntu/OSWorld/cache/02ce9a50-7af2-47ed-8596-af0c230501f8/ls.png"
+    print(compare_image_text(image_path, {
+        "type": "text",
+        "text": "ls"
+      }))