Enhance image text comparison functionality with detailed logging

- Added logging for OCR results and text-matching outcomes in the `compare_image_text` function.
- Updated JSON examples to support multiple expected results and improved structure for evaluator functions.
- Enhanced handling of expected text rules to include multiple variations for better matching accuracy.
This commit is contained in:
yuanmengqi
2025-07-10 22:32:53 +00:00
parent 4e3446d6fe
commit 6897e5320d
4 changed files with 161 additions and 18 deletions

View File

@@ -297,8 +297,24 @@ def compare_image_text(image_path, rule):
reader = easyocr.Reader(['en'])
result = reader.readtext(image_path)
extracted_text = ' '.join([entry[1] for entry in result])
# Log OCR results
logger.info(f"OCR extracted texts: {[entry[1] for entry in result]}")
logger.info(f"Combined extracted text: {extracted_text}")
if rule['type'] == 'text':
return 1 if rule['text'] in extracted_text else 0
target_text = rule['text']
match_found = target_text in extracted_text
# Log matching results
logger.info(f"Target text: '{target_text}'")
logger.info(f"Match found: {match_found}")
if match_found:
logger.info("✅ Text matching successful!")
else:
logger.info("❌ Text matching failed!")
return 1 if match_found else 0
else:
raise ValueError("Unsupported rule type")
@@ -986,3 +1002,10 @@ def compare_unique_train_records(processed_file, expected_files, **kwargs):
return 0
return 1
if __name__ == "__main__":
    # Ad-hoc manual check: run compare_image_text on one cached screenshot
    # with a plain "text" rule and print the 0/1 score it returns.
    image_path = "/home/ubuntu/OSWorld/cache/02ce9a50-7af2-47ed-8596-af0c230501f8/ls.png"
    rule = {"type": "text", "text": "ls"}
    score = compare_image_text(image_path, rule)
    print(score)

View File

@@ -402,12 +402,17 @@ def check_direct_json_object(result, rules) -> float:
expected_value_list = expected_json.get(key)
logger.info(f"[DEBUG] Checking list key '{key}': expected_list={expected_value_list}, actual='{result.get(key)}'")
for each_expected_value in expected_value_list:
# Handle both list and string cases
if isinstance(result.get(key), list) and each_expected_value in result.get(key):
flag = 1
logger.info(f"[DEBUG] Found expected value '{each_expected_value}' in result list for key '{key}'")
break
elif isinstance(result.get(key), str) and each_expected_value == result.get(key):
flag = 1
logger.info(f"[DEBUG] Found expected value '{each_expected_value}' matches result string for key '{key}'")
break
if flag == 0:
logger.info(f"[DEBUG] No expected values found in result list for key '{key}', returning 0.0")
logger.info(f"[DEBUG] No expected values found in result for key '{key}', returning 0.0")
return 0.
elif isinstance(expected_json.get(key), str):
expected_str = expected_json.get(key)