Enhance image text comparison functionality with detailed logging

- Added logging for OCR results and text-matching outcomes in the compare_image_text function.
- Updated JSON examples to support multiple expected results and improved structure for evaluator functions.
- Enhanced handling of expected text rules to include multiple variations for better matching accuracy.
This commit is contained in:
yuanmengqi
2025-07-10 22:32:53 +00:00
parent 4e3446d6fe
commit 6897e5320d
4 changed files with 161 additions and 18 deletions

View File

@@ -297,8 +297,24 @@ def compare_image_text(image_path, rule):
reader = easyocr.Reader(['en'])
result = reader.readtext(image_path)
extracted_text = ' '.join([entry[1] for entry in result])
# Log OCR results
logger.info(f"OCR extracted texts: {[entry[1] for entry in result]}")
logger.info(f"Combined extracted text: {extracted_text}")
if rule['type'] == 'text':
return 1 if rule['text'] in extracted_text else 0
target_text = rule['text']
match_found = target_text in extracted_text
# Log matching results
logger.info(f"Target text: '{target_text}'")
logger.info(f"Match found: {match_found}")
if match_found:
logger.info("✅ Text matching successful!")
else:
logger.info("❌ Text matching failed!")
return 1 if match_found else 0
else:
raise ValueError("Unsupported rule type")
@@ -986,3 +1002,10 @@ def compare_unique_train_records(processed_file, expected_files, **kwargs):
return 0
return 1
if __name__ == "__main__":
    # Manual check of compare_image_text against a single image.
    # The image path may be given as the first command-line argument; with no
    # argument the original hard-coded cache path is used, so the previous
    # invocation keeps working unchanged.
    import sys

    default_image_path = "/home/ubuntu/OSWorld/cache/02ce9a50-7af2-47ed-8596-af0c230501f8/ls.png"
    image_path = sys.argv[1] if len(sys.argv) > 1 else default_image_path

    # Rule asks whether the text "ls" occurs in the image; print the returned score.
    print(compare_image_text(image_path, {
        "type": "text",
        "text": "ls"
    }))