Enhance image text comparison functionality with detailed logging

- Added logging for OCR results and text matching outcomes in compare_image_text function.
- Updated JSON examples to support multiple expected results: the evaluator's "func", "result", and "expected" fields are now lists combined with "conj": "or".
- Enhanced handling of expected text rules to include multiple variations (letter case, ".jpg" suffix, and alternate names) for better matching accuracy.
This commit is contained in:
yuanmengqi
2025-07-10 22:32:53 +00:00
parent 4e3446d6fe
commit 6897e5320d
4 changed files with 161 additions and 18 deletions

View File

@@ -297,8 +297,24 @@ def compare_image_text(image_path, rule):
reader = easyocr.Reader(['en'])
result = reader.readtext(image_path)
extracted_text = ' '.join([entry[1] for entry in result])
# Log OCR results
logger.info(f"OCR extracted texts: {[entry[1] for entry in result]}")
logger.info(f"Combined extracted text: {extracted_text}")
if rule['type'] == 'text':
return 1 if rule['text'] in extracted_text else 0
target_text = rule['text']
match_found = target_text in extracted_text
# Log matching results
logger.info(f"Target text: '{target_text}'")
logger.info(f"Match found: {match_found}")
if match_found:
logger.info("✅ Text matching successful!")
else:
logger.info("❌ Text matching failed!")
return 1 if match_found else 0
else:
raise ValueError("Unsupported rule type")
@@ -986,3 +1002,10 @@ def compare_unique_train_records(processed_file, expected_files, **kwargs):
return 0
return 1
if __name__ == "__main__":
image_path = "/home/ubuntu/OSWorld/cache/02ce9a50-7af2-47ed-8596-af0c230501f8/ls.png"
print(compare_image_text(image_path, {
"type": "text",
"text": "ls"
}))

View File

@@ -402,12 +402,17 @@ def check_direct_json_object(result, rules) -> float:
expected_value_list = expected_json.get(key)
logger.info(f"[DEBUG] Checking list key '{key}': expected_list={expected_value_list}, actual='{result.get(key)}'")
for each_expected_value in expected_value_list:
# Handle both list and string cases
if isinstance(result.get(key), list) and each_expected_value in result.get(key):
flag = 1
logger.info(f"[DEBUG] Found expected value '{each_expected_value}' in result list for key '{key}'")
break
elif isinstance(result.get(key), str) and each_expected_value == result.get(key):
flag = 1
logger.info(f"[DEBUG] Found expected value '{each_expected_value}' matches result string for key '{key}'")
break
if flag == 0:
logger.info(f"[DEBUG] No expected values found in result list for key '{key}', returning 0.0")
logger.info(f"[DEBUG] No expected values found in result for key '{key}', returning 0.0")
return 0.
elif isinstance(expected_json.get(key), str):
expected_str = expected_json.get(key)

View File

@@ -44,19 +44,52 @@
"os"
],
"evaluator": {
"func": "compare_image_text",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/ls.png",
"dest": "ls.png"
},
"expected": {
"type": "rule",
"rules": {
"type": "text",
"text": "ls"
"func": [
"compare_image_text",
"compare_image_text",
"compare_image_text"
],
"conj": "or",
"result": [
{
"type": "vm_file",
"path": "/home/user/Desktop/ls.png",
"dest": "ls.png"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/ls.png",
"dest": "ls.png"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/ls.png",
"dest": "ls.png"
}
}
],
"expected": [
{
"type": "rule",
"rules": {
"type": "text",
"text": "ls"
}
},
{
"type": "rule",
"rules": {
"type": "text",
"text": "1s"
}
},
{
"type": "rule",
"rules": {
"type": "text",
"text": "1s"
}
}
]
},
"proxy": false
}

View File

@@ -69,16 +69,98 @@
"rules": {
"expected": {
"ec076282f61ba74642e94b5a6a1250c6988204d59d9b02936606b6b8ef1e4433": [
"Kilimanjaro"
"Kili",
"kili",
"Kili.jpg",
"kili.jpg",
"Kilimanjaro",
"kilimanjaro",
"Kilimanjaro.jpg",
"kilimanjaro.jpg",
"Mount Kilimanjaro",
"mount kilimanjaro",
"Mount Kilimanjaro.jpg",
"mount kilimanjaro.jpg",
"Kilimanjaro Mountain",
"kilimanjaro mountain",
"Kilimanjaro Mountain.jpg",
"kilimanjaro mountain.jpg"
],
"6ed4239ecc2be3ec15ad65a78c5c823b9004d640b8cc83a6a7af5930f354de91": [
"Himalayas",
"Everest",
"Sagarmatha"
"everest",
"Everest.jpg",
"everest.jpg",
"Mount Everest",
"mount everest",
"Mount Everest.jpg",
"mount everest.jpg",
"Everest Mountain",
"everest mountain",
"Everest Mountain.jpg",
"everest mountain.jpg",
"Sagarmatha",
"sagarmatha",
"Sagarmatha.jpg",
"sagarmatha.jpg",
"Sagarmatha Mountain",
"sagarmatha mountain",
"Sagarmatha Mountain.jpg",
"sagarmatha mountain.jpg",
"Chomolungma",
"chomolungma",
"Chomolungma.jpg",
"chomolungma.jpg",
"Qomolangma",
"qomolangma",
"Qomolangma.jpg",
"qomolangma.jpg",
"Himalayas",
"himalayas",
"Himalayas.jpg",
"himalayas.jpg",
"Himalayas Mountain",
"himalayas mountain",
"Himalayas Mountain.jpg",
"himalayas mountain.jpg",
"Himalaya",
"himalaya",
"Himalaya.jpg",
"himalaya.jpg",
"Himalaya Mountain",
"himalaya mountain",
"Himalaya Mountain.jpg",
"himalaya mountain.jpg"
],
"79f45d40d8413d4e81f1b9734ea39e58622cafd79e12bab32959643fc245147c": [
"Hua",
"hua"
"hua",
"Hua.jpg",
"hua.jpg",
"Mount Hua",
"mount hua",
"Mount Hua.jpg",
"mount hua.jpg",
"Hua Mountain",
"hua mountain",
"Hua Mountain.jpg",
"hua mountain.jpg",
"Huashan",
"huashan",
"Huashan.jpg",
"huashan.jpg",
"Hua Shan",
"hua shan",
"Hua Shan.jpg",
"hua shan.jpg",
"Huashan Mountain",
"huashan mountain",
"Huashan Mountain.jpg",
"huashan mountain.jpg",
"Hua Shan Mountain",
"hua shan mountain",
"Hua Shan Mountain.jpg",
"hua shan mountain.jpg"
]
},
"expect_in_result": true