Task fix batch (#383)

* update 873cafdd-a581-47f6-8b33-b9696ddb7b05 task eval

* fix c1fa57f3-c3db-4596-8f09-020701085416: add tolerance to URL matching

* 8df7e444-8e06-4f93-8a1a-c5c974269d82: add clearer instructions for the filename of the compressed archive

* add address string normalization for 6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a

---------

Co-authored-by: Jiaqi <dengjiaqi@moonshot.cn>
This commit is contained in:
MillanK
2025-11-19 17:24:25 +08:00
committed by GitHub
parent 903ed36715
commit cbc3b590ff
5 changed files with 44 additions and 33 deletions

View File

@@ -827,8 +827,8 @@ def get_active_tab_info(env, config: Dict[str, str]):
try:
logger.info(f"[ACTIVE_TAB_INFO] Navigating to URL: {active_tab_url}")
page.goto(active_tab_url, wait_until='networkidle', timeout=timeout_ms)
page.wait_for_load_state('networkidle', timeout=timeout_ms) # Wait for the 'load' event to complete
page.goto(active_tab_url, wait_until='load', timeout=timeout_ms)
page.wait_for_load_state('load', timeout=timeout_ms) # Wait for the 'load' event to complete
active_tab_info = {
'title': page.title(),

View File

@@ -2,6 +2,8 @@ import functools
import itertools
import logging
import os.path
import re
import unicodedata
# import operator
from numbers import Number
@@ -744,6 +746,18 @@ def compare_table(result: str, expected: str = None, **options) -> float:
# }}} function compare_table #
def _normalize_city_string(value: Any) -> str:
"""Lowercase, strip punctuation, and remove accents for tolerant matching."""
if value is None:
return ""
if not isinstance(value, str):
value = str(value)
normalized = unicodedata.normalize("NFKD", value)
normalized = "".join(ch for ch in normalized if not unicodedata.combining(ch))
normalized = re.sub(r"[^a-z0-9]+", " ", normalized.lower())
return normalized.strip()
def compare_conference_city_in_order(actual_city_list_path, expected_city):
expected_city_list = expected_city["expected"]
wb = openpyxl.load_workbook(actual_city_list_path)
@@ -752,38 +766,35 @@ def compare_conference_city_in_order(actual_city_list_path, expected_city):
for row in sheet["C2:C22"]:
for cell in row:
actual_city_list.append(cell.value)
# expected_city is the city that we want to compare with the actual city list
# must in order index
# debug
try:
for i in range(len(actual_city_list)):
if isinstance(expected_city_list[i], str):
if expected_city_list[i] not in actual_city_list[i]:
logger.debug(
f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}"
)
print(
f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}"
)
return 0.0
elif isinstance(expected_city_list[i], List):
if not any(
possible_str in actual_city_list[i]
for possible_str in expected_city_list[i]
):
logger.debug(
f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}"
)
print(
f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}"
)
return 0.0
for i, actual_city in enumerate(actual_city_list):
actual_normalized = _normalize_city_string(actual_city)
expected_entry = expected_city_list[i]
if isinstance(expected_entry, str):
expected_candidates = [expected_entry]
elif isinstance(expected_entry, List):
expected_candidates = expected_entry
else:
raise TypeError("Expected city should be a string or a list of strings")
except:
matched = False
for candidate in expected_candidates:
normalized_candidate = _normalize_city_string(candidate)
if normalized_candidate and normalized_candidate in actual_normalized:
matched = True
break
if not matched:
logger.debug(
f"Expected city {expected_entry}; Actual city {actual_city}"
)
print(f"Expected city {expected_entry}; Actual city {actual_city}")
return 0.0
except Exception as exc:
logger.error(f"Error comparing conference cities: {exc}")
return 0.0
return 1.0

View File

@@ -52,7 +52,7 @@
"type": "rule",
"rules": {
"expected": [
"united.com/en/us/checked-bag-fee-calculator"
"united\\.com/en/us/checked-bag-fee-calculator(/.*)?"
]
}
}

View File

@@ -60,7 +60,7 @@
"rules": {
"expected": [
"Zoom Chrome Extension",
"Speechify Text to Speech Voice Reader",
"Speechify — Voice AI Assistant",
"React Developer Tools",
"Momentum",
"Google Translate"

View File

@@ -40,8 +40,8 @@
},
"result": {
"type": "vm_file",
"path": "/home/user/Recruitment_and_retention_of_health_professionals_across_Europe.zip",
"dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip"
"path": "/home/user/essay_submission.zip",
"dest": "essay_submission.zip"
}
},
"proxy": false,