diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index f498ae9..cb2a176 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -827,8 +827,8 @@ def get_active_tab_info(env, config: Dict[str, str]): try: logger.info(f"[ACTIVE_TAB_INFO] Navigating to URL: {active_tab_url}") - page.goto(active_tab_url, wait_until='networkidle', timeout=timeout_ms) - page.wait_for_load_state('networkidle', timeout=timeout_ms) # Wait for the 'load' event to complete + page.goto(active_tab_url, wait_until='load', timeout=timeout_ms) + page.wait_for_load_state('load', timeout=timeout_ms) # Wait for the 'load' event to complete active_tab_info = { 'title': page.title(), diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 4a1598c..41dd3d6 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -2,6 +2,8 @@ import functools import itertools import logging import os.path +import re +import unicodedata # import operator from numbers import Number @@ -744,6 +746,18 @@ def compare_table(result: str, expected: str = None, **options) -> float: # }}} function compare_table # +def _normalize_city_string(value: Any) -> str: + """Lowercase, strip punctuation, and remove accents for tolerant matching.""" + if value is None: + return "" + if not isinstance(value, str): + value = str(value) + normalized = unicodedata.normalize("NFKD", value) + normalized = "".join(ch for ch in normalized if not unicodedata.combining(ch)) + normalized = re.sub(r"[^a-z0-9]+", " ", normalized.lower()) + return normalized.strip() + + def compare_conference_city_in_order(actual_city_list_path, expected_city): expected_city_list = expected_city["expected"] wb = openpyxl.load_workbook(actual_city_list_path) @@ -752,38 +766,35 @@ def compare_conference_city_in_order(actual_city_list_path, expected_city): for row in sheet["C2:C22"]: for cell in row: actual_city_list.append(cell.value) - # expected_city is the city that we want to compare with the actual city list - # must in order index - # debug + try: - for i in range(len(actual_city_list)): - if isinstance(expected_city_list[i], str): - if expected_city_list[i] not in actual_city_list[i]: - logger.debug( - f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}" - ) - print( - f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}" - ) - return 0.0 - - elif isinstance(expected_city_list[i], List): - if not any( - possible_str in actual_city_list[i] - for possible_str in expected_city_list[i] - ): - logger.debug( - f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}" - ) - print( - f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}" - ) - return 0.0 + for i, actual_city in enumerate(actual_city_list): + actual_normalized = _normalize_city_string(actual_city) + expected_entry = expected_city_list[i] + if isinstance(expected_entry, str): + expected_candidates = [expected_entry] + elif isinstance(expected_entry, List): + expected_candidates = expected_entry else: raise TypeError("Expected city should be a string or a list of strings") - except: + matched = False + for candidate in expected_candidates: + normalized_candidate = _normalize_city_string(candidate) + if normalized_candidate and normalized_candidate in actual_normalized: + matched = True + break + + if not matched: + logger.debug( + f"Expected city {expected_entry}; Actual city {actual_city}" + ) + print(f"Expected city {expected_entry}; Actual city {actual_city}") + return 0.0 + + except Exception as exc: + logger.error(f"Error comparing conference cities: {exc}") return 0.0 return 1.0 diff --git a/evaluation_examples/examples/chrome/c1fa57f3-c3db-4596-8f09-020701085416.json b/evaluation_examples/examples/chrome/c1fa57f3-c3db-4596-8f09-020701085416.json index 64ff372..b63b1c6 100644 --- a/evaluation_examples/examples/chrome/c1fa57f3-c3db-4596-8f09-020701085416.json +++ b/evaluation_examples/examples/chrome/c1fa57f3-c3db-4596-8f09-020701085416.json @@ -52,7 +52,7 @@ "type": "rule", "rules": { "expected": [ - "united.com/en/us/checked-bag-fee-calculator" + "united\\.com/en/us/checked-bag-fee-calculator(/.*)?" ] } } diff --git a/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json b/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json index ac1919a..d6a0709 100644 --- a/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json +++ b/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json @@ -60,7 +60,7 @@ "rules": { "expected": [ "Zoom Chrome Extension", - "Speechify Text to Speech Voice Reader", + "Speechify — Voice AI Assistant", "React Developer Tools", "Momentum", "Google Translate" diff --git a/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json index f99e0bd..004c475 100644 --- a/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json +++ b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json @@ -40,8 +40,8 @@ }, "result": { "type": "vm_file", - "path": "/home/user/Recruitment_and_retention_of_health_professionals_across_Europe.zip", - "dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip" + "path": "/home/user/essay_submission.zip", + "dest": "essay_submission.zip" } }, "proxy": false,