From 2e3a4a5ba9e5e61ca2f29c2a5a6316cd1056f486 Mon Sep 17 00:00:00 2001 From: yuanmengqi Date: Tue, 1 Jul 2025 15:57:14 +0000 Subject: [PATCH] fix tasks --- desktop_env/evaluators/getters/chrome.py | 2 ++ desktop_env/evaluators/metrics/general.py | 10 ++++++++-- .../47543840-672a-467d-80df-8f7c3b9788c9.json | 4 ++-- .../6766f2b8-8a72-417f-a9e5-56fcaa735837.json | 2 +- .../f5d96daf-83a8-4c86-9686-bada31fc66ab.json | 20 ++++++++++++++----- .../fc6d8143-9452-4171-9459-7f515143419a.json | 8 ++++---- .../settings/proxy/dataimpulse.json | 4 ++-- manual_examine.py | 3 +-- 8 files changed, 35 insertions(+), 18 deletions(-) diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index 02a9d99..c9c852c 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -1444,6 +1444,8 @@ def get_active_tab_url_parse(env, config: Dict[str, Any]): # change original key to new key, keep value unchange value = extracted_params.pop(key) extracted_params[config["replace"][key]] = value + if config.get("split_list", False): + extracted_params = {key: extracted_params[key].split(',') for key in extracted_params.keys()} return extracted_params diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index a401b74..c2fed18 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -322,8 +322,14 @@ def check_direct_json_object(result, rules) -> float: expected_json = rules["expected"] for key in expected_json.keys(): expected_value = expected_json.get(key) - if expected_value != result.get(key): - return 0. + if expected_json.get("ignore_list_order", False): + expected_value = sorted(expected_value) + result_value = sorted(result.get(key)) + if expected_value != result_value: + return 0. + else: + if expected_value != result.get(key): + return 0. return 1.0 else: expected_json = rules["expected"] diff --git a/evaluation_examples/examples/chrome/47543840-672a-467d-80df-8f7c3b9788c9.json b/evaluation_examples/examples/chrome/47543840-672a-467d-80df-8f7c3b9788c9.json index 21e96ad..c74fdcf 100644 --- a/evaluation_examples/examples/chrome/47543840-672a-467d-80df-8f7c3b9788c9.json +++ b/evaluation_examples/examples/chrome/47543840-672a-467d-80df-8f7c3b9788c9.json @@ -1,7 +1,7 @@ { "id": "47543840-672a-467d-80df-8f7c3b9788c9", "snapshot": "chrome", - "instruction": "Find and select the car with the most number of seats to pick up in Boston Logan Intl Airport from 10th next month to 11th next month.", + "instruction": "Show me the cars available for pickup at Boston Logan Intl Airport from the 10th to the 11th of next month, sorted by the number of seats to find the largest capacity.", "source": "test_task_1", "config": [ { @@ -75,7 +75,7 @@ "goto_prefix": "https://www.", "category": "xpath", "xpathObject": { - "/html/body/div[6]/div[2]/div[1]/div/div/div[2]/div[1]/section[1]/div/form/div[1]/div[2]/div/a": "rank" + "/html/body/div[6]/div[2]/div[1]/div/div/div[2]/section[1]/div[1]/form/div[1]/div[1]/div[2]/div/a": "rank" } } ], diff --git a/evaluation_examples/examples/chrome/6766f2b8-8a72-417f-a9e5-56fcaa735837.json b/evaluation_examples/examples/chrome/6766f2b8-8a72-417f-a9e5-56fcaa735837.json index 4d6357f..a33fb4c 100644 --- a/evaluation_examples/examples/chrome/6766f2b8-8a72-417f-a9e5-56fcaa735837.json +++ b/evaluation_examples/examples/chrome/6766f2b8-8a72-417f-a9e5-56fcaa735837.json @@ -1,7 +1,7 @@ { "id": "6766f2b8-8a72-417f-a9e5-56fcaa735837", "snapshot": "chrome", - "instruction": "Could you help me install the unpacked extension at /home/user/Desktop/ in Chrome?", + "instruction": "Could you help me unzip the downloaded extension file from /home/user/Desktop/ to /home/user/Desktop/ and configure it in Chrome's extensions?", "source": "https://support.google.com/chrome/thread/205881926/it-s-possible-to-load-unpacked-extension-automatically-in-chrome?hl=en", "config": [ { diff --git a/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json b/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json index 29c13b0..32101f6 100644 --- a/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json +++ b/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json @@ -53,16 +53,26 @@ "chrome" ], "evaluator": { - "func": "is_expected_active_tab", + "func": "check_direct_json_object", "result": { - "type": "active_url_from_accessTree", - "goto_prefix": "https://www." + "type": "active_tab_url_parse", + "goto_prefix": "https://www.", + "parse_keys": [ + "modelList" + ], + "split_list": true }, "expected": { "type": "rule", "rules": { - "type": "url", - "url": "https://www.apple.com/iphone/compare/?modelList=iphone-15-pro-max,iphone-15-pro,iphone-13-pro-max" + "expected": { + "modelList": [ + "iphone-15-pro-max", + "iphone-14-pro-max", + "iphone-13-pro-max" + ] + }, + "ignore_list_order": true } } }, diff --git a/evaluation_examples/examples/chrome/fc6d8143-9452-4171-9459-7f515143419a.json b/evaluation_examples/examples/chrome/fc6d8143-9452-4171-9459-7f515143419a.json index cd2383d..985200a 100644 --- a/evaluation_examples/examples/chrome/fc6d8143-9452-4171-9459-7f515143419a.json +++ b/evaluation_examples/examples/chrome/fc6d8143-9452-4171-9459-7f515143419a.json @@ -49,10 +49,10 @@ "goto_prefix": "https://www.", "category": "class", "class_singleObject": { - "search-date": "time" + "mach-flight-context-info__wrapper--date": "time" }, - "class_multiObject": { - "search-segment-cities__city": { + "class_multiObject_child": { + "mach-flight-context-info__wrapper__info--separator": { "0": "start", "1": "end" } @@ -67,7 +67,7 @@ "expected": { "start": "NYC", "end": "CMH", - "time": "{DoW}, {Month} {DayD}, {Year}" + "time": "{DoW}, {Month} {Day0D}, {Year}" } } } diff --git a/evaluation_examples/settings/proxy/dataimpulse.json b/evaluation_examples/settings/proxy/dataimpulse.json index 34b7d59..d7f1cfb 100644 --- a/evaluation_examples/settings/proxy/dataimpulse.json +++ b/evaluation_examples/settings/proxy/dataimpulse.json @@ -2,8 +2,8 @@ { "host": "gw.dataimpulse.com", "port": 823, - "username": "fba5ac061fe18be70c6c", - "password": "3b5669b6640fc80c", + "username": "67e5faf31654b923f06b", + "password": "26a3158d346abdfa", "protocol": "http", "provider": "dataimpulse", "type": "residential", diff --git a/manual_examine.py b/manual_examine.py index ddd69b7..8e8eaf2 100644 --- a/manual_examine.py +++ b/manual_examine.py @@ -266,8 +266,7 @@ def main(): screen_size=(args.screen_width, args.screen_height), headless=args.headless, os_type="Ubuntu", - require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"], - enable_proxy=True + require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"] ) # 执行手动检查