From 97ed6f99b0039c76eb82d1210b299c7fef87d85b Mon Sep 17 00:00:00 2001 From: yuanmengqi Date: Sat, 12 Jul 2025 20:28:55 +0000 Subject: [PATCH] Final review multi_apps fix the rest part --- desktop_env/evaluators/metrics/chrome.py | 24 +++++++++++++------ desktop_env/evaluators/metrics/gimp.py | 5 +++- .../a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json | 14 +++++++---- .../e8172110-ec08-421b-a6f5-842e6451911f.json | 16 +++++-------- .../f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json | 2 +- 5 files changed, 38 insertions(+), 23 deletions(-) diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index 632c53e..6c3811f 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -29,8 +29,8 @@ def is_expected_active_tab(active_tab_info: Dict[str, str], rule: Dict[str, Any] actual_url = active_tab_info.get('url', None) else: actual_url = active_tab_info - print("expected_url: {}".format(expected_url)) - print("actual_url: {}".format(actual_url)) + logger.info("expected_url: {}".format(expected_url)) + logger.info("actual_url: {}".format(actual_url)) return 1 if compare_urls(expected_url, actual_url) else 0 else: logger.error(f"Unknown type: {match_type}") @@ -76,23 +76,26 @@ def is_expected_url_pattern_match(result, rules) -> float: if type(result) == dict: result_url = result["url"] - print("result url: {}".format(result_url)) + logger.info("result url: {}".format(result_url)) else: result_url = result # expect_regex = re.compile(rules["expected"]) patterns = rules["expected"] - print("expected_regex: {}".format(patterns)) + logger.info("expected_regex: {}".format(patterns)) for pattern in patterns: match = re.search(pattern, result_url) - print(match) + logger.info("match: {}".format(match)) if not match: return 0. return 1. def is_expected_installed_extensions(installed_extensions, expected) -> float: - print("installed_extensions: ") - print(installed_extensions) + if not installed_extensions: + return 0. + + logger.info("installed_extensions: ") + logger.info(installed_extensions) expected_extensions = expected["expected"] # whether the expected extensions are installed @@ -109,6 +112,8 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f """ Checks if the expected tabs are open in Chrome. """ + if not open_tabs: + return 0. match_type = rule['type'] @@ -146,8 +151,10 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float: bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None) if liked_authors_folder: # Check if it contains the specified URLs + logger.info("'Liked Authors' folder exists") liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if bookmark['type'] == 'url'] + logger.info("Here is the 'Liked Authors' folder's urls: {}".format(liked_authors_urls)) urls = rule['urls'] @@ -168,6 +175,9 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float: def is_expected_search_query(active_tab_info: Dict[str, str], rules: Dict[str, Any]) -> float: + if not active_tab_info: + return 0. + expected = rules['expect'] pattern = expected['pattern'] matched = re.search(pattern, active_tab_info['url']) diff --git a/desktop_env/evaluators/metrics/gimp.py b/desktop_env/evaluators/metrics/gimp.py index 5dddd78..a6dcc29 100644 --- a/desktop_env/evaluators/metrics/gimp.py +++ b/desktop_env/evaluators/metrics/gimp.py @@ -396,7 +396,10 @@ def check_structure_sim_resized(src_path, tgt_path): # Check if the structure is similar structure_same = structure_check_by_ssim(img_src_resized, img_tgt) - return structure_same + if structure_same: + return 1. + else: + return 0. def check_contrast_increase_and_structure_sim(src_path, tgt_path): diff --git a/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json index 37af93b..a612d38 100644 --- a/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json +++ b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json @@ -83,21 +83,27 @@ "urls": [ [ "https://jimfan.me/", - "https://research.nvidia.com/person/linxi-jim-fan" + "https://research.nvidia.com/person/linxi-jim-fan", + "https://www.linkedin.com/in/drjimfan/" ], [ "https://research.nvidia.com/person/de-an-huang", - "https://ai.stanford.edu/~dahuang/" + "https://ai.stanford.edu/~dahuang/", + "https://www.linkedin.com/in/de-an-huang-38242a69" ], [ "https://yukezhu.me/", "https://www.cs.utexas.edu/people/faculty-researchers/yuke-zhu", "https://experts.utexas.edu/yuke_zhu", - "https://research.nvidia.com/person/yuke-zhu" + "https://research.nvidia.com/person/yuke-zhu", + "https://www.linkedin.com/in/yukez/" ], [ + "https://tensorlab.cms.caltech.edu/users/anima/", "http://tensorlab.cms.caltech.edu/users/anima/", - "https://www.eas.caltech.edu/people/anima" + "https://www.eas.caltech.edu/people/anima", + "https://en.wikipedia.org/wiki/Anima_Anandkumar", + "https://www.linkedin.com/in/anima-anandkumar/" ] ] } diff --git a/evaluation_examples/examples/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f.json b/evaluation_examples/examples/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f.json index 8506dda..9f5f924 100644 --- a/evaluation_examples/examples/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f.json +++ b/evaluation_examples/examples/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f.json @@ -11,10 +11,6 @@ { "url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f/character.png", "path": "/home/user/Desktop/character.png" - }, - { - "url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f/character_no_background_gold.png", - "path": "/home/user/Desktop/character_no_background_gold.png" } ] } @@ -36,8 +32,8 @@ ], "evaluator": { "func": [ - "check_structure_sim_resized", - "check_structure_sim_resized" + "check_structure_sim", + "check_structure_sim" ], "result": [ { @@ -53,13 +49,13 @@ ], "expected": [ { - "type": "vm_file", - "path": "/home/user/Desktop/character_no_background_gold.png", + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f/character_no_background_gold.png", "dest": "character_no_background_gold.png" }, { - "type": "vm_file", - "path": "/home/user/Desktop/character_no_background_gold.png", + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f/character_no_background_gold.png", "dest": "character_no_background_gold.png" } ] diff --git a/evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json b/evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json index 7dd4f83..2441c3e 100644 --- a/evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json +++ b/evaluation_examples/examples/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2.json @@ -65,7 +65,7 @@ "type": "rule", "rules": { "expect": { - "pattern": "https?://(www\\.?)?google\\.com/search\\?q=nereida(&|$)" + "pattern": "(?i)https?://(?:www\\.)?google\\.com/search\\?q=nereida(?:&|$|#).*" } } }