def get_page_info(env, config: Dict[str, str]):
    """
    Open ``config["url"]`` in a new tab of the remote Chrome and snapshot the page.

    Connects over CDP to ``http://<env.vm_ip>:9222``, opens a new page in the first
    browser context, navigates to the requested URL, waits for the 'load' event and
    returns what the page shows.

    Args:
        env: environment object; only ``env.vm_ip`` is read here.
        config: getter configuration; must contain the key ``"url"``.

    Returns:
        A dict with the keys ``'title'``, ``'url'`` and ``'content'``.
        If navigation or loading times out, ``'title'`` is ``'Load timeout'``; on any
        other error it is ``'Error encountered'``. In both cases ``'url'`` is the
        page's current URL and ``'content'`` is the current HTML, or ``''`` when the
        HTML cannot be read either.

    Raises:
        KeyError: if ``config`` has no ``"url"`` entry.
        Errors raised while connecting to the remote browser or opening the new page
        are not handled here and propagate to the caller.
    """
    # Imported locally so this function does not rely on a change to the module-level imports.
    # Playwright reports timeouts with its own exception class rather than the builtin
    # TimeoutError, so catching the builtin never matches a navigation/load timeout.
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    host = env.vm_ip
    port = 9222  # fixme: this port is hard-coded, need to be changed from config file
    target_url = config["url"]

    def _fallback_info(page, title: str) -> Dict[str, str]:
        # Best-effort snapshot used after a failure: reading the HTML may fail again
        # (e.g. the page is mid-navigation), and that must not hide the original problem.
        try:
            content = page.content()
        except Exception as e:
            logger.error(f'Error: {e}')
            content = ''
        return {'title': title, 'url': page.url, 'content': content}

    remote_debugging_url = f"http://{host}:{port}"
    with sync_playwright() as p:
        # connect to remote Chrome instance
        browser = p.chromium.connect_over_cdp(remote_debugging_url)
        try:
            page = browser.contexts[0].new_page()
            try:
                # Navigation is inside the try block so that a goto() timeout is reported
                # as 'Load timeout' instead of escaping as an unhandled exception.
                page.goto(target_url)
                # Wait for the page to finish loading, this prevents the
                # "execution context was destroyed" issue
                page.wait_for_load_state('load')
                page_info = {'title': page.title(), 'url': page.url, 'content': page.content()}
            except PlaywrightTimeoutError:
                # Loading timed out: return whatever the page currently holds
                page_info = _fallback_info(page, 'Load timeout')
            except Exception as e:
                # Any other failure while navigating or reading the page
                logger.error(f'Error: {e}')
                page_info = _fallback_info(page, 'Error encountered')
        finally:
            # Always drop the CDP connection, even when an exception propagates
            browser.close()

    return page_info
get_googledrive_file(env, config: Dict[str, Any]) -> str: dest = config['dest'][idx] _path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest))) return _path_list - else: # path_list in config + else: # path_list in config _path_list = [] assert len(config['path_list']) == len(config['dest']) for idx, path in enumerate(config['path_list']): - query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1 - else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)] + query = [ + f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len( + path) - 1 + else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)] dest = config['dest'][idx] _path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest))) return _path_list @@ -545,7 +579,7 @@ def get_enable_do_not_track(env, config: Dict[str, str]): content = env.controller.get_file(preference_file_path) data = json.loads(content) - if_enable_do_not_track = data.get('enable_do_not_track', {}) # bool + if_enable_do_not_track = data.get('enable_do_not_track', {}) # bool return "true" if if_enable_do_not_track else "false" except Exception as e: logger.error(f"Error: {e}") @@ -575,7 +609,7 @@ def get_enable_enhanced_safety_browsing(env, config: Dict[str, str]): content = env.controller.get_file(preference_file_path) data = json.loads(content) - if_enable_do_not_track = data.get('safebrowsing', {}).get('enhanced', {}) # bool + if_enable_do_not_track = data.get('safebrowsing', {}).get('enhanced', {}) # bool return "true" if if_enable_do_not_track else "false" except Exception as e: logger.error(f"Error: {e}") @@ -610,7 +644,7 @@ def get_new_startup_page(env, config: Dict[str, str]): if "session" not in data.keys(): return "true" else: - if_enable_do_not_track = data.get('session', {}).get('restore_on_startup', {}) # int, need to be 5 + if_enable_do_not_track = 
data.get('session', {}).get('restore_on_startup', {}) # int, need to be 5 return "true" if if_enable_do_not_track == 5 else "false" except Exception as e: logger.error(f"Error: {e}") @@ -648,4 +682,4 @@ def get_find_unpacked_extension_path(env, config: Dict[str, str]): return all_extensions_path except Exception as e: logger.error(f"Error: {e}") - return "Google" \ No newline at end of file + return "Google" diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index c3e80b3..e65a71d 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -16,7 +16,8 @@ from .chrome import ( check_enabled_experiments, check_history_deleted, is_expected_search_query, - is_expected_active_tab + is_expected_active_tab, + is_added_to_steam_cart ) from .docs import ( compare_font_names, diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index 0517c1d..eff8d67 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -1,8 +1,12 @@ -import logging, re, os, shutil +import logging +import os +import re +import shutil from typing import Any, Dict, List, Union -from bs4 import BeautifulSoup, Tag + import fitz # PyMuPDF import rapidfuzz.fuzz as fuzz +from bs4 import BeautifulSoup, Tag from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls @@ -25,6 +29,7 @@ def is_expected_active_tab(active_tab_info: Dict[str, str], rule: Dict[str, Any] logger.error(f"Unknown type: {match_type}") return 0 + def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float: """ Checks if the expected tabs are open in Chrome. 
@@ -102,14 +107,14 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float: pred_folder = os.path.splitext(pred_path)[0] + '_pred' gold_folder = os.path.splitext(gold_path)[0] + '_gold' - if os.path.exists(pred_folder): # remove existing folder for new predictions + if os.path.exists(pred_folder): # remove existing folder for new predictions shutil.rmtree(pred_folder, ignore_errors=True) os.makedirs(pred_folder) shutil.unpack_archive(pred_path, pred_folder) - if not os.path.exists(gold_folder): # use cache if exists + if not os.path.exists(gold_folder): # use cache if exists os.makedirs(gold_folder) shutil.unpack_archive(gold_path, gold_folder) - + pred_files = sorted(os.listdir(pred_folder)) gold_files = sorted(os.listdir(gold_folder)) if pred_files != gold_files: return 0. @@ -119,7 +124,8 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float: if file_type == 'text': from .vscode import compare_text_file return compare_text_file - elif file_type == 'pdf': return compare_pdfs + elif file_type == 'pdf': + return compare_pdfs elif file_type == 'docx': from .docs import compare_docx_files return compare_docx_files @@ -141,7 +147,8 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float: elif file_type == 'video': from .vlc import compare_videos return compare_videos - else: raise ValueError('[ERROR]: not support file type: %s' % file_type) + else: + raise ValueError('[ERROR]: not support file type: %s' % file_type) score = 0 compare_function = get_compare_function() @@ -160,7 +167,7 @@ def compare_htmls(html_path1: str, html_path2: str) -> float: soup1 = BeautifulSoup(inf, 'lxml') with open(html_path2, 'r', encoding='utf-8') as inf: soup2 = BeautifulSoup(inf, 'lxml') - + def compare_elements(elem1, elem2): if not (isinstance(elem1, Tag) and isinstance(elem2, Tag)): return elem1 == elem2 @@ -252,3 +259,18 @@ def check_font_size(font_size, rule): return 1. if rule['min'] < default_font_size < rule['max'] else 0. 
def is_added_to_steam_cart(active_tab_info, rule):
    """
    Check if the item is added to the Steam cart.

    Args:
        active_tab_info: mapping describing the inspected page; its ``'content'``
            entry is searched. ``None`` or a mapping without ``'content'`` scores 0
            instead of raising.
        rule: mapping whose ``'items'`` entry lists the strings that must all occur
            in the page content.

    Returns:
        1. if every entry of ``rule['items']`` is a substring of the content
        (vacuously true for an empty list), otherwise 0.

    Raises:
        KeyError: if ``rule`` has no ``'items'`` entry.
    """
    items = rule['items']

    # No page information at all means nothing can be confirmed
    if active_tab_info is None:
        return 0.

    content = active_tab_info.get('content')
    if content is None:
        return 0.

    # Plain substring match: every expected item name must appear in the content
    return 1. if all(item in content for item in items) else 0.
a/evaluation_examples/examples/chrome/121ba48f-9e17-48ce-9bc6-a4fb17a7ebba.json +++ b/evaluation_examples/examples/chrome/121ba48f-9e17-48ce-9bc6-a4fb17a7ebba.json @@ -3,16 +3,69 @@ "snapshot": "chrome", "instruction": "Find Dota 2 game and add all DLC to cart.", "source": "Mind2Web", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.dota2.com/home", + "https://store.steampowered.com" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "", + "func": "is_added_to_steam_cart", "result": { + "type": "page_info", + "url": "https://store.steampowered.com/cart/" }, "expected": { + "type": "rule", + "rules": { + "items": [ + "The Dota 2 Official Soundtrack" + ] + } } } } diff --git a/evaluation_examples/examples/chrome/59155008-fe71-45ec-8a8f-dc35497b6aa8.json b/evaluation_examples/examples/chrome/59155008-fe71-45ec-8a8f-dc35497b6aa8.json index 173d49a..8c97446 100644 --- a/evaluation_examples/examples/chrome/59155008-fe71-45ec-8a8f-dc35497b6aa8.json +++ b/evaluation_examples/examples/chrome/59155008-fe71-45ec-8a8f-dc35497b6aa8.json @@ -1,18 +1,68 @@ { - "id": "", + "id": "59155008-fe71-45ec-8a8f-dc35497b6aa8", "snapshot": "chrome", - "instruction": "", + "instruction": "What are the similar names to the name carl", "source": "Mind2Web", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + 
"--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.babycenter.com/child" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "", + "func": "is_expected_active_tab", "result": { + "type": "active_tab_info" }, "expected": { + "type": "rule", + "rules": { + "type": "url", + "url": "https://www.babycenter.com/baby-names/details/carl-853" + } } } } diff --git a/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json b/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json index 95d5b9e..76ad5f8 100644 --- a/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json +++ b/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json @@ -3,16 +3,66 @@ "snapshot": "chrome", "instruction": "Find the Driver License Eligibility Requirements", "source": "Mind2Web", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.dmv.virginia.gov/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; 
pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "", + "func": "is_expected_active_tab", "result": { + "type": "active_tab_info" }, "expected": { + "type": "rule", + "rules": { + "type": "url", + "url": "https://www.dmv.virginia.gov/licenses-ids/license/applying/eligibility" + } } } } diff --git a/evaluation_examples/examples/chrome/f0b971a1-6831-4b9b-a50e-22a6e47f45ba.json b/evaluation_examples/examples/chrome/f0b971a1-6831-4b9b-a50e-22a6e47f45ba.json index f912c1e..b2da339 100644 --- a/evaluation_examples/examples/chrome/f0b971a1-6831-4b9b-a50e-22a6e47f45ba.json +++ b/evaluation_examples/examples/chrome/f0b971a1-6831-4b9b-a50e-22a6e47f45ba.json @@ -3,16 +3,66 @@ "snapshot": "chrome", "instruction": "Show me the scores for the 2019 super bowl", "source": "Mind2Web", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.nfl.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "", + "func": "is_expected_active_tab", "result": { + "type": "active_tab_info" }, "expected": { + "type": "rule", + "rules": { + "type": "url", + "url": "https://www.nfl.com/scores/2019/POST4" + } } } } diff --git a/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json 
b/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json index d94cd6f..f91d346 100644 --- a/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json +++ b/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json @@ -3,16 +3,66 @@ "snapshot": "chrome", "instruction": "Compare iPhone 15 Pro Max with iPhone 13 Pro Max", "source": "Mind2Web", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.apple.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "", + "func": "is_expected_active_tab", "result": { + "type": "active_tab_info" }, "expected": { + "type": "rule", + "rules": { + "type": "url", + "url": "https://www.apple.com/iphone/compare/?modelList=iphone-15-pro-max,iphone-15-pro,iphone-13-pro-max" + } } } }