From 324409866448605f94d95d9e27969ae1a8ec9d4e Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Sat, 24 Feb 2024 21:57:01 +0800 Subject: [PATCH] finish the rest part of chrome examples and verify them on mac arm64 --- desktop_env/evaluators/getters/__init__.py | 11 +- desktop_env/evaluators/getters/chrome.py | 303 +++++++++++++++++- desktop_env/evaluators/getters/misc.py | 175 ++++++++++ desktop_env/evaluators/metrics/__init__.py | 6 +- desktop_env/evaluators/metrics/chrome.py | 21 ++ desktop_env/evaluators/metrics/general.py | 23 +- .../1704f00f-79e6-43a7-961b-cedd3724d5fd.json | 93 ++++++ .../2888b4e6-5b47-4b57-8bf5-c73827890774.json | 66 ++++ .../368d9ba4-203c-40c1-9fa3-da2f1430ce63.json | 85 +++++ .../47543840-672a-467d-80df-8f7c3b9788c9.json | 120 +++++++ .../6c4c23a1-42a4-43cc-9db1-2f86ff3738cc.json | 81 +++++ .../7f52cab9-535c-4835-ac8c-391ee64dc930.json | 66 ++++ .../82279c77-8fc6-46f6-9622-3ba96f61b477.json | 68 ++++ .../82bc8d6a-36eb-4d2d-8801-ef714fb1e55a.json | 73 +++++ .../9f3f70fc-5afc-4958-a7b7-3bb4fcb01805.json | 84 +++++ .../9f935cce-0a9f-435f-8007-817732bfc0a5.json | 62 ++++ .../a96b564e-dbe9-42c3-9ccf-b4498073438a.json | 30 +- .../b070486d-e161-459b-aa2b-ef442d973b92.json | 65 ++++ .../b4f95342-463e-4179-8c3f-193cd7241fb2.json | 70 ++++ .../b7895e80-f4d1-4648-bee0-4eb45a6f1fa8.json | 81 +++++ .../c1fa57f3-c3db-4596-8f09-020701085416.json | 72 +++++ .../cabb3bae-cccb-41bd-9f5d-0f3a9fecd825.json | 62 ++++ .../da46d875-6b82-4681-9284-653b0c7ae241.json | 112 +++++++ .../f3b19d1e-2d48-44e9-b4e1-defcae1a0197.json | 8 +- .../f79439ad-3ee8-4f99-a518-0eb60e5652b0.json | 77 +++++ .../fc6d8143-9452-4171-9459-7f515143419a.json | 79 +++++ 26 files changed, 1967 insertions(+), 26 deletions(-) create mode 100644 evaluation_examples/examples/chrome/1704f00f-79e6-43a7-961b-cedd3724d5fd.json create mode 100644 evaluation_examples/examples/chrome/2888b4e6-5b47-4b57-8bf5-c73827890774.json create mode 100644 
evaluation_examples/examples/chrome/368d9ba4-203c-40c1-9fa3-da2f1430ce63.json create mode 100644 evaluation_examples/examples/chrome/47543840-672a-467d-80df-8f7c3b9788c9.json create mode 100644 evaluation_examples/examples/chrome/6c4c23a1-42a4-43cc-9db1-2f86ff3738cc.json create mode 100644 evaluation_examples/examples/chrome/7f52cab9-535c-4835-ac8c-391ee64dc930.json create mode 100644 evaluation_examples/examples/chrome/82279c77-8fc6-46f6-9622-3ba96f61b477.json create mode 100644 evaluation_examples/examples/chrome/82bc8d6a-36eb-4d2d-8801-ef714fb1e55a.json create mode 100644 evaluation_examples/examples/chrome/9f3f70fc-5afc-4958-a7b7-3bb4fcb01805.json create mode 100644 evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json create mode 100644 evaluation_examples/examples/chrome/b070486d-e161-459b-aa2b-ef442d973b92.json create mode 100644 evaluation_examples/examples/chrome/b4f95342-463e-4179-8c3f-193cd7241fb2.json create mode 100644 evaluation_examples/examples/chrome/b7895e80-f4d1-4648-bee0-4eb45a6f1fa8.json create mode 100644 evaluation_examples/examples/chrome/c1fa57f3-c3db-4596-8f09-020701085416.json create mode 100644 evaluation_examples/examples/chrome/cabb3bae-cccb-41bd-9f5d-0f3a9fecd825.json create mode 100644 evaluation_examples/examples/chrome/da46d875-6b82-4681-9284-653b0c7ae241.json create mode 100644 evaluation_examples/examples/chrome/f79439ad-3ee8-4f99-a518-0eb60e5652b0.json create mode 100644 evaluation_examples/examples/chrome/fc6d8143-9452-4171-9459-7f515143419a.json diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py index 7348fec..c688127 100644 --- a/desktop_env/evaluators/getters/__init__.py +++ b/desktop_env/evaluators/getters/__init__.py @@ -16,14 +16,21 @@ from .chrome import ( get_enable_do_not_track, get_enable_enhanced_safety_browsing, get_new_startup_page, - get_find_unpacked_extension_path + get_find_unpacked_extension_path, + get_data_delete_automacally, + 
get_active_tab_html_parse, + get_active_tab_html_parse_accTree, + get_active_tab_url_parse, + get_gotoRecreationPage_and_get_html_content, + get_url_dashPart, + get_active_url_from_accessTree ) from .file import get_cloud_file, get_vm_file, get_cache_file from .general import get_vm_command_line, get_vm_terminal_output from .gimp import get_gimp_config_file from .impress import get_audio_in_slide from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory -from .misc import get_rule, get_accessibility_tree +from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime from .replay import get_replay from .vlc import get_vlc_playing_info, get_vlc_config, get_default_video_player from .vscode import get_vscode_config diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index 323fb46..d8e44c3 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -1,11 +1,27 @@ import json import logging import os +import time import sqlite3 -from typing import Dict, Any +from typing import Dict, Any, List from pydrive.auth import GoogleAuth from pydrive.drive import GoogleDrive, GoogleDriveFileList, GoogleDriveFile from playwright.sync_api import sync_playwright +from urllib.parse import urlparse, parse_qs + +import lxml.etree +from lxml.cssselect import CSSSelector +from lxml.etree import _Element + +_accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org" + , "attr": "uri:deskat:attributes.at-spi.gnome.org" + , "cp": "uri:deskat:component.at-spi.gnome.org" + , "doc": "uri:deskat:document.at-spi.gnome.org" + , "docattr": "uri:deskat:attributes.document.at-spi.gnome.org" + , "txt": "uri:deskat:text.at-spi.gnome.org" + , "val": "uri:deskat:value.at-spi.gnome.org" + , "act": "uri:deskat:action.at-spi.gnome.org" + } logger = logging.getLogger("desktopenv.getters.chrome") @@ -340,7 +356,58 @@ def get_open_tabs_info(env, config: Dict[str, 
def get_active_url_from_accessTree(env, config):
    """
    Read the active tab's URL from the desktop accessibility tree.

    The tree is fetched with ``env.controller.get_accessibility_tree()``,
    parsed as XML, and searched for the element that holds the URL. The text
    of the first match is prefixed and returned.

    config:
        Dict[str, str]{
            'xpath':
                XPath expression evaluated against the tree with the
                module's accessibility namespace map. Takes priority over
                'selectors' when both are present.
            'selectors':
                list of CSS selectors; they are joined with ", " into one
                selector. Used only when 'xpath' is absent.
            'goto_prefix':
                optional prefix put in front of the element text,
                default is "https://"
                (the text read from the tree is expected to lack a scheme)
            ...(other keys, not used in this function)
        }
    Return
        url: str, or the float ``0.`` when no element matches or the matched
        element carries no text. The ``0.`` sentinel is kept for backward
        compatibility; callers must check for it before treating the value
        as a URL.
    Raises
        ValueError: when neither 'xpath' nor 'selectors' is given.
    """
    accessibility_tree: str = env.controller.get_accessibility_tree()
    logger.debug("AT@eval: %s", accessibility_tree)

    at: _Element = lxml.etree.fromstring(accessibility_tree)
    if "xpath" in config:
        elements = at.xpath(config["xpath"], namespaces=_accessibility_ns_map)
    elif "selectors" in config:
        selector = CSSSelector(", ".join(config["selectors"]), namespaces=_accessibility_ns_map)
        elements = selector(at)
    else:
        raise ValueError("At least one of xpath and selectors is required")

    if not elements:
        logger.error("no elements found for the active tab URL lookup")
        return 0.

    address_text = elements[0].text
    if address_text is None:
        # lxml reports an element without text content as None; concatenating
        # it to the prefix would raise TypeError, so treat it as "not found".
        logger.error("the matched accessibility node has no text")
        return 0.

    active_tab_url = config.get("goto_prefix", "https://") + address_text
    logger.info("active tab url now: %s", active_tab_url)
    return active_tab_url
+ config: Dict[str, str]{ + # Keys used in get_active_url_from_accessTree: "xpath", "selectors" + } + """ + active_tab_url = get_active_url_from_accessTree(env, config) host = env.vm_ip port = 9222 # fixme: this port is hard-coded, need to be changed from config file @@ -348,24 +415,21 @@ def get_active_tab_info(env, config: Dict[str, str]): with sync_playwright() as p: # connect to remote Chrome instance browser = p.chromium.connect_over_cdp(remote_debugging_url) - active_tab_info = {} - for context in browser.contexts: - for page in context.pages: - if page.is_visible("body"): # check the visibility of the page body to determine the active status - active_tab_info = { - 'title': page.title(), - 'url': page.url, - 'content': page.content() # get the HTML content of the page - } - break - if active_tab_info: - break + # go to the target URL page + page = browser.new_page() + page.goto(active_tab_url) + page.wait_for_load_state('load') # Wait for the 'load' event to complete + active_tab_info = { + 'title': page.title(), + 'url': page.url, + 'content': page.content() # get the HTML content of the page + } browser.close() - print("active_tab_title: {}".format(active_tab_info.get('title', 'None'))) - print("active_tab_url: {}".format(active_tab_info.get('url', 'None'))) - print("active_tab_content: {}".format(active_tab_info.get('content', 'None'))) + # print("active_tab_title: {}".format(active_tab_info.get('title', 'None'))) + # print("active_tab_url: {}".format(active_tab_info.get('url', 'None'))) + # print("active_tab_content: {}".format(active_tab_info.get('content', 'None'))) return active_tab_info @@ -648,4 +712,209 @@ def get_find_unpacked_extension_path(env, config: Dict[str, str]): return all_extensions_path except Exception as e: logger.error(f"Error: {e}") - return "Google" \ No newline at end of file + return "Google" + + +def get_data_delete_automacally(env, config: Dict[str, str]): + """ + This function is used to open th "auto-delete" mode of chromium 
def get_active_tab_html_parse(env, config: Dict[str, Any]):
    """
    Collect text (or input values) of selected elements from the active tab.

    The active tab's URL is obtained with get_active_url_from_accessTree, the
    already-open page with exactly that URL is looked up over the Chrome
    remote-debugging connection, and the elements named in ``config`` are read
    from it.

    config:
        Dict[str, str]{
            # Keys used in get_active_url_from_accessTree: "xpath", "selectors"
            'category':
                choose from ["class", "label", "xpath", "input"], used to indicate how to find the element
            'labelObject':
                required when category is "label",
                a dict like { "label text": "the key under which this element's text is stored" }
            'class_singleObject':
                optional, read when category is "class", a dict with keys as the class name,
                like { "class name" : "the key under which this element's text is stored" }
            'class_multiObject':
                optional, read when category is "class", used for elements sharing a class name.
                Two layers of dict, like
                    ( {
                        "class name": {
                            "rank in this class" : "the key under which this element's text is stored"
                            ...
                            }
                        } )
            'xpathObject':
                required when category is "xpath", a dict with keys as the xpath,
                like { "full xpath" : "the key under which this element's text is stored" }
            'inputObject':
                required when category is "input",
                a dict with keys as the input element's xpath,
                like { "full xpath" : "the key under which this input's value is stored" }
        }
    Return
        dict mapping the configured result keys to stripped strings. It is
        empty when no open page has the active tab's URL or when the category
        is not one of the four above.
    """
    active_tab_url = get_active_url_from_accessTree(env, config)
    host = env.vm_ip
    port = 9222  # fixme: this port is hard-coded, need to be changed from config file

    remote_debugging_url = f"http://{host}:{port}"
    return_json = {}
    with sync_playwright() as p:
        # connect to remote Chrome instance
        browser = p.chromium.connect_over_cdp(remote_debugging_url)
        try:
            target_page = None
            for context in browser.contexts:
                for page in context.pages:
                    page.wait_for_load_state("load")
                    # NOTE(review): exact string comparison; presumably the prefixed
                    # address-bar text equals page.url character for character - confirm.
                    if page.url == active_tab_url:
                        target_page = page
                        break
                if target_page is not None:
                    # stop scanning the remaining contexts once the page is found
                    break

            if target_page is None:
                logger.error("no open page matches the active tab url: %s", active_tab_url)
                return return_json

            category = config["category"]
            if category == "class":
                # find the text of elements in html with specific class name
                for class_name, ranked_keys in config.get("class_multiObject", {}).items():
                    matches = target_page.query_selector_all("." + class_name)
                    for rank, result_key in ranked_keys.items():
                        return_json[result_key] = matches[int(rank)].text_content().strip()
                for class_name, result_key in config.get("class_singleObject", {}).items():
                    return_json[result_key] = target_page.query_selector("." + class_name).text_content().strip()
            elif category == "label":
                # find the text of elements in html with specific label name
                for label, result_key in config["labelObject"].items():
                    return_json[result_key] = target_page.get_by_label(label).text_content().strip()
            elif category == "xpath":
                # find the text of elements in html with specific xpath
                for xpath, result_key in config["xpathObject"].items():
                    return_json[result_key] = target_page.locator("xpath=" + xpath).text_content().strip()
            elif category == "input":
                # read the current value of input elements addressed by xpath
                for xpath, result_key in config["inputObject"].items():
                    return_json[result_key] = target_page.locator("xpath=" + xpath).input_value().strip()
        finally:
            # release the CDP connection even when a lookup above raises
            browser.close()
    return return_json
def get_active_tab_url_parse(env, config: Dict[str, Any]):
    """
    Extract selected query parameters from the active tab's URL.

    config:
        # Keys used in get_active_url_from_accessTree: "xpath", "selectors", "goto_prefix"
        'parse_keys': must exist,
            a list of keys to extract from the query parameters of the url.
        'replace': optional,
            a dict, used to rename an extracted key, value unchanged.
            ( { "original key": "new key" } )
            Entries whose original key is not in 'parse_keys' are ignored.
    Return
        dict mapping each requested (possibly renamed) key to the first value
        of that query parameter, or '' when the parameter is absent. All
        values are '' when the active tab's URL could not be determined.
    """
    active_tab_url = get_active_url_from_accessTree(env, config)
    if not isinstance(active_tab_url, str):
        # the URL getter returns a non-string sentinel when the address bar
        # could not be read; parse an empty URL instead of crashing urlparse
        active_tab_url = ""

    query_params = parse_qs(urlparse(active_tab_url).query)
    extracted_params = {key: query_params.get(key, [''])[0] for key in config["parse_keys"]}

    for original_key, new_key in config.get("replace", {}).items():
        if original_key in extracted_params:
            # change original key to new key, keep value unchanged
            extracted_params[new_key] = extracted_params.pop(original_key)
    return extracted_params
from datetime import datetime, timedelta

# English name tables used by apply_rules_to_timeFormat. datetime.weekday()
# numbers Monday as 0, months are numbered from 1.
day_of_week_mapping = {
    0: 'Mon',
    1: 'Tue',
    2: 'Wed',
    3: 'Thu',
    4: 'Fri',
    5: 'Sat',
    6: 'Sun'
}

month_mapping = {
    1: 'Jan',
    2: 'Feb',
    3: 'Mar',
    4: 'Apr',
    5: 'May',
    6: 'Jun',
    7: 'Jul',
    8: 'Aug',
    9: 'Sep',
    10: 'Oct',
    11: 'Nov',
    12: 'Dec'
}

Month_Mapping_Full = {
    1: "January",
    2: "February",
    3: "March",
    4: "April",
    5: "May",
    6: "June",
    7: "July",
    8: "August",
    9: "September",
    10: "October",
    11: "November",
    12: "December"
}

month_mapping_full = {
    1: 'january',
    2: 'february',
    3: 'march',
    4: 'april',
    5: 'may',
    6: 'june',
    7: 'july',
    8: 'august',
    9: 'september',
    10: 'october',
    11: 'november',
    12: 'december'
}

# Supported relative-time phrases. An int is a plain day offset from "now";
# "special" marks a phrase resolved by _resolve_relative_day.
relativeTime_to_IntDay = {
    "tomorrow": 1,
    "5th next month": "special",
    "10th next month": "special",
    "11th next month": "special",
    "this month": "special",
    "this Saturday": "special",
    "this Sunday": "special",
    "next Monday": "special",
    "next Friday": "special",
    "first monday four months later": "special"
}

# Day of month for the "<n>th next month" phrases.
_DAY_OF_NEXT_MONTH = {
    "5th next month": 5,
    "10th next month": 10,
    "11th next month": 11,
}


def _first_of_month_after(now, months_ahead):
    """Return midnight on the 1st of the month `months_ahead` months after `now`."""
    zero_based_month = now.month - 1 + months_ahead
    # work on a 0-based month so December maps to 12 instead of wrapping to 0
    return datetime(now.year + zero_based_month // 12, zero_based_month % 12 + 1, 1)


def _resolve_relative_day(phrase, now):
    """Turn one phrase from relativeTime_to_IntDay into a datetime relative to `now`."""
    offset = relativeTime_to_IntDay[phrase]  # KeyError for an unknown phrase
    if offset != "special":
        return now + timedelta(days=offset)

    weekday = now.weekday()
    if phrase in _DAY_OF_NEXT_MONTH:
        return _first_of_month_after(now, 1).replace(day=_DAY_OF_NEXT_MONTH[phrase])
    if phrase == "this month":
        return now
    if phrase == "this Saturday":
        return now + timedelta(days=5 - weekday)
    if phrase == "this Sunday":
        return now + timedelta(days=6 - weekday)
    if phrase == "next Monday":
        # Monday of the following week; a full 7 days when today is Monday
        return now + timedelta(days=7 - weekday)
    if phrase == "next Friday":
        # Friday of the same week as "next Monday", so a
        # "next Monday" -> "next Friday" range is never inverted
        return now + timedelta(days=7 - weekday + 4)
    if phrase == "first monday four months later":
        first_day = _first_of_month_after(now, 4)
        return first_day + timedelta(days=(7 - first_day.weekday()) % 7)
    raise KeyError(f"relative time {phrase!r} is listed as special but has no rule")


def get_rule_relativeTime(env, config: dict, now=None) -> dict:
    """
    Convert the relative dates of a rule into absolute, formatted dates.

    Each template in ``config["rules"]["expected"]`` is filled in with
    apply_rules_to_timeFormat and written back in place, so ``config`` is
    modified.

    config:
        'rules': {
            'relativeTime': {
                "from": must exist; a key of relativeTime_to_IntDay.
                "to": optional; a key of relativeTime_to_IntDay.
            }
            'expected': the format templates.
                If relativeTime only has key "from", the template key must be "time".
                If relativeTime has key "to", the template keys must be "from" and "to".
        }
    env: not used.
    now: optional datetime the phrases are resolved against; defaults to
        datetime.now(). Pass a fixed value for reproducible results.

    Return
        config["rules"], with the templates replaced by the formatted dates.
    Raises
        KeyError: for a phrase that is not in relativeTime_to_IntDay, or a
            missing template key.
    """
    rules = config["rules"]
    relative_time = rules["relativeTime"]
    expected = rules["expected"]
    if now is None:
        now = datetime.now()

    if "to" not in relative_time:
        absolute_day = _resolve_relative_day(relative_time["from"], now)
        expected["time"] = apply_rules_to_timeFormat(expected["time"], absolute_day)
    else:
        for bound in ("from", "to"):
            absolute_day = _resolve_relative_day(relative_time[bound], now)
            expected[bound] = apply_rules_to_timeFormat(expected[bound], absolute_day)

    return rules


def apply_rules_to_timeFormat(timeFormat: str, absoluteDay: datetime):
    """
    Fill the date placeholders of `timeFormat` with the values of `absoluteDay`.

    Placeholders: {DoW} (Mon), {Month} (Jan), {MonthFull} (January),
    {month} (january), {Month0D} (01), {DayD} (5), {Day0D} (05), {Year} (2024).
    Every occurrence of a placeholder is replaced; other text is kept as is.
    """
    month = absoluteDay.month
    day = absoluteDay.day
    substitutions = {
        "{DoW}": day_of_week_mapping[absoluteDay.weekday()],
        "{Month}": month_mapping[month],
        "{DayD}": str(day),
        "{Year}": str(absoluteDay.year),
        "{Month0D}": f"{month:02d}",
        "{month}": month_mapping_full[month],
        "{MonthFull}": Month_Mapping_Full[month],
        "{Day0D}": f"{day:02d}",
        # you can add other replace rules here
    }
    for placeholder, value in substitutions.items():
        timeFormat = timeFormat.replace(placeholder, value)
    return timeFormat
def is_expected_url_pattern_match(result, rules) -> float:
    """
    Check that a URL matches every expected regular expression.

    Args:
        result: either a URL string, or a dict with the URL under key "url"
            (the shape returned by the active-tab-info getter).
        rules: dict whose "expected" entry is a list of regex patterns, or a
            single pattern string. Patterns are applied with ``re.search``,
            so they may match anywhere in the URL.

    Returns:
        1. when every pattern is found in the URL, otherwise 0. A result that
        holds no URL string (for example the numeric "not found" sentinel of
        the URL getter, or a dict without "url") scores 0.
    """
    import re  # imported here so the function does not depend on the module's import list

    result_url = result.get("url") if isinstance(result, dict) else result
    if not isinstance(result_url, str):
        return 0.

    patterns = rules["expected"]
    if isinstance(patterns, str):
        # a bare string would otherwise be iterated character by character
        patterns = [patterns]

    for pattern in patterns:
        if not re.search(pattern, result_url):
            return 0.
    return 1.
def check_direct_json_object(result, rules) -> float:
    """
    Compare a result dict with the expected key/value pairs.

    Args:
        result: dict produced by a getter. Keys that are not listed in the
            expectation are ignored.
        rules: dict whose "expected" entry maps each required key to the
            value it must have.

    Returns:
        1.0 when every expected key is present in `result` with an equal
        value, otherwise 0. A missing key, or a `result` that is not a dict
        (for example a getter's failure value), counts as a mismatch instead
        of raising.
    """
    expected_json = rules["expected"]
    actual = result if isinstance(result, dict) else {}
    missing = object()  # unique marker, never equal to a real expected value
    for key, expected_value in expected_json.items():
        if actual.get(key, missing) != expected_value:
            return 0.
    return 1.0
"filterCriteria_carCategory": "large", + "filterCriteria_sortBy": "PRICE" + } + } + }, + { + "type": "rule_relativeTime", + "rules":{ + "relativeTime":{ + "from":"next Monday", + "to":"next Friday" + }, + "expected": { + "from": "{DoW}, {DayD} {Month} {Year}, 10:00", + "to": "{DoW}, {DayD} {Month} {Year}, 10:00" + } + }} + ] + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/2888b4e6-5b47-4b57-8bf5-c73827890774.json b/evaluation_examples/examples/chrome/2888b4e6-5b47-4b57-8bf5-c73827890774.json new file mode 100644 index 0000000..a3e046c --- /dev/null +++ b/evaluation_examples/examples/chrome/2888b4e6-5b47-4b57-8bf5-c73827890774.json @@ -0,0 +1,66 @@ +{ + "id": "2888b4e6-5b47-4b57-8bf5-c73827890774", + "snapshot": "chrome", + "instruction": "Find a men's T-Shirt that is in large size with a stripe pattern, short sleeve and under the Sales&Discount.", + "source": "test_task_1", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.macys.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"exact_match", + "result": { + "type": "url_dashPart", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "partIndex": -1, + "needDeleteId": true, + "returnType": "string" + }, + "expected":{ + "type": "rule", + "rules":{ + "expected": "Stripe,Men,L,Short%20Sleeve,Sales%20%26%20Discounts" + } + } + } + } + \ No newline at end of file diff --git 
a/evaluation_examples/examples/chrome/368d9ba4-203c-40c1-9fa3-da2f1430ce63.json b/evaluation_examples/examples/chrome/368d9ba4-203c-40c1-9fa3-da2f1430ce63.json new file mode 100644 index 0000000..75295df --- /dev/null +++ b/evaluation_examples/examples/chrome/368d9ba4-203c-40c1-9fa3-da2f1430ce63.json @@ -0,0 +1,85 @@ +{ + "id": "368d9ba4-203c-40c1-9fa3-da2f1430ce63", + "snapshot": "chrome", + "instruction": "find the Monthly forecast for Manchester, GB for this month", + "source": "test_task_1", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.accuweather.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":["check_direct_json_object", "is_expected_url_pattern_match"], + "result": [{ + "type": "url_dashPart", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "partIndex": -2, + "needDeleteId": false, + "returnType": "json", + "key":"time" + }, + { + "type": "active_tab_info", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www." 
+ }], + "expected":[ + { + "type": "rule_relativeTime", + "rules":{ + "relativeTime": { + "from": "this month" + }, + "expected": { + "time": "{month}-weather" + } + } + }, + { + "type": "rule", + "rules":{ + "expected": ["\/manchester\/"] + } + }] + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/47543840-672a-467d-80df-8f7c3b9788c9.json b/evaluation_examples/examples/chrome/47543840-672a-467d-80df-8f7c3b9788c9.json new file mode 100644 index 0000000..8ff1ca1 --- /dev/null +++ b/evaluation_examples/examples/chrome/47543840-672a-467d-80df-8f7c3b9788c9.json @@ -0,0 +1,120 @@ +{ + "id": "47543840-672a-467d-80df-8f7c3b9788c9", + "snapshot": "chrome", + "instruction": "Find and select the car with the most number of seats to pick up in Boston Logan Intl Airport from 10th next month to 11th next month.", + "source": "test_task_1", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.budget.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":["is_expected_url_pattern_match", "check_direct_json_object", "check_direct_json_object"], + "conj": "and", + "result": [ + { + "type": "active_url_from_accessTree", + "selectors": [ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www." 
+ }, + { + "type": "active_tab_html_parse", + "selectors": [ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "category": "class", + "class_singleObject":{}, + "class_multiObject":{ + "location-info":{ + "0": "start_location", + "1": "end_location" + }, + "day-time-info":{ + "0": "from", + "1": "to" + } + } + }, + { + "type": "active_tab_html_parse", + "selectors": [ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "category": "xpath", + "xpathObject":{ + "/html/body/div[6]/div[2]/div[1]/div/div/div[2]/div[1]/section[1]/div/form/div[1]/div[2]/div/a": "rank" + } + } + ], + "expected":[ + { + "type": "rule", + "rules":{ + "expected": ["reservation#\/vehicles"] + } + }, + { + "type": "rule_relativeTime", + "rules":{ + "relativeTime":{ + "from":"10th next month", + "to": "11th next month" + }, + "expected": { + "start_location": "Boston Logan Intl Airport,\n\t\t\t\t\t\t\t\tBOS \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t Pick-Up", + "end_location": "Boston Logan Intl Airport,\n\t\t\t\t\t\t\t\tBOS", + "from": "{DoW}, {Month} {Day0D}, 12:00 PM", + "to": "{DoW}, {Month} {Day0D}, 12:00 PM" + } + } + }, + { + "type": "rule", + "rules":{ + "expected": { + "rank": "Number of Seats (High to Low)" + } + } + } + ] + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/6c4c23a1-42a4-43cc-9db1-2f86ff3738cc.json b/evaluation_examples/examples/chrome/6c4c23a1-42a4-43cc-9db1-2f86ff3738cc.json new file mode 100644 index 0000000..582aaca --- /dev/null +++ b/evaluation_examples/examples/chrome/6c4c23a1-42a4-43cc-9db1-2f86ff3738cc.json @@ -0,0 +1,81 @@ +{ + "id": "6c4c23a1-42a4-43cc-9db1-2f86ff3738cc", + "snapshot": "chrome", + "instruction": "Find 
flights from Seattle to New York on 5th next month and only show those that can be purchased with miles.", + "source": "test_task_1", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.delta.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"check_direct_json_object", + "result": { + "type": "active_tab_html_parse", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "category": "class", + "class_singleObject":{ + "search-date": "time", + "price-in-tabs__nav--selected": "category" + }, + "class_multiObject":{ + "search-segment-cities__city": { + "0": "start", + "1": "end" + } + } + }, + "expected":{ + "type": "rule_relativeTime", + "rules":{ + "relativeTime": { + "from": "5th next month" + }, + "expected": { + "start": "SEA", + "end": "NYC", + "time": "{DoW}, {Month} {DayD}, {Year}", + "category": "Miles" + } + } + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/7f52cab9-535c-4835-ac8c-391ee64dc930.json b/evaluation_examples/examples/chrome/7f52cab9-535c-4835-ac8c-391ee64dc930.json new file mode 100644 index 0000000..d08381b --- /dev/null +++ b/evaluation_examples/examples/chrome/7f52cab9-535c-4835-ac8c-391ee64dc930.json @@ -0,0 +1,66 @@ +{ + "id": "7f52cab9-535c-4835-ac8c-391ee64dc930", + "snapshot": "chrome", + "instruction": "Create a list of drip coffee makers that are on sale and within $25-60 and have a black finish.", + "source": "test_task_1", + "config": [ + { + "type": "launch", + 
"parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://shopping.google.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"check_direct_json_object", + "result": { + "type": "active_tab_url_parse", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "parse_keys": ["q", "tbs"] + }, + "expected":{ + "type": "rule", + "rules":{ + "expected": { + "q": "drip coffee maker", + "tbs": "mr:1,price:1,ppr_min:25,ppr_max:60,pdtr0:1825161|1825162" + } + } + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/82279c77-8fc6-46f6-9622-3ba96f61b477.json b/evaluation_examples/examples/chrome/82279c77-8fc6-46f6-9622-3ba96f61b477.json new file mode 100644 index 0000000..c3d1051 --- /dev/null +++ b/evaluation_examples/examples/chrome/82279c77-8fc6-46f6-9622-3ba96f61b477.json @@ -0,0 +1,68 @@ +{ + "id": "82279c77-8fc6-46f6-9622-3ba96f61b477", + "snapshot": "chrome", + "instruction": "Find electric cars with a maximum price of $50,000 within 50 miles of 10001.", + "source": "test_task_1", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.cars.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": 
"trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"check_direct_json_object", + "result": { + "type": "active_tab_url_parse", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "parse_keys": ["list_price_max", "maximum_distance", "zip","fuel_slugs[]"] + }, + "expected":{ + "type": "rule", + "rules":{ + "expected": { + "list_price_max": "50000", + "maximum_distance": "50", + "zip":"10001", + "fuel_slugs[]":"electric" + } + } + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/82bc8d6a-36eb-4d2d-8801-ef714fb1e55a.json b/evaluation_examples/examples/chrome/82bc8d6a-36eb-4d2d-8801-ef714fb1e55a.json new file mode 100644 index 0000000..76cb2cb --- /dev/null +++ b/evaluation_examples/examples/chrome/82bc8d6a-36eb-4d2d-8801-ef714fb1e55a.json @@ -0,0 +1,73 @@ +{ + "id": "82bc8d6a-36eb-4d2d-8801-ef714fb1e55a", + "snapshot": "chrome", + "instruction": "On next Monday, look up a flight from Mumbai to Stockholm.", + "source": "test_task_1", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.qatarairways.com/en-hk/homepage.html" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"check_direct_json_object", + "result": { + "type": "active_tab_url_parse", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "parse_keys": ["fromStation", "toStation", "departing"], + "replace":{ + "departing": "time" + } + }, +
"expected":{ + "type": "rule_relativeTime", + "rules":{ + "relativeTime":{ + "from": "next Monday" + }, + "expected": { + "fromStation": "BOM", + "toStation": "STO", + "time": "{Year}-{Month0D}-{DayD}" + } + } + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/9f3f70fc-5afc-4958-a7b7-3bb4fcb01805.json b/evaluation_examples/examples/chrome/9f3f70fc-5afc-4958-a7b7-3bb4fcb01805.json new file mode 100644 index 0000000..0e65934 --- /dev/null +++ b/evaluation_examples/examples/chrome/9f3f70fc-5afc-4958-a7b7-3bb4fcb01805.json @@ -0,0 +1,84 @@ +{ + "id": "9f3f70fc-5afc-4958-a7b7-3bb4fcb01805", + "snapshot": "chrome", + "instruction": "Browse the list of women's Nike jerseys over $60.", + "source": "test_task_1", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.nba.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":["is_expected_url_pattern_match", "check_direct_json_object"], + "conj": "and", + "result": [ + { + "type": "active_tab_info", + "selectors": [ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ] + }, + { + "type": "active_tab_html_parse", + "selectors": [ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "category": "xpath", + "xpathObject":{ + "/html/body/div[2]/div/div[6]/div[2]/div[2]/div/div[1]/div[4]/ul/li[2]": "money" + } + } + ], + "expected":[ + { + "type": "rule", + "rules":{ + "expected": ["\/women-jerseys\/"] + } + }, + { + "type": "rule", + "rules":{ + "expected": { + "money": "over $60" + } + } + } + ] 
+ } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json b/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json new file mode 100644 index 0000000..d24b29a --- /dev/null +++ b/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json @@ -0,0 +1,62 @@ +{ + "id": "9f935cce-0a9f-435f-8007-817732bfc0a5", + "snapshot": "chrome", + "instruction": "Browse list of Civil Division forms.", + "source": "online_tasks", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.justice.gov/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"is_expected_url_pattern_match", + "result": { + "type": "active_tab_info", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www." 
+ }, + "expected":{ + "type": "rule", + "rules":{ + "expected": ["forms\\?title=&field_component_target_id=431"] + } + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/a96b564e-dbe9-42c3-9ccf-b4498073438a.json b/evaluation_examples/examples/chrome/a96b564e-dbe9-42c3-9ccf-b4498073438a.json index d0a46b2..cfb0401 100644 --- a/evaluation_examples/examples/chrome/a96b564e-dbe9-42c3-9ccf-b4498073438a.json +++ b/evaluation_examples/examples/chrome/a96b564e-dbe9-42c3-9ccf-b4498073438a.json @@ -22,6 +22,30 @@ "tcp:localhost:1337" ] } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.flightaware.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);" + ] + } } ], "trajectory": "trajectories/", @@ -31,7 +55,11 @@ "evaluator": { "func":"is_expected_active_tab", "result": { - "type": "active_tab_info" + "type": "active_tab_info", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www." 
}, "expected":{ "type": "rule", diff --git a/evaluation_examples/examples/chrome/b070486d-e161-459b-aa2b-ef442d973b92.json b/evaluation_examples/examples/chrome/b070486d-e161-459b-aa2b-ef442d973b92.json new file mode 100644 index 0000000..935126a --- /dev/null +++ b/evaluation_examples/examples/chrome/b070486d-e161-459b-aa2b-ef442d973b92.json @@ -0,0 +1,65 @@ +{ + "id": "b070486d-e161-459b-aa2b-ef442d973b92", + "snapshot": "chrome", + "instruction": "Show side effects of Tamiflu.", + "source": "online_tasks", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.drugs.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"exact_match", + "result": { + "type": "url_dashPart", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "partIndex": -1, + "needDeleteId": false, + "returnType": "string" + }, + "expected":{ + "type": "rule", + "rules":{ + "expected": "tamiflu.html#side-effects" + } + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/b4f95342-463e-4179-8c3f-193cd7241fb2.json b/evaluation_examples/examples/chrome/b4f95342-463e-4179-8c3f-193cd7241fb2.json new file mode 100644 index 0000000..39457ba --- /dev/null +++ b/evaluation_examples/examples/chrome/b4f95342-463e-4179-8c3f-193cd7241fb2.json @@ -0,0 +1,70 @@ +{ + "id": "b4f95342-463e-4179-8c3f-193cd7241fb2", + "snapshot": "chrome", + "instruction": "Find the next available date for Albion Basin.", + "source": "test_task_1", + "config": [ + { +
"type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.recreation.gov/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"check_direct_json_object", + "result": { + "type": "active_tab_html_parse", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "category":"class", + "class_singleObject":{}, + "class_multiObject":{ + "camp-sortable-column-header":{ + "2":"camp-sortable-column-header" + } + } + }, + "expected":{ + "type":"gotoRecreationPage_and_get_html_content", + "selector": "class", + "class": "camp-sortable-column-header", + "order": "2" + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/b7895e80-f4d1-4648-bee0-4eb45a6f1fa8.json b/evaluation_examples/examples/chrome/b7895e80-f4d1-4648-bee0-4eb45a6f1fa8.json new file mode 100644 index 0000000..97a7534 --- /dev/null +++ b/evaluation_examples/examples/chrome/b7895e80-f4d1-4648-bee0-4eb45a6f1fa8.json @@ -0,0 +1,81 @@ +{ + "id": "b7895e80-f4d1-4648-bee0-4eb45a6f1fa8", + "snapshot": "chrome", + "instruction": "Find a Hotel in New York City with lowest price possible for 2 adults this weekend.", + "source": "test_task_0", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + 
"https://www.tripadvisor.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"check_direct_json_object", + "result": { + "type": "active_tab_html_parse", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "category": "xpath", + "xpathObject":{ + "/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div/div[1]/div/button/div[3]":"from", + "/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div/div[2]/button/div[3]":"to", + "/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[1]/div/h1":"city", + "/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div/div[3]/button/div[3]/span/span[2]":"adult", + "/html/body/div[1]/main/div[3]/div/div[2]/div/div[1]/div/div[2]/div[1]/div/div[1]/div/div[1]/div[2]/div/div[2]/div/button/div/div":"rank" + } + }, + "expected": + { + "type": "rule_relativeTime", + "rules":{ + "relativeTime": { + "from": "this Saturday", + "to": "this Sunday" + }, + "expected": { + "from": "{DoW}, {Month} {DayD}", + "to": "{DoW}, {Month} {DayD}", + "city": "New York City Hotels", + "adult": "2 adults", + "rank": "Price (low to high)" + } + } + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/c1fa57f3-c3db-4596-8f09-020701085416.json b/evaluation_examples/examples/chrome/c1fa57f3-c3db-4596-8f09-020701085416.json new file mode 100644 index 0000000..f98ab7e --- /dev/null +++ b/evaluation_examples/examples/chrome/c1fa57f3-c3db-4596-8f09-020701085416.json @@ -0,0 +1,72 @@ +{ + "id": "c1fa57f3-c3db-4596-8f09-020701085416", + "snapshot": "chrome", + "instruction": "Open the baggage fee calculator.", + "source": "test_task_1", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + 
"--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.united.com/en/us" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"is_expected_url_pattern_match", + "result": { + "type": "active_tab_info", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www." + }, + "expected":{ + "type": "rule", + "rules":{ + "expected": ["baggage-calculator"] + } + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/cabb3bae-cccb-41bd-9f5d-0f3a9fecd825.json b/evaluation_examples/examples/chrome/cabb3bae-cccb-41bd-9f5d-0f3a9fecd825.json new file mode 100644 index 0000000..0c6fd04 --- /dev/null +++ b/evaluation_examples/examples/chrome/cabb3bae-cccb-41bd-9f5d-0f3a9fecd825.json @@ -0,0 +1,62 @@ +{ + "id": "cabb3bae-cccb-41bd-9f5d-0f3a9fecd825", + "snapshot": "chrome", + "instruction": "Browse spider-man toys for kids and sort by lowest price.", + "source": "online_tasks", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.kohls.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": 
"trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"is_expected_url_pattern_match", + "result": { + "type": "active_url_from_accessTree", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www." + }, + "expected":{ + "type": "rule", + "rules":{ + "expected": ["AgeAppropriate:Kids", "search=spider-man%20toys", "S=4"] + } + } + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/da46d875-6b82-4681-9284-653b0c7ae241.json b/evaluation_examples/examples/chrome/da46d875-6b82-4681-9284-653b0c7ae241.json new file mode 100644 index 0000000..448d581 --- /dev/null +++ b/evaluation_examples/examples/chrome/da46d875-6b82-4681-9284-653b0c7ae241.json @@ -0,0 +1,112 @@ +{ + "id": "da46d875-6b82-4681-9284-653b0c7ae241", + "snapshot": "chrome", + "instruction": "Schedule an appointment to apply for transportation access pass in the Charlie Card store on the first Monday four months later, 10:15 am, fill in my details (James Smith, james.smith@gmail.com), and book.", + "source": "test_task_2", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.mbta.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":["is_expected_url_pattern_match", "check_direct_json_object", "check_direct_json_object"], + "conj": "and", + "result": [ + { + "type": "active_tab_info", + "selectors": [ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ] + }, + { + "type": "active_tab_html_parse", + "selectors": [ + 
"application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "category": "class", + "class_singleObject":{}, + "class_multiObject":{ + "breakword":{ + "1": "content", + "2": "time" + } + } + }, + { + "type": "active_tab_html_parse", + "selectors": [ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "category": "input", + "inputObject":{ + "/html/body/div/div/form/div[7]/div/div/div[1]/input[1]": "name", + "/html/body/div/div/form/div[7]/div/div/div[1]/input[2]": "mail" + } + } + ], + "expected":[ + { + "type": "rule", + "rules":{ + "expected": ["CharlieCardStoreAppointments@mbta.com\/bookings\/"] + } + }, + { + "type": "rule_relativeTime", + "rules":{ + "relativeTime":{ + "from":"first monday four months later" + }, + "expected": { + "content": "Apply for Transportation Access Pass (TAP) CharlieCard non-auto approval", + "time": "{MonthFull} {Day0D}, 10:15 am" + } + } + }, + { + "type": "rule", + "rules":{ + "expected": { + "name": "James Smith", + "mail": "james.smith@gmail.com" + } + } + } + ] + } + } + \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/f3b19d1e-2d48-44e9-b4e1-defcae1a0197.json b/evaluation_examples/examples/chrome/f3b19d1e-2d48-44e9-b4e1-defcae1a0197.json index e8b7d52..d781e67 100644 --- a/evaluation_examples/examples/chrome/f3b19d1e-2d48-44e9-b4e1-defcae1a0197.json +++ b/evaluation_examples/examples/chrome/f3b19d1e-2d48-44e9-b4e1-defcae1a0197.json @@ -31,12 +31,16 @@ "evaluator": { "func":"is_expected_url_pattern_match", "result": { - "type": "active_tab_info" + "type": "active_tab_info", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www." 
}, "expected":{ "type": "rule", "rules":{ - "expected": "https://seatgeek\\.com/help/articles/\\d+-Buying-Tickets" + "expected": ["https://seatgeek\\.com/help/articles/\\d+-Buying-Tickets"] } } } diff --git a/evaluation_examples/examples/chrome/f79439ad-3ee8-4f99-a518-0eb60e5652b0.json b/evaluation_examples/examples/chrome/f79439ad-3ee8-4f99-a518-0eb60e5652b0.json new file mode 100644 index 0000000..015452e --- /dev/null +++ b/evaluation_examples/examples/chrome/f79439ad-3ee8-4f99-a518-0eb60e5652b0.json @@ -0,0 +1,77 @@ +{ + "id": "f79439ad-3ee8-4f99-a518-0eb60e5652b0", + "snapshot": "chrome", + "instruction": "Search for a one way flight from Dublin to Vienna on 10th next month for 2 adults.", + "source": "test_task_2", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.ryanair.com/gb/en" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"check_direct_json_object", + "result": { + "type": "active_tab_url_parse", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "parse_keys":["originIata", "destinationIata", "tpAdults", "tpTeens", "tpChildren", "tpStartDate", "isReturn"], + "replace":{ + "tpStartDate": "time" + } + }, + "expected":{ + "type": "rule_relativeTime", + "rules":{ + "relativeTime": { + "from": "10th next month" + }, + "expected": { + "originIata": "DUB", + "destinationIata": "VIE", + "tpAdults": "2", + "tpTeens": "0", + "tpChildren": "0", + "time": "{Year}-{Month0D}-{DayD}", + "isReturn":"false" + } + } + } + } + } + \ No newline 
at end of file diff --git a/evaluation_examples/examples/chrome/fc6d8143-9452-4171-9459-7f515143419a.json b/evaluation_examples/examples/chrome/fc6d8143-9452-4171-9459-7f515143419a.json new file mode 100644 index 0000000..9b11e2f --- /dev/null +++ b/evaluation_examples/examples/chrome/fc6d8143-9452-4171-9459-7f515143419a.json @@ -0,0 +1,79 @@ +{ + "id": "fc6d8143-9452-4171-9459-7f515143419a", + "snapshot": "chrome", + "instruction": "Find the status of tomorrow flights from New York airports to Columbus in Ohio.", + "source": "test_task_0", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.delta.com/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func":"check_direct_json_object", + "result": { + "type": "active_tab_html_parse", + "selectors":[ + "application[name=Chromium] entry[name=Address\\ and\\ search\\ bar]" + ], + "goto_prefix": "https://www.", + "category": "class", + "class_singleObject":{ + "search-date": "time" + }, + "class_multiObject":{ + "search-segment-cities__city": { + "0": "start", + "1": "end" + } + } + }, + "expected":{ + "type": "rule_relativeTime", + "rules":{ + "relativeTime": { + "from": "tomorrow" + }, + "expected": { + "start": "NYC", + "end": "CMH", + "time": "{DoW}, {Month} {DayD}, {Year}" + } + } + } + } + } + \ No newline at end of file