From b01acb312ef1f1b54d8714254f7a8f11fd9f2ae5 Mon Sep 17 00:00:00 2001 From: David Chang Date: Mon, 4 Mar 2024 15:19:39 +0800 Subject: [PATCH 01/11] ver Mar4thv2 removed a useless function --- quick_compare_table.py | 51 ------------------------------------------ 1 file changed, 51 deletions(-) delete mode 100644 quick_compare_table.py diff --git a/quick_compare_table.py b/quick_compare_table.py deleted file mode 100644 index 6767f9a..0000000 --- a/quick_compare_table.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/python3 - -from desktop_env.evaluators.metrics import compare_table -import json -import sys -import os.path -from typing import Dict -from typing import Any - -import logging -import datetime - -logger = logging.getLogger() -logger.setLevel(logging.DEBUG) - -datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") - -file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str))) -debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str))) -stdout_handler = logging.StreamHandler(sys.stdout) -sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str))) - -file_handler.setLevel(logging.INFO) -debug_handler.setLevel(logging.DEBUG) -stdout_handler.setLevel(logging.INFO) -sdebug_handler.setLevel(logging.DEBUG) - -formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") -file_handler.setFormatter(formatter) -debug_handler.setFormatter(formatter) -stdout_handler.setFormatter(formatter) -sdebug_handler.setFormatter(formatter) - -stdout_handler.addFilter(logging.Filter("desktopenv")) -sdebug_handler.addFilter(logging.Filter("desktopenv")) - -logger.addHandler(file_handler) -logger.addHandler(debug_handler) -logger.addHandler(stdout_handler) -logger.addHandler(sdebug_handler) - -config_file: str = sys.argv[1] - -with open(config_file) as f: - config: Dict[str, Any] = json.load(f) - -print( compare_table( os.path.join("cache/", config["id"], config["evaluator"]["result"]["dest"]) - , os.path.join("cache/", config["id"], config["evaluator"]["expected"]["dest"]) - , **config["evaluator"]["options"] - ) - ) From 459e247736c0fefa60f8a2580f10e2b4ae8af83c Mon Sep 17 00:00:00 2001 From: David Chang Date: Mon, 4 Mar 2024 23:26:22 +0800 Subject: [PATCH 02/11] ver Mar4thv3 some new multi_app configs --- desktop_env/evaluators/getters/misc.py | 6 +- desktop_env/evaluators/metrics/general.py | 58 +++++++------ .../b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 2 +- .../e2392362-125e-4f76-a2ee-524b183a3412.json | 86 +++++++++++++++++++ .../f5c13cdd-205c-4719-a562-348ae5cd1d91.json | 71 +++++++++++++++ .../12086550-11c0-466b-b367-1d9e75b3910e.json | 12 +-- requirements.txt | 1 + 7 files changed, 202 insertions(+), 34 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json create mode 100644 evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json diff --git a/desktop_env/evaluators/getters/misc.py b/desktop_env/evaluators/getters/misc.py index db04aea..976db19 100644 --- a/desktop_env/evaluators/getters/misc.py +++ b/desktop_env/evaluators/getters/misc.py @@ -1,5 +1,5 @@ import logging -from typing import TypeVar +from typing import TypeVar, Dict from datetime import datetime, timedelta logger = logging.getLogger("desktopenv.getters.misc") @@ -74,13 +74,13 @@ relativeTime_to_IntDay = { "first monday four months later": "special" } -def get_rule(env, config: R) -> R: +def get_rule(env, config: Dict[str, R]) -> R: """ Returns the rule as-is. """ return config["rules"] -def get_rule_relativeTime(env, config: R) -> R: +def get_rule_relativeTime(env, config: Dict[str, R]) -> R: """ According to the rule definded in funciton "apply_rules_to_timeFormat", convert the relative time to absolute time. config: diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 4458a69..26a8e3c 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -1,6 +1,7 @@ import csv import functools import json +import yaml import operator import re import sqlite3 @@ -132,11 +133,11 @@ _accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org" } -def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: +def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float: """ Args: result (str): XML of GNOME Accessibility Tree - rules (Dict[str, Any]): dict like + rules (List[Dict[str, Any]]): list of dict like { "selectors": list of str as CSS selectors, will be connected by ", " to form a composite selector. Only one from `selectors` and @@ -154,30 +155,33 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: """ at: _Element = lxml.etree.fromstring(result) - if "xpath" in rules: - elements: List[_Element] = at.xpath(rules["xpath"], namespaces=_accessibility_ns_map) - elif "selectors" in rules: - selector = CSSSelector(", ".join(rules["selectors"]), namespaces=_accessibility_ns_map) - elements: List[_Element] = selector(at) - else: - raise ValueError("At least one of xpath and selectors is required") + total_match_score = 1. + for r in rules: + if "xpath" in r: + elements: List[_Element] = at.xpath(r["xpath"], namespaces=_accessibility_ns_map) + elif "selectors" in r: + selector = CSSSelector(", ".join(r["selectors"]), namespaces=_accessibility_ns_map) + elements: List[_Element] = selector(at) + else: + raise ValueError("At least one of xpath and selectors is required") - if len(elements) == 0: - print("no elements") - return 0. + if len(elements) == 0: + print("no elements") + return 0. - if "text" in rules: - match_func: Callable[[str], Number] = functools.partial(operator.eq if rules["exact"] \ - else (lambda a, b: fuzz.ratio(a, b) / 100.) - , rules["text"] - ) - match_score: Number = 0 - for elm in elements: - match_score = max(match_score, match_func(elm.text or None)) - else: - match_score = 1. + if "text" in r: + match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \ + else (lambda a, b: fuzz.ratio(a, b) / 100.) + , r["text"] + ) + match_score: Number = 0 + for elm in elements: + match_score = max(match_score, match_func(elm.text or None)) + else: + match_score = 1. + total_match_score *= match_score - return float(match_score) + return float(total_match_score) # def check_existence(result: str, *args) -> float: @@ -189,7 +193,7 @@ def run_sqlite3(result: str, rules: Dict[str, Any]) -> float: return float(cursor.fetchone()[0] or 0) -def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]]) -> float: +def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]], is_yaml: bool = False) -> float: """ Args: result (str): path to json file @@ -204,6 +208,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str ], "unexpect": page-tab[name=\"About Profiles\"]" - ] - } + "rules": [ + { + "selectors": [ + "application[name=Thunderbird] page-tab-list[attr|id=\"tabmail-tabs\"]>page-tab[name=\"About Profiles\"]" + ] + } + ] }, "func": "check_accessibility_tree" } diff --git a/requirements.txt b/requirements.txt index a6082f9..6571f11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,3 +42,4 @@ func-timeout beautifulsoup4 dashscope google-generativeai +PyYaml From f21d6851089c7251889a01ea4d460e880cc60646 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 6 Mar 2024 15:05:47 +0800 Subject: [PATCH 03/11] Update examples --- .../337d318b-aa07-4f4f-b763-89d9a2dd013f.json | 49 +++++++++ .../82e3c869-49f6-4305-a7ce-f3e64a0618e7.json | 89 +++++++++++++++ .../8e116af7-7db7-4e35-a68b-b0939c066c78.json | 54 ++++++++++ .../deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json | 101 ++++++++++++++++++ 4 files changed, 293 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json create mode 100644 evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json create mode 100644 evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json create mode 100644 evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json diff --git a/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json new file mode 100644 index 0000000..d2d4193 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json @@ -0,0 +1,49 @@ +{ + "id": "337d318b-aa07-4f4f-b763-89d9a2dd013f", + "snapshot": "libreoffice_calc", + "instruction": "Cross-check the invoices with the bank statements and identify any discrepancies. Then pull out the invoices that don't match the statements and put them in the \"problematic\" folder.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/invoice TII-20220301-90.pdf", + "url": "https://drive.google.com/uc?id=13y1Dkh9dJUaWeMSk6pg_UY-R7K4bhAJM&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # GES-20220215-82.pdf", + "url": "https://drive.google.com/uc?id=1zZYJQVpbGmqT_eH0x9Z5O7WoIFrQo3sN&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # 243729.pdf", + "url": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download" + }, + { + "path": "/home/user/Desktop/Bank-Statement.pdf", + "url": "https://drive.google.com/uc?id=1-KS6p0aip56iPmH4okhXZhLgqVwrcjfw&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/337d318b-aa07-4f4f-b763-89d9a2dd013f", + "related_apps": [ + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/problematic/Invoice # 243729.pdf", + "dest": "Invoice # 243729.pdf" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download", + "dest": "Invoice # 243729 Gold.pdf" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json new file mode 100644 index 0000000..deda04e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json @@ -0,0 +1,89 @@ +{ + "id": "82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "snapshot": "libreoffice_calc", + "instruction": "Please sift through the folder with all the event photos taken by our photographer. I need you to extract the photos featuring the presenters and place them in a separate folder named 'presenter'. Then, compress this folder into a zip file so I can easily share it with others later.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/IDS LLM seminar/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00659.jpg", + "url": "https://drive.google.com/uc?id=1NjnSab2aEtJytYajM9FqeXsLm4ItxTsJ&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00657.jpg", + "url": "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00574.jpg", + "url": "https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00554.jpg", + "url": "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00495.jpg", + "url": "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00454.jpg", + "url": "https://drive.google.com/uc?id=1t9L7fVQVxjovTQufetlogulIctn7DF_L&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "related_apps": [ + "os", + "image" + ], + "evaluator": { + "func": "compare_image_list", + "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/presenter/DSC00657.jpg", + "/home/user/Desktop/presenter/DSC00574.jpg", + "/home/user/Desktop/presenter/DSC00554.jpg", + "/home/user/Desktop/presenter/DSC00495.jpg" + ], + "dest": [ + "DSC00657.jpg", + "DSC00574.jpg", + "DSC00554.jpg", + "DSC00495.jpg" + ], + "multi": "true" + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download", + "https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download", + "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download", + "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + ], + "dest": [ + "DSC00657_gold.jpg", + "DSC00574_gold.jpg", + "DSC00554_gold.jpg", + "DSC00495_gold.jpg" + ], + "multi": "true" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json new file mode 100644 index 0000000..1214c6d --- /dev/null +++ b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json @@ -0,0 +1,54 @@ +{ + "id": "8e116af7-7db7-4e35-a68b-b0939c066c78", + "snapshot": "libreoffice_calc", + "instruction": "Please update my bookkeeping sheet with the recent transactions from the provided folder, detailing my expenses over the past few days.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/my_bookkeeping.xlsx", + "url": "https://drive.google.com/uc?id=1QOSpTZPFzFZeC0tng4Gfws544LFln836&export=download" + }, + { + "path": "/home/user/Desktop/receipt_0.jpeg", + "url": "https://drive.google.com/uc?id=1b0BRc-BzXObVCUEonJfRbDsrgxZugj3U&export=download" + }, + { + "path": "/home/user/Desktop/receipt_1.jpg", + "url": "https://drive.google.com/uc?id=1S-JBDqwEf7Z_JXDItK_F4BOHgScTjlyN&export=download" + }, + { + "path": "/home/user/Desktop/receipt_2.jpg", + "url": "https://drive.google.com/uc?id=1Ys2abZi9_0y8sxuj2vCbC0OhjC6YdrC-&export=download" + }, + { + "path": "/home/user/Desktop/receipt_3.pdf", + "url": "https://drive.google.com/uc?id=1sKvBbGDpmUkv891xTqX7w5dtEvchQahd&export=download" + }, + { + "path": "/home/user/Desktop/receipt_4.jpg", + "url": "https://drive.google.com/uc?id=1kW7xH5bc2jRaKGDKHDrgSehTrPgkyzkc&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78", + "related_apps": [ + "", + "" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} diff --git a/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json new file mode 100644 index 0000000..0037591 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json @@ -0,0 +1,101 @@ +{ + "id": "deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "snapshot": "libreoffice_calc", + "instruction": "Find a paper list of all the new foundation language models issued on 11st Oct. 2023 via arxiv daily, and organize it into the sheet I opened.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/New Large Language Models.xlsx", + "url": "https://drive.google.com/uc?id=1NJFAUDzatd5TbBqXeCy3-ok4BWj-xayT&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/New Large Language Models.xlsx" + } + } + ], + "trajectory": "trajectories/deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "related_apps": [ + "libreoffice_calc", + "chrome", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "New Large Language Models.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/New Large Language Models.xlsx", + "dest": "New Large Language Models.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1BHOyjFo72b74YKWTqPMaoNvCzICkos-G&export=download", + "dest": "New Large Language Models Gold.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "B2:B5", + "C2:C5" + ], + "type": "exact_match" + }, + { + "range": [ + "A2:A5" + ], + "type": "fuzzy_match", + "threshold": 90, + "ignore_case": true + } + ] + } + ] + } + } +} From 5817403e2e9d443d5b5ecc59e59cb6e41d0e074d Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 15:06:08 +0800 Subject: [PATCH 04/11] ver Mar6th three new tasks --- .../415ef462-bed3-493a-ac36-ca8c6d23bf1b.json | 147 ++++++++++++++++++ .../e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json | 59 +++++++ .../f8369178-fafe-40c2-adc4-b9b08a125456.json | 31 ++++ 3 files changed, 237 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json create mode 100644 evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json create mode 100644 evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json diff --git a/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json new file mode 100644 index 0000000..f3365fb --- /dev/null +++ b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json @@ -0,0 +1,147 @@ +{ + "id": "415ef462-bed3-493a-ac36-ca8c6d23bf1b", + "snapshot": "thunderbird", + "instruction": "Save the AWS invoice of December from the email. I have moved that email to local \"Bills\" folder. Save it to the my receipts folder. Keep the file name pattern and update a record to my tally book.", + "source": "authors", + "config": [ + { + "type": "execute", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Finance/receipts", "/home/user/Documents/Projects"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2308.pdf", "url": "https://drive.google.com/uc?id=1azRFXf4A7fvW0S7r9upHvleMEi-92hHM&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2309.pdf", "url": "https://drive.google.com/uc?id=1x-lpHm8U4U7uRPZ74-9wq9KzW2R55ln1&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2310.pdf", "url": "https://drive.google.com/uc?id=1pcrgV9G6NO4ekMEQBiupwXtq6mmke7b_&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2311.pdf", "url": "https://drive.google.com/uc?id=1JzbCK_nIY8X_3QZjnkzTtb-cRoq9zNT-&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/X-receipt-2312.pdf", "url": "https://drive.google.com/uc?id=1QzWjNzvNosG_yQr7VVonvYb3cUYF5f3u&export=download"}, + {"path": "/home/user/Documents/Finance/tally_book.xlsx", "url": "https://drive.google.com/uc?id=13yuLhBPmouoWR-DybfgaIbWUOxbY_jhL&export=download"}, + {"path": "/home/user/.projects.tar.xz", "url": "https://drive.google.com/uc?id=1oJcxpjqF474Wm16i1aZc8DlCEfAvc4t_&export=download"}, + { + "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.projects.tar.xz", "-C", "/home/user/Documents/Projects"] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xzv", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": ["thunderbird"] + } + } + ], + "trajectory": "trajectories/415ef462-bed3-493a-ac36-ca8c6d23bf1b", + "related_apps": ["thunderbird", "libreoffice_calc", "os"], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "tally_book.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/.aws-invoice-2312.pdf", "url": "https://drive.google.com/uc?id=1RqbulzKG_HeYb1GZmLABOzlohlFg02UU&export=download"} + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["diff", ".aws-invoice-2312.pdf", "/home/user/Documents/Finance/receipts/aws-invoice-2312.pdf"], + "stdout": "diff.out" + } + } + ], + "func": ["compare_table", "check_list"], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Documents/Finance/tally_book.xlsx", + "dest": "tally_book.xlsx" + }, + { + "type": "cache_file", + "path": "diff.out" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1x8m-korGI1PhJm8PAQVTlWYKneK4WKvn&export=download", + "dest": "tally_book_gt.xlsx" + }, + { + "type": "rule", + "rules": { + "unexpect": [ + ".+" + ] + } + } + ], + "options": [ + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + }, + {} + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json b/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json new file mode 100644 index 0000000..0d86766 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json @@ -0,0 +1,59 @@ +{ + "id": "e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56", + "snapshot": "libreoffice_writer", + "instruction": "Install LanguageTool extension for my LibreOffice", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": ["libreoffice", "--writer"] + } + } + ], + "trajectory": "trajectories/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56", + "related_apps": ["chrome", "libreoffice", "os"], + "evaluator": { + "postconfig": [ + { + "type": "command", + "parameters": { + "command": ["grep", "-nHr", "languagetool", "/home/user/.config/libreoffice/4/user/uno_packages/cache/uno_packages/"], + "stdout": "grep.out" + } + }, + { + "type": "command", + "parameters": { + "command": ["apt", "list", "--installed"], + "stdout": "apt.out" + } + } + ], + "func": ["check_list", "check_list"], + "result": [ + { + "type": "cache_file", + "path": "grep.out" + }, + { + "type": "cache_file", + "path": "apt.out" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": ["org\\.openoffice\\.languagetool\\.oxt"] + } + }, + { + "type": "rule", + "rules": { + "expect": ["openjdk-\\d+-(jre|jdk)"] + } + } + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json b/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json new file mode 100644 index 0000000..0a56921 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json @@ -0,0 +1,31 @@ +{ + "id": "f8369178-fafe-40c2-adc4-b9b08a125456", + "snapshot": "chrome", + "instruction": "Help me to install Orchis theme from gnome-look.org and change to it for my GNOME desktop.", + "source": "https://itsfoss.com/install-switch-themes-gnome-shell", + "config": [], + "trajectory": "trajectories/f8369178-fafe-40c2-adc4-b9b08a125456", + "related_apps": ["chrome", "os"], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": ["gsettings", "get", "org.gnome.desktop.interface", "gtk-theme"], + "stdout": "gsettings.out" + } + } + ], + "func": "check_list", + "result": { + "type": "cache_file", + "path": "gsettings.out" + }, + "expected": { + "type": "rule", + "rules": { + "expect": ["Orchis"] + } + } + } +} From f72b788cdcc22a0583b9f806162399cfaf2cc328 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 15:21:09 +0800 Subject: [PATCH 05/11] ver Mar6th fixed bug in sheet_fuzzy option of compare_table --- desktop_env/evaluators/metrics/table.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index a35c13b..4a58e50 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -12,6 +12,7 @@ import pandas as pd from openpyxl import Workbook from openpyxl.cell.cell import Cell from openpyxl.worksheet.cell_range import MultiCellRange +from openpyxl.utils import get_column_letter from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.worksheet.worksheet import Worksheet @@ -208,8 +209,10 @@ def compare_table(result: str, expected: str = None, **options) -> float: for rl in r["rules"]: for rng in MultiCellRange(rl["range"]): for cdn in rng.cells: - value1: str = str(read_cell_value(*sheet1, cdn)) - value2: str = str(read_cell_value(*sheet2, cdn)) + coordinate: str = "{:}{:d}".format(get_column_letter(cdn[1]), cdn[0]) + value1: str = str(read_cell_value(*sheet1, coordinate)) + value2: str = str(read_cell_value(*sheet2, coordinate)) + logger.debug("%s: %s vs %s", cdn, value1, value2) for rplc in rl.get("normalization", []): value1 = value1.replace(rplc[0], rplc[1]) @@ -230,11 +233,11 @@ def compare_table(result: str, expected: str = None, **options) -> float: if rl["type"]=="includes": metric: bool = value1 in value2 - if rl["type"]=="includes_by": + elif rl["type"]=="includes_by": metric: bool = value2 in value1 - if rl["type"]=="fuzzy_match": + elif rl["type"]=="fuzzy_match": metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.) - if rl["type"]=="exact_match": + elif rl["type"]=="exact_match": metric: bool = value1==value2 total_metric = total_metric and metric From e045eee901294c409e05eaa0e14c3e448baaddfa Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 16:16:38 +0800 Subject: [PATCH 06/11] ver Mar6thv2 updated read_cell_value --- desktop_env/evaluators/metrics/utils.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 4515cd5..a67b6bb 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -311,14 +311,15 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: , namespaces=_xlsx_ns_imapping ) logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell)) - if "@t" not in cell["c"]: + try: + if "@t" not in cell["c"] or cell["c"]["@t"] == "n": + return float(cell["c"]["v"]) + if cell["c"]["@t"] == "s": + return shared_strs[int(cell["c"]["v"])] + if cell["c"]["@t"] == "str": + return cell["c"]["v"] + except ValueError: return None - if cell["c"]["@t"] == "s": - return shared_strs[int(cell["c"]["v"])] - if cell["c"]["@t"] == "n": - return float(cell["c"]["v"]) - if cell["c"]["@t"] == "str": - return cell["c"]["v"] # }}} read_cell_value # From 806ee50e72bf4cd82c231fd6de22ff2a0c98bdeb Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 16:24:17 +0800 Subject: [PATCH 07/11] ver Mar6th fixed bug --- desktop_env/evaluators/metrics/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index a67b6bb..a7f1f80 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -318,7 +318,7 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: return shared_strs[int(cell["c"]["v"])] if cell["c"]["@t"] == "str": return cell["c"]["v"] - except ValueError: + except KeyError, ValueError: return None # }}} read_cell_value # From 80ad7e53c42468488e02a0d9ede4bd81fa9b623a Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 16:29:54 +0800 Subject: [PATCH 08/11] ver Mar6thv2 fixed bug --- desktop_env/evaluators/metrics/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index a7f1f80..b57de00 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -318,7 +318,7 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: return shared_strs[int(cell["c"]["v"])] if cell["c"]["@t"] == "str": return cell["c"]["v"] - except KeyError, ValueError: + except (KeyError, ValueError): return None # }}} read_cell_value # From b79aae4f66b12a741540b600cc06a7d8ebcf2454 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 6 Mar 2024 19:52:48 +0800 Subject: [PATCH 09/11] Update examples --- .../185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json | 96 +++++++++ .../2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json | 102 +++++++++ .../3a93cae4-ad3e-403e-8c12-65303b271818.json | 26 +++ .../767a3271-56db-4745-ac5d-846ef05e6fe5.json | 48 +++-- .../8e116af7-7db7-4e35-a68b-b0939c066c78.json | 195 +++++++++++++++++- .../d28853f4-500a-4749-b9dc-79c3576e759b.json | 26 +++ 6 files changed, 464 insertions(+), 29 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json create mode 100644 evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json create mode 100644 evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json create mode 100644 evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json diff --git a/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json b/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json new file mode 100644 index 0000000..30d5c38 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json @@ -0,0 +1,96 @@ +{ + "id": "185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "snapshot": "libreoffice_calc", + "instruction": "Transfer the data from our 'Employee Performance Evaluation Summary' Excel sheet into our standardized PDF evaluation forms. Each employee's evaluation data should be accurately filled into the designated fields of the PDF form. It's crucial that the final PDF documents retain a uniform and professional look, ready for distribution to our staff or for filing purposes. Furthermore, please ensure that each PDF file is named according to the employee's name as it appears in the Excel document. This will greatly streamline our evaluation process and enhance our efficiency in managing employee performance records. Oh, use \"√\" as mark on characters.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/Employee Performance Evaluation Summary.xlsx", + "url": "https://drive.google.com/uc?id=1uOzi66bzO_WUnoS4Oqsodrd7_YPLatEk&export=download" + }, + { + "path": "/home/user/Desktop/review_template.pdf", + "url": "https://drive.google.com/uc?id=1YJ4RPGFUuS48tBh31gBerA16JSMw498w&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Employee Performance Evaluation Summary.xlsx" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/review_template.pdf" + } + } + ], + "trajectory": "trajectories/185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "related_apps": [ + "libreoffice_calc", + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1kZM90nA1krRmV9ug5_BBe8VlrZRVLiLK&export=download", + "https://drive.google.com/uc?id=1zyLzYYThwyit9ciXpfNfPFlBomolOauY&export=download", + "https://drive.google.com/uc?id=1gMT7JBftuymajMAO5rwksORpeVq3uGmH&export=download", + "https://drive.google.com/uc?id=1x0DdtUSZyBifl1tGIWlWKn255WusJeR4&export=download", + "https://drive.google.com/uc?id=1UAcG32WO8XCXElcanjGwbSpJwFuyOkts&export=download", + "https://drive.google.com/uc?id=1PRgryg7Y5evKnDG2LPtAttVp9qAf5VyZ&export=download", + "https://drive.google.com/uc?id=1JxEDriCS2W7BQLdkIgxu_WFCRa9ib4D7&export=download" + ], + "dest": [ + "Alex Lee_Gold.pdf", + "David Wilson_Gold.pdf", + "Emily Johnson_Gold.pdf", + "John Doe_Gold.pdf", + "Linda Green_Gold.pdf", + "Michael Brown_Gold.pdf", + "Sophia Carter_Gold.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + }, + "expected": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/Alex Lee.pdf", + "/home/user/Desktop/David Wilson.pdf", + "/home/user/Desktop/Emily Johnson.pdf", + "/home/user/Desktop/John Doe.pdf", + "/home/user/Desktop/Linda Green.pdf", + "/home/user/Desktop/Michael Brown.pdf", + "/home/user/Desktop/Sophia Carter.pdf" + ], + "dest": [ + "Alex Lee.pdf", + "David Wilson.pdf", + "Emily Johnson.pdf", + "John Doe.pdf", + "Linda Green.pdf", + "Michael Brown.pdf", + "Sophia Carter.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + } + } +} diff --git a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json new file mode 100644 index 0000000..239d695 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json @@ -0,0 +1,102 @@ +{ + "id": "2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "snapshot": "libreoffice_calc", + "instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. If it turns out that the current formatting does not align with APA 7 standards, I would greatly appreciate your assistance in making the necessary adjustments to comply with those guidelines. ", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/students work/", + "/home/user/Desktop/Lec powerpoint/", + "/home/user/Desktop/Grammar test/", + "/home/user/Desktop/Grammar rules PDF/", + "/home/user/Desktop/FDI/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/students work/Zheng He .docx", + "url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download" + }, + { + "path": "/home/user/Desktop/students work/The literature reviews of weekly readings.docx", + "url": "https://drive.google.com/uc?id=18zoZCNtP-wTkxXp2FhH3O_NdLZKVMPIr&export=download" + }, + { + "path": "/home/user/Desktop/students work/The British Justice System.docx", + "url": "https://drive.google.com/uc?id=1z3YHSN4CvC5kN1AwTWB_-plRS4p5GAch&export=download" + }, + { + "path": "/home/user/Desktop/students work/quiz2.docx", + "url": "https://drive.google.com/uc?id=1R5Bii_kvnv_fZVXV-6DMt6Hgq-1gXMo1&export=download" + }, + { + "path": "/home/user/Desktop/students work/quiz.docx", + "url": "https://drive.google.com/uc?id=1PvlGMVX7YkricrjoPRe0e5VQlHeozRPD&export=download" + }, + { + "path": "/home/user/Desktop/students work/Q1&2&3.docx", + "url": "https://drive.google.com/uc?id=1kLQ3lnba6p9lqikHhKDdbqrYagHnZWU_&export=download" + }, + { + "path": "/home/user/Desktop/students work/Photo Ethics in Journalism.docx", + "url": "https://drive.google.com/uc?id=1V6nG6HP_9Kb5KBCRTpaGsRTdPxnJSmRm&export=download" + }, + { + "path": "/home/user/Desktop/students work/cassie.docx", + "url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download" + }, + { + "path": "/home/user/Desktop/students work/case study.docx", + "url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf", + "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "/home/user/Desktop/Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "/home/user/Desktop/Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "related_apps": [ + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} diff --git a/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json new file mode 100644 index 0000000..e1f0544 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json @@ -0,0 +1,26 @@ +{ + "id": "3a93cae4-ad3e-403e-8c12-65303b271818", + "snapshot": "libreoffice_calc", + "instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json index 9c7a2b7..28542f8 100644 --- a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json +++ b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json @@ -1,27 +1,25 @@ { - "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", - "snapshot": "libreoffice_calc", - "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. Thanks a bunch!", - "source": "authors", - "config": [ - - ], - "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", - "related_apps": [ - "thunderbird", - "libreoffice_calc" - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - - }, - "expected": { - - }, - "options": { - - } - } + "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", + "snapshot": "libreoffice_calc", + "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. Thanks a bunch!", + "source": "authors", + "config": [ + { + } + ], + "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", + "related_apps": [ + "thunderbird", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } } diff --git a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json index 1214c6d..ce00111 100644 --- a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json +++ b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json @@ -34,21 +34,208 @@ } ] } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/my_bookkeeping.xlsx" + } } ], "trajectory": "trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78", "related_apps": [ - "", - "" + "libreoffice_calc", + "os", + "image", + "pdf" ], "evaluator": { - "postconfig": [], - "func": "", + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "my_bookkeeping.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", "result": { + "type": "vm_file", + "path": "/home/user/Desktop/my_bookkeeping.xlsx", + "dest": "my_bookkeeping.xlsx" }, "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1ygEDdVlkf2ZyqMxJ_ktqo9G_g--rc6co&export=download", + "dest": "my_bookkeeping_gold.xlsx" }, "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "A1:A8", + "B1:B8", + "C1:C8", + "D1:D8", + "E1:E8" + ], + "type": "exact_match" + } + ] + }, + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "C9:C13" + ], + "type": "exact_match", + "ignore_case": true + } + ] + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D9", + "props": { + "value": { + "method": "approx:0.1", + "ref": -186.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3670 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -5.7 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -154.06 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -8.1 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E9", + "props": { + "value": { + "method": "approx:0.1", + "ref": 603.07 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3066.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3072.63 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3226.69 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3234.79 + } + } + } + ] } } } diff --git a/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json b/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json new file mode 100644 index 0000000..0fbac44 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json @@ -0,0 +1,26 @@ +{ + "id": "d28853f4-500a-4749-b9dc-79c3576e759b", + "snapshot": "libreoffice_calc", + "instruction": "Hey there! So, I've got this bit of a situation and I'm hoping you can help me out. I've been working on gathering research for this big project at work, and I've ended up with a bunch of PDF files from various sources. Each PDF contains some really crucial information that I need, but here's the kicker - I need all of this info to be in one place where I can easily access and edit it, like in a Word document or a README file.\n\nThe PDFs are a mix of things - some are reports with statistics and findings, others are articles with key insights, and a few are just informational brochures with important dates and details. What I need is for someone to go through each PDF, extract all the relevant information, and then organize it into a single document. I'm talking about making sure that all the stats, insights, dates, and details from each PDF are neatly compiled, so I don't miss anything.\n\nAnd to make things a bit more complicated, some of these PDFs are scanned images of documents, so the text isn't directly selectable. I guess that means you might need to manually type out some parts or find a way to convert the images to text that can be copied.\n\nOnce everything's been compiled into this one document, could you save it as both a Word doc and a README file? I'd like to have it in these two formats so I can easily share it with my team and also have a version that's ready to be uploaded to our project's repository.\n\nOh, and could you make sure to organize the information by the source PDF? Like, maybe start each section with the title of the PDF or a brief description of its contents, followed by all the extracted info from that PDF. This way, it'll be easier for me and the team to trace back to the original sources if we need to.\n\nI know it's a lot to ask, but having all this information consolidated and organized is going to be a huge help for moving forward with the project. Thanks a bunch for helping me tackle this!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/d28853f4-500a-4749-b9dc-79c3576e759b", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} From da0dafc32c6ea0c9c468e9ff91041acb9935a6a7 Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Wed, 6 Mar 2024 21:20:26 +0800 Subject: [PATCH 10/11] add multi-apps 5 examples by ruisheng 2024-03-06 --- desktop_env/controllers/setup.py | 2 + desktop_env/evaluators/getters/__init__.py | 5 +- desktop_env/evaluators/getters/chrome.py | 85 ++++++++++++- desktop_env/evaluators/getters/file.py | 21 +++- desktop_env/evaluators/getters/impress.py | 21 ++-- desktop_env/evaluators/metrics/__init__.py | 3 +- desktop_env/evaluators/metrics/docs.py | 14 +++ desktop_env/evaluators/metrics/general.py | 18 +++ .../22a4636f-8179-4357-8e87-d1743ece1f81.json | 108 ++++++++++++++++ .../236833a3-5704-47fc-888c-4f298f09f799.json | 67 ++++++++++ .../5990457f-2adb-467b-a4af-5c857c92d762.json | 116 ++++++++++++++++++ .../67890eb6-6ce5-4c00-9e3d-fb4972699b06.json | 110 +++++++++++++++++ .../778efd0a-153f-4842-9214-f05fc176b877.json | 89 ++++++++++++++ .../settings/googledrive/credentials.json | 1 - 14 files changed, 645 insertions(+), 15 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json create mode 100644 evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json create mode 100644 evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json create mode 100644 evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json create mode 100644 evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json delete mode 100644 evaluation_examples/settings/googledrive/credentials.json diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 7a96288..b475008 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -450,6 +450,8 @@ class SetupController: query(str): query pattern string to search files or folder in google drive to delete, please refer to https://developers.google.com/drive/api/guides/search-files?hl=en about how to write query string. trash(bool): whether to delete files permanently or move to trash. By default, trash=false, completely delete it. + for mkdirs: + path(List[str]): the path in the google drive to create folder for upload: path(str): remote url to download file dest(List[str]): the path in the google drive to store the downloaded file diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py index 43f4c1b..958d98d 100644 --- a/desktop_env/evaluators/getters/__init__.py +++ b/desktop_env/evaluators/getters/__init__.py @@ -23,9 +23,10 @@ from .chrome import ( get_active_tab_url_parse, get_gotoRecreationPage_and_get_html_content, get_url_dashPart, - get_active_url_from_accessTree + get_active_url_from_accessTree, + get_info_from_website ) -from .file import get_cloud_file, get_vm_file, get_cache_file +from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file from .general import get_vm_command_line, get_vm_terminal_output from .gimp import get_gimp_config_file from .impress import get_audio_in_slide diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index 86aefc4..8bd5842 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -11,7 +11,7 @@ import lxml.etree import requests from lxml.cssselect import CSSSelector from lxml.etree import _Element -from playwright.sync_api import sync_playwright +from playwright.sync_api import sync_playwright, expect from pydrive.auth import GoogleAuth from pydrive.drive import GoogleDrive, GoogleDriveFileList, GoogleDriveFile @@ -35,6 +35,89 @@ WARNING: """ +def get_info_from_website(env, config: Dict[Any, Any]) -> Any: + """ Get information from a website. Especially useful when the information may be updated through time. + Args: + env (Any): The environment object. + config (Dict[Any, Any]): The configuration dictionary. + - url (str): The URL of the website to visit + - infos (List[Dict[str, str]]): The list of information to be extracted from the website. Each dictionary contains: + - action (str): chosen from 'inner_text', 'attribute', 'click_and_inner_text', 'click_and_attribute', etc., concretely, + - inner_text: extract the inner text of the element specified by the selector + - attribute: extract the attribute of the element specified by the selector + - click_and_inner_text: click elements following the selector and then extract the inner text of the last element + - click_and_attribute: click elements following the selector and then extract the attribute of the last element + - selector (Union[str, List[str]]): The CSS selector(s) of the element(s) to be extracted. + - attribute (str): optional for 'attribute' and 'click_and_attribute', the attribute to be extracted. + - backups (Any): The backup information to be returned if the extraction fails. + """ + try: + host = env.vm_ip + port = 9222 # fixme: this port is hard-coded, need to be changed from config file + remote_debugging_url = f"http://{host}:{port}" + with sync_playwright() as p: + # connect to remote Chrome instance + try: + browser = p.chromium.connect_over_cdp(remote_debugging_url) + except Exception as e: + # If the connection fails (e.g., the agent close the browser instance), start a new browser instance + app = 'chromium' if 'arm' in platform.machine() else 'google-chrome' + payload = json.dumps({"command": [ + app, + "--remote-debugging-port=1337" + ], "shell": False}) + headers = {"Content-Type": "application/json"} + requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload) + time.sleep(5) + browser = p.chromium.connect_over_cdp(remote_debugging_url) + + page = browser.contexts[0].new_page() + page.goto(config["url"]) + page.wait_for_load_state('load') + infos = [] + for info_dict in config.get('infos', []): + if page.url != config["url"]: + page.goto(config["url"]) + page.wait_for_load_state('load') + action = info_dict.get('action', 'inner_text') + if action == "inner_text": + ele = page.locator(info_dict['selector']) + expect(ele).to_be_visible() + infos.append(ele.inner_text()) + elif action == "attribute": + ele = page.locator(info_dict['selector']) + expect(ele).to_be_visible() + infos.append(ele.get_attribute(info_dict['attribute'])) + elif action == 'click_and_inner_text': + for idx, sel in enumerate(info_dict['selector']): + if idx != len(info_dict['selector']) - 1: + link = page.locator(sel) + expect(link).to_be_visible() + link.click() + page.wait_for_load_state('load') + else: + ele = page.locator(sel) + expect(ele).to_be_visible() + infos.append(ele.inner_text()) + elif action == 'click_and_attribute': + for idx, sel in enumerate(info_dict['selector']): + if idx != len(info_dict['selector']) - 1: + link = page.locator(sel) + expect(link).to_be_visible() + link.click() + page.wait_for_load_state('load') + else: + ele = page.locator(sel) + expect(ele).to_be_visible() + infos.append(ele.get_attribute(info_dict['attribute'])) + else: + raise NotImplementedError(f'The action {action} is not supported yet.') + return infos + except Exception as e: + logger.error(f'[ERROR]: failed to obtain information from the website: {config["url"]}. Use backup results instead.') + return config.get('backups', None) + + # The following ones just need to load info from the files of software, no need to connect to the software def get_default_search_engine(env, config: Dict[str, str]): os_type = env.vm_platform diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py index 95c0a18..98f6e00 100644 --- a/desktop_env/evaluators/getters/file.py +++ b/desktop_env/evaluators/getters/file.py @@ -1,8 +1,27 @@ import os from typing import Dict, List, Set from typing import Optional, Any, Union - import requests +import pandas as pd + + +def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any: + """ + Config: + path (str): absolute path on the VM to fetch + """ + + path = config["path"] + file_path = get_vm_file(env, {"path": path, "dest": os.path.basename(path)}) + file_type, file_content = config['file_type'], config['file_content'] + if file_type == 'xlsx': + if file_content == 'last_row': + df = pd.read_excel(file_path) + last_row = df.iloc[-1] + last_row_as_list = last_row.astype(str).tolist() + return last_row_as_list + else: + raise NotImplementedError(f"File type {file_type} not supported") def get_cloud_file(env, config: Dict[str, Any]) -> Union[str, List[str]]: diff --git a/desktop_env/evaluators/getters/impress.py b/desktop_env/evaluators/getters/impress.py index ec4a154..d0673dd 100644 --- a/desktop_env/evaluators/getters/impress.py +++ b/desktop_env/evaluators/getters/impress.py @@ -40,20 +40,23 @@ def get_audio_in_slide(env, config: Dict[str, str]): audio_file_path = audio_file_path.replace('\\', '/') # Create a temporary directory to extract the audio file - with tempfile.TemporaryDirectory() as tmpdirname: - # Extract the audio file - myzip.extract(audio_file_path, tmpdirname) - # Get the full path of the extracted audio file - extracted_audio_path = os.path.join(tmpdirname, audio_file_path) - # Return the extracted audio file path - audio_file_path = extracted_audio_path - + tmpdirname = os.path.dirname(ppt_file_localhost_path) + myzip.extract(audio_file_path, tmpdirname) + audio_file_path = os.path.join(tmpdirname, audio_file_path) + return audio_file_path + # with tempfile.TemporaryDirectory() as tmpdirname: + # # Extract the audio file + # myzip.extract(audio_file_path, tmpdirname) + # # Get the full path of the extracted audio file + # extracted_audio_path = os.path.join(tmpdirname, audio_file_path) + # # Return the extracted audio file path + # audio_file_path = extracted_audio_path else: # the audio file is external to the .pptx file # Return the audio file path assert target.startswith("file://"), target audio_file_path = target[7:] - + break if audio_file_path is None: return None diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index c965a95..00165aa 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -60,7 +60,8 @@ from .general import ( fuzzy_match, check_include_exclude, check_direct_json_object, - diff_text_file + diff_text_file, + literal_match ) from .gimp import ( check_brightness_decrease_and_structure_sim, diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index b44df9c..4fa1a03 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -58,6 +58,8 @@ def contains_page_break(docx_file): def compare_docx_files(file1, file2, **options): ignore_blanks = options.get('ignore_blanks', True) + ignore_case = options.get('ignore_case', False) + ignore_order = options.get('ignore_order', False) content_only = options.get('content_only', False) def get_paragraph_texts_odt(document): @@ -82,11 +84,17 @@ def compare_docx_files(file1, file2, **options): doc2 = Document(file2) doc1_paragraphs = [p.text for p in doc1.paragraphs] doc2_paragraphs = [p.text for p in doc2.paragraphs] + if ignore_order: + doc1_paragraphs = sorted(doc1_paragraphs) + doc2_paragraphs = sorted(doc2_paragraphs) elif file1.endswith('.odt') and file2.endswith('.odt'): doc1 = load(file1) doc2 = load(file2) doc1_paragraphs = get_paragraph_texts_odt(doc1) doc2_paragraphs = get_paragraph_texts_odt(doc2) + if ignore_order: + doc1_paragraphs = sorted(doc1_paragraphs) + doc2_paragraphs = sorted(doc2_paragraphs) else: # Unsupported file types or mismatch print("Unsupported file types or mismatch between file types.") @@ -96,6 +104,8 @@ def compare_docx_files(file1, file2, **options): # Compare the content of the documents text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip() text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip() + if ignore_case: + text1, text2 = text1.lower(), text2.lower() similarity = fuzz.ratio(text1, text2) / 100.0 return similarity @@ -103,6 +113,8 @@ def compare_docx_files(file1, file2, **options): if ignore_blanks: text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip() text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip() + if ignore_case: + text1, text2 = text1.lower(), text2.lower() if text1 != text2: return 0 else: @@ -111,6 +123,8 @@ def compare_docx_files(file1, file2, **options): # Compare each paragraph for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs): + if ignore_case: + p1, p2 = p1.lower(), p2.lower() if p1 != p2: return 0 diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 4458a69..b3a837d 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -39,6 +39,24 @@ def exact_match(result, rules) -> float: else: return 0. + +def literal_match(result: Any, expected: Any, **options) -> float: + literal_type = options.get('type', 'str') + if literal_type == 'str': + ignore_case = options.get('ignore_case', False) + score = str(result) == str(expected) if not ignore_case else str(result).lower() == str(expected).lower() + return float(score) + elif literal_type == 'list': + if type(result) not in [list, tuple] or type(expected) not in [list, tuple] or len(result) != len(expected): + return .0 + ignore_case = options.get('ignore_case', False) + result = [str(s) for s in result] if not ignore_case else [str(s).lower() for s in result] + expected = [str(s) for s in expected] if not ignore_case else [str(s).lower() for s in expected] + return float(result == expected) + else: + raise NotImplementedError(f"Type {type} not supported") + + def is_in_list(result, rules) -> float: expect = rules["expected"] if expect in result: diff --git a/evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json b/evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json new file mode 100644 index 0000000..d22ddaf --- /dev/null +++ b/evaluation_examples/examples/multi_apps/22a4636f-8179-4357-8e87-d1743ece1f81.json @@ -0,0 +1,108 @@ +{ + "id": "22a4636f-8179-4357-8e87-d1743ece1f81", + "snapshot": "chrome", + "instruction": "Please help me convert the file \"Meeting-Agenda.docx\" to a pdf file and upload to the folder 'meetings' in my google drive.", + "source": "https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive", + "config": [ + { + "type": "googledrive", + "parameters": { + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "operation": [ + "delete", + "mkdirs" + ], + "args": [ + { + "query": "title = 'Meeting-Agenda.docx' or title = 'Meeting-Agenda.pdf'", + "trash": false + }, + { + "path": [ + "meetings" + ] + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://news.google.com", + "https://x.com", + "https://www.deepl.com" + ] + } + }, + { + "type": "login", + "parameters": { + "settings_file": "evaluation_examples/settings/google/settings.json", + "platform": "googledrive" + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=17fkMG4guromDzAHNCzzJieZHa2nJDBpc&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/Meeting-Agenda.docx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer", + "/home/user/Desktop/Meeting-Agenda.docx" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_writer", + "chrome" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "googledrive_file", + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "path": [ + "meetings", + "Meeting-Agenda.pdf" + ], + "dest": "Meeting-Agenda.pdf" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1wHWQ6mTJcNLtrL83GrUPzxx2HeOC4L8T&export=download&authuser=0&confirm=t", + "dest": "gold-Meeting-Agenda.pdf" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json b/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json new file mode 100644 index 0000000..3c5f349 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json @@ -0,0 +1,67 @@ +{ + "id": "236833a3-5704-47fc-888c-4f298f09f799", + "snapshot": "chrome", + "instruction": "Find daily papers on Huggingface and take down all the titles, authors and the abstracts of papers on 1st March, 2024 in the doc file 'paper_reading_2024_03_01.docx' on desktop. Each paragraph (split by empty lines) conforms to the following format:\nTitle: xxx\nAuthors: xxx, xxx, xxx\nAbstract: xxxxxxxx.\nArxiv PDF: https://xxxx.pdf", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://huggingface.co/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome", + "libreoffice_writer" + ], + "evaluator": { + "func": "compare_docx_files", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/paper_reading_2024_03_01.docx", + "dest": "paper_reading_2024_03_01.docx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1TUTihXD93bIlekuYy_44fmXAhI1KVol4&export=download&authuser=0&confirm=t", + "dest": "gold_paper_reading_2024_03_01.docx" + }, + "options": { + "ignore_blanks": true, + "ignore_case": true, + "ignore_order": true + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json new file mode 100644 index 0000000..9d754dd --- /dev/null +++ b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json @@ -0,0 +1,116 @@ +{ + "id": "5990457f-2adb-467b-a4af-5c857c92d762", + "snapshot": "chrome", + "instruction": "Append one entry of AI researcher Yann LeCun from Google Scholar into an existing table researchers.xlsx.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://arxiv.org/abs/2005.14165", + "https://wallhaven.cc/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1EbQ75SHLthiZCBqCJtO0fLXJZcKrNler&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/researchers.xlsx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "nautilus", + "/home/user/Desktop" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome", + "libreoffice_calc" + ], + "evaluator": { + "func": "literal_match", + "result": { + "type": "content_from_vm_file", + "path": "/home/user/Desktop/researchers.xlsx", + "file_type": "xlsx", + "file_content": "last_row" + }, + "expected": { + "type": "info_from_website", + "url": "https://scholar.google.com/citations?user=WLN3QrAAAAAJ&hl=en", + "infos": [ + { + "action": "inner_text", + "selector": "#gsc_prf_in" + }, + { + "action": "inner_text", + "selector": "#gsc_rsb_st > tbody > tr:nth-child(1) > td:nth-child(2)" + }, + { + "action": "inner_text", + "selector": "#gsc_rsb_st > tbody > tr:nth-child(2) > td:nth-child(2)" + }, + { + "action": "inner_text", + "selector": "#gsc_rsb_st > tbody > tr:nth-child(3) > td:nth-child(2)" + }, + { + "action": "inner_text", + "selector": "#gsc_a_b > tr:nth-child(1) > td.gsc_a_t > a" + }, + { + "action": "click_and_attribute", + "selector": [ + "#gsc_a_b > tr:nth-child(1) > td.gsc_a_t > a", + "#gsc_oci_title_gg > div:nth-child(1) > a" + ], + "attribute": "href" + } + ], + "backups": [ + "Yann LeCun", + "345074", + "147", + "372", + "Deep learning", + "https://creativecoding.soe.ucsc.edu/courses/cs523/slides/week3/DeepLearning_LeCun.pdf" + ] + }, + "options": { + "type": "list", + "ignore_case": true + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json b/evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json new file mode 100644 index 0000000..f414cc4 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/67890eb6-6ce5-4c00-9e3d-fb4972699b06.json @@ -0,0 +1,110 @@ +{ + "id": "67890eb6-6ce5-4c00-9e3d-fb4972699b06", + "snapshot": "libreoffice_calc", + "instruction": "I am an NLP researcher. Check out the best long paper awards of ACL from 2019 to 2022 and record the 'title', 'year', 'author list' and 'PDF link' into table best_awards_acl.xslx on the desktop. Separate authors only by commas, use offical aclanthology urls not arxiv, and sort rows by years in descending orders.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1v5DgViUgAL771kBWy-qBddVGyjGmgFhK", + "path": "/home/user/Desktop/best_awards_acl.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/best_awards_acl.xlsx" + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://aclanthology.org/" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_calc", + "chrome" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "best_awards_acl.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/best_awards_acl.xlsx", + "dest": "best_awards_acl.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=17ORdOPl3sZGk3s4Wm0vESgImKZjdZBqX&export=download&authuser=0&confirm=t", + "dest": "gold_best_awards_acl.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json new file mode 100644 index 0000000..a18657a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json @@ -0,0 +1,89 @@ +{ + "id": "778efd0a-153f-4842-9214-f05fc176b877", + "snapshot": "vlc", + "instruction": "I'm using libreoffice impress to write slideshows. I found that the video being played by VLC media player had a good soundtrack. Please extract the audio to planet.wav and use it as background music for this slideshow.", + "source": "https://researchguides.case.edu/c.php?g=1286426", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1I-ArULOnZzlGkS9UyIuE8Dyuyus27iZt&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/Minimalist_Business_Slides.pptx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1HiW-PokIfWRyRvLwlkiVKKNCB2h2bcx7&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/planet.mp4" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Minimalist_Business_Slides.pptx" + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "vlc", + "--repeat", + "/home/user/Desktop/planet.mp4" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_impress", + "vlc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Minimalist_Business_Slides.pptx - LibreOffice Impress", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_audios", + "result": { + "type": "audio_in_slide", + "ppt_file_path": "/home/user/Desktop/Minimalist_Business_Slides.pptx", + "slide_index": 0, + "dest": "planet.wav" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1F_iBPLgVg-2g1LQ5rxKyCrFa9UitZ9yn&export=download&authuser=0&confirm=t", + "dest": "gold_planet.wav" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/settings/googledrive/credentials.json b/evaluation_examples/settings/googledrive/credentials.json deleted file mode 100644 index 049985c..0000000 --- a/evaluation_examples/settings/googledrive/credentials.json +++ /dev/null @@ -1 +0,0 @@ -{"access_token": "ya29.a0AfB_byCmR_-BUvipM02LYvRdhSIsjxhdK4a1tpDABrZOjTOWPkPhs4gy070hbrq0tZBeld813_HqZ4q5GTeLzddfVVTWfiHdDSLlT4Bf5_f4ZURSB53XJAdAyYeI7LcT4RwF1WoAQhtldbIBJ4rizVk7L6O3486u6e9OaCgYKARQSARISFQHGX2Mi1U-dePZ0efbg8nydUEsP9Q0171", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0edHIvL2N4T8ICgYIARAAGA4SNwF-L9IrKhoX-pALW0nJ18niS1Gy3Lg9wF_G1joCoGHLM8v4-WJsibIB04KjWYCp_40Cs1WS7es", "token_expiry": "2024-03-02T13:59:28Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byCmR_-BUvipM02LYvRdhSIsjxhdK4a1tpDABrZOjTOWPkPhs4gy070hbrq0tZBeld813_HqZ4q5GTeLzddfVVTWfiHdDSLlT4Bf5_f4ZURSB53XJAdAyYeI7LcT4RwF1WoAQhtldbIBJ4rizVk7L6O3486u6e9OaCgYKARQSARISFQHGX2Mi1U-dePZ0efbg8nydUEsP9Q0171", "expires_in": 3599, "refresh_token": "1//0edHIvL2N4T8ICgYIARAAGA4SNwF-L9IrKhoX-pALW0nJ18niS1Gy3Lg9wF_G1joCoGHLM8v4-WJsibIB04KjWYCp_40Cs1WS7es", "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"} \ No newline at end of file From 054e016aff2e7561e97032177d4fcb5d31524ecf Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 23:29:01 +0800 Subject: [PATCH 11/11] ver Mar6thv3 new multi_app tasks and metrics --- branch-config/filelist | 1 - desktop_env/evaluators/metrics/__init__.py | 2 +- desktop_env/evaluators/metrics/others.py | 128 +++++++++++++++ .../3f05f3b9-29ba-4b6b-95aa-2204697ffc06.json | 153 ++++++++++++++++++ .../42d25c08-fb87-4927-8b65-93631280a26f.json | 98 +++++++++++ .../48c46dc7-fe04-4505-ade7-723cba1aa6f6.json | 110 +++++++++++++ .../788b3701-3ec9-4b67-b679-418bfa726c22.json | 57 +++++++ requirements.txt | 1 + 8 files changed, 548 insertions(+), 2 deletions(-) create mode 100644 desktop_env/evaluators/metrics/others.py create mode 100644 evaluation_examples/examples/multi_apps/3f05f3b9-29ba-4b6b-95aa-2204697ffc06.json create mode 100644 evaluation_examples/examples/multi_apps/42d25c08-fb87-4927-8b65-93631280a26f.json create mode 100644 evaluation_examples/examples/multi_apps/48c46dc7-fe04-4505-ade7-723cba1aa6f6.json create mode 100644 evaluation_examples/examples/multi_apps/788b3701-3ec9-4b67-b679-418bfa726c22.json diff --git a/branch-config/filelist b/branch-config/filelist index aec119b..513343c 100644 --- a/branch-config/filelist +++ b/branch-config/filelist @@ -11,5 +11,4 @@ experiment_screenshot_a11y_tree.py experiment_screenshot_seeact.py experiment_screenshot_som.py -quick_compare_table.py quick_evaluate.py diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index c965a95..27cd341 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -130,7 +130,7 @@ from .vscode import ( check_python_file_by_gold_file, compare_zip_files ) - +from .others import compare_epub, check_mp3_meta def infeasible(): pass diff --git a/desktop_env/evaluators/metrics/others.py b/desktop_env/evaluators/metrics/others.py new file mode 100644 index 0000000..7892d0e --- /dev/null +++ b/desktop_env/evaluators/metrics/others.py @@ -0,0 +1,128 @@ +import zipfile +import os.path +import os + +import lxml.html +from lxml.html import HtmlElement +from typing import List, Dict +from typing import Union, TypeVar +from mutagen.easyid3 import EasyID3 + +from .general import diff_text_file +from .utils import _match_value_to_rule + +import logging + +logger = logging.getLogger("desktopenv.metric.others") + +def process_epub(filename: str) -> List[str]: + file_list: List[str] = [] + + base_dir: str = filename + ".dir" + os.makedirs(base_dir, exist_ok=True) + + try: + with zipfile.ZipFile(filename, "r") as z_f: + with z_f.open("toc.ncx") as in_f\ + , open(os.path.join(base_dir, "toc.ncx"), "w") as out_f: + contents: str = in_f.read().decode() + contents = contents.splitlines() + for l in contents: + if "navPoint" not in l: + out_f.write(l + "\n") + file_list.append(os.path.join(base_dir, "toc.ncx")) + with z_f.open("content.opf") as in_f\ + , open(os.path.join(base_dir, "content.opf"), "w") as out_f: + contents: str = in_f.read().decode() + contents = contents.splitlines() + for l in contents: + if "dc:identifier" not in l: + out_f.write(l + "\n") + file_list.append(os.path.join(base_dir, "content.opf")) + for f_n in z_f.namelist(): + if f_n.endswith(".html"): + with z_f.open(f_n) as in_f\ + , open(os.path.join(base_dir, f_n), "w") as out_f: + html: HtmlElement = lxml.html.fromstring( + ''.join( filter( lambda ch: ch!="\n" and ch!="\r" + , in_f.read().decode() + ) + ).encode() + ) + out_f.write(lxml.html.tostring(html, pretty_print=True, encoding="unicode")) + file_list.append(os.path.join(base_dir, f_n)) + logger.debug("%s: %s", filename, file_list) + return list(sorted(file_list)) + except zipfile.BadZipFile: + return [] + +def compare_epub(result: str, expected: str) -> float: + if result is None: + return 0. + result_files: List[str] = process_epub(result) + expected_files: List[str] = process_epub(expected) + + metric: float = 1. + for f1, f2 in zip(result_files, expected_files): + current_metric: float = diff_text_file(f1, f2) + logger.debug("%s vs %s: %f", f1, f2, current_metric) + metric *= current_metric + return metric + +V = TypeVar("Value") + +def check_mp3_meta(result: str, meta: Dict[str, Dict[str, Union[str, V]]]) -> bool: + # checks using _match_value_to_rule + if result is None: + return 0. + + id3_dict = EasyID3(result) + metric: bool = True + for k, r in meta.items(): + value = id3_dict.get(k, "") + if isinstance(value, list): + value: str = value[0] + logger.debug("%s.%s: %s", result, k, value) + metric = metric and _match_value_to_rule(value, r) + return float(metric) + +if __name__ == "__main__": + import datetime + import sys + + logger = logging.getLogger() + logger.setLevel(logging.DEBUG) + + datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + + file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str))) + debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str))) + stdout_handler = logging.StreamHandler(sys.stdout) + sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str))) + + file_handler.setLevel(logging.INFO) + debug_handler.setLevel(logging.DEBUG) + stdout_handler.setLevel(logging.INFO) + sdebug_handler.setLevel(logging.DEBUG) + + formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") + file_handler.setFormatter(formatter) + debug_handler.setFormatter(formatter) + stdout_handler.setFormatter(formatter) + sdebug_handler.setFormatter(formatter) + + logger.addHandler(file_handler) + logger.addHandler(debug_handler) + logger.addHandler(stdout_handler) + logger.addHandler(sdebug_handler) + + metric = check_mp3_meta( "snapshots/test/cache/3f05f3b9-29ba-4b6b-95aa-2204697ffc06/Cheng Xiang - Missing You - gt.mp3" + , { "title": { "method": "eq" + , "ref": "Missing You" + } + , "artist": { "method": "eq" + , "ref": "Cheng Xiang" + } + } + ) + print(metric) diff --git a/evaluation_examples/examples/multi_apps/3f05f3b9-29ba-4b6b-95aa-2204697ffc06.json b/evaluation_examples/examples/multi_apps/3f05f3b9-29ba-4b6b-95aa-2204697ffc06.json new file mode 100644 index 0000000..60d0a16 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/3f05f3b9-29ba-4b6b-95aa-2204697ffc06.json @@ -0,0 +1,153 @@ +{ + "id": "3f05f3b9-29ba-4b6b-95aa-2204697ffc06", + "snapshot": "os", + "instruction": "I have a collection of MP3s named with their artist and title. However, their meta data are messy. I don't know how to quickly fix them. Some people tells that Picard or Kid3 may help, but I have never used them. Please help me to do this.", + "source": "authors", + "config": [ + { + "type": "execute", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Finance/receipts", "/home/user/Documents/Projects/OSWorld", "/home/user/Documents/Novels/4th Year in Tsinghua", "/home/user/Documents/Novels/Pass Through"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2308.pdf", "url": "https://drive.google.com/uc?id=1azRFXf4A7fvW0S7r9upHvleMEi-92hHM&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2309.pdf", "url": "https://drive.google.com/uc?id=1x-lpHm8U4U7uRPZ74-9wq9KzW2R55ln1&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2310.pdf", "url": "https://drive.google.com/uc?id=1pcrgV9G6NO4ekMEQBiupwXtq6mmke7b_&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2311.pdf", "url": "https://drive.google.com/uc?id=1JzbCK_nIY8X_3QZjnkzTtb-cRoq9zNT-&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2312.pdf", "url": "https://drive.google.com/uc?id=1RqbulzKG_HeYb1GZmLABOzlohlFg02UU&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/X-receipt-2312.pdf", "url": "https://drive.google.com/uc?id=1QzWjNzvNosG_yQr7VVonvYb3cUYF5f3u&export=download"}, + {"path": "/home/user/Documents/Finance/tally_book.xlsx", "url": "https://drive.google.com/uc?id=13yuLhBPmouoWR-DybfgaIbWUOxbY_jhL&export=download"}, + {"path": "/home/user/.projects.tar.xz", "url": "https://drive.google.com/uc?id=1oJcxpjqF474Wm16i1aZc8DlCEfAvc4t_&export=download"}, + {"path": "/home/user/.old-chapters.tar.xz", "url": "https://drive.google.com/uc?id=11y-kLI83sQz6ncmP1-tkIR6mQJ7SfX5-&export=download"}, + {"path": "/home/user/.pass-through.tar.xz", "url": "https://drive.google.com/uc?id=1z7QGTWMHhoZCAd7O1O6CWWKy49oKUowf&export=download"}, + {"path": "/home/user/Music/Cheng Xiang - Missing You.mp3", "url": "https://drive.google.com/uc?id=1dqAiQoHhXn0UIatHs0iUzL266w8OhuzI&export=download"}, + {"path": "/home/user/Music/Han Baoyi - Tears of Dancing Girl.mp3", "url": "https://drive.google.com/uc?id=1ePZMj14XfdzSERB6iveUMTF7nvfx45K7&export=download"}, + {"path": "/home/user/Music/Huang An - I Know Missing is Painful.mp3", "url": "https://drive.google.com/uc?id=1h790X11CHFXZyB6SiftNtiq-3kd2Q12j&export=download"}, + {"path": "/home/user/Music/Chen Shaohua - Red Daughter.mp3", "url": "https://drive.google.com/uc?id=1GHhlAw4KZglAwV9C5bhHDCHIFb_adJUO&export=download"}, + {"path": "/home/user/Music/Zhou Xuan - Nights in Shanghai.mp3", "url": "https://drive.google.com/uc?id=1MGTK-Sr00CihcitI9ebei-HFzJNhtgIo&export=download"} + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.projects.tar.xz", "-C", "/home/user/Documents/Projects/OSWorld"] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.old-chapters.tar.xz", "-C", "/home/user/Documents/Novels/4th Year in Tsinghua"] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.pass-through.tar.xz", "-C", "/home/user/Documents/Novels/Pass Through"] + } + } + ], + "trajectory": "trajectories/3f05f3b9-29ba-4b6b-95aa-2204697ffc06", + "related_apps": ["os", "picard"], + "evaluator": { + "func": ["check_mp3_meta", "check_mp3_meta", "check_mp3_meta", "check_mp3_meta", "check_mp3_meta"], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Music/Cheng Xiang - Missing You.mp3", + "dest": "Cheng Xiang - Missing You.mp3" + }, + { + "type": "vm_file", + "path": "/home/user/Music/Han Baoyi - Tears of Dancing Girl.mp3", + "dest": "Han Baoyi - Tears of Dancing Girl.mp3" + }, + { + "type": "vm_file", + "path": "/home/user/Music/Huang An - I Know Missing is Painful.mp3", + "dest": "Huang An - I Know Missing is Painful.mp3" + }, + { + "type": "vm_file", + "path": "/home/user/Music/Chen Shaohua - Red Daughter.mp3", + "dest": "Chen Shaohua - Red Daughter.mp3" + }, + { + "type": "vm_file", + "path": "/home/user/Music/Zhou Xuan - Nights in Shanghai.mp3", + "dest": "/Zhou Xuan - Nights in Shanghai.mp3" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "title": { + "method": "eq", + "ref": "Missing You" + }, + "artist": { + "method": "eq", + "ref": "Cheng Xiang" + } + } + }, + { + "type": "rule", + "rules": { + "title": { + "method": "eq", + "ref": "Tears of Dancing Girl" + }, + "artist": { + "method": "eq", + "ref": "Han Baoyi" + } + } + }, + { + "type": "rule", + "rules": { + "title": { + "method": "eq", + "ref": "I Know Missing is Painful" + }, + "artist": { + "method": "eq", + "ref": "Huang An" + } + } + }, + { + "type": "rule", + "rules": { + "title": { + "method": "eq", + "ref": "Red Daughter" + }, + "artist": { + "method": "eq", + "ref": "Chen Shaohua" + } + } + }, + { + "type": "rule", + "rules": { + "title": { + "method": "eq", + "ref": "Nights in Shanghai" + }, + "artist": { + "method": "eq", + "ref": "Zhou Xuan" + } + } + } + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/42d25c08-fb87-4927-8b65-93631280a26f.json b/evaluation_examples/examples/multi_apps/42d25c08-fb87-4927-8b65-93631280a26f.json new file mode 100644 index 0000000..7613590 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/42d25c08-fb87-4927-8b65-93631280a26f.json @@ -0,0 +1,98 @@ +{ + "id": "42d25c08-fb87-4927-8b65-93631280a26f", + "snapshot": "chrome", + "instruction": "Hey, my friend has just sent me a web novel, but in txt files. Could you please help me convert it to epub format, so that I can easily read it on my mobile and kindle? I have found several tools to obtain an epub file, like https://github.com/kevinboone/txt2epub and https://github.com/potatoeggy/noveldown. But I'm not sure which one helps. Please help me to do this. Remember to name the file with novel's title.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://github.com/potatoeggy/noveldown", + "https://github.com/kevinboone/txt2epub" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Finance/receipts", "/home/user/Documents/Projects/OSWorld", "/home/user/Documents/Novels/4th Year in Tsinghua", "/home/user/Documents/Novels/Pass Through"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2308.pdf", "url": "https://drive.google.com/uc?id=1azRFXf4A7fvW0S7r9upHvleMEi-92hHM&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2309.pdf", "url": "https://drive.google.com/uc?id=1x-lpHm8U4U7uRPZ74-9wq9KzW2R55ln1&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2310.pdf", "url": "https://drive.google.com/uc?id=1pcrgV9G6NO4ekMEQBiupwXtq6mmke7b_&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2311.pdf", "url": "https://drive.google.com/uc?id=1JzbCK_nIY8X_3QZjnkzTtb-cRoq9zNT-&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2312.pdf", "url": "https://drive.google.com/uc?id=1RqbulzKG_HeYb1GZmLABOzlohlFg02UU&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/X-receipt-2312.pdf", "url": "https://drive.google.com/uc?id=1QzWjNzvNosG_yQr7VVonvYb3cUYF5f3u&export=download"}, + {"path": "/home/user/Documents/Finance/tally_book.xlsx", "url": "https://drive.google.com/uc?id=13yuLhBPmouoWR-DybfgaIbWUOxbY_jhL&export=download"}, + {"path": "/home/user/.projects.tar.xz", "url": "https://drive.google.com/uc?id=1oJcxpjqF474Wm16i1aZc8DlCEfAvc4t_&export=download"}, + {"path": "/home/user/.old-chapters.tar.xz", "url": "https://drive.google.com/uc?id=11y-kLI83sQz6ncmP1-tkIR6mQJ7SfX5-&export=download"}, + {"path": "/home/user/.pass-through.tar.xz", "url": "https://drive.google.com/uc?id=1z7QGTWMHhoZCAd7O1O6CWWKy49oKUowf&export=download"} + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.projects.tar.xz", "-C", "/home/user/Documents/Projects/OSWorld"] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.old-chapters.tar.xz", "-C", "/home/user/Documents/Novels/4th Year in Tsinghua"] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.pass-through.tar.xz", "-C", "/home/user/Documents/Novels/Pass Through"] + } + }, + { + "type": "launch", + "parameters": { + "command": ["nautilus", "/home/user/Documents/Novels/Pass Through"] + } + } + ], + "trajectory": "trajectories/42d25c08-fb87-4927-8b65-93631280a26f", + "related_apps": ["chrome", "os"], + "evaluator": { + "func": "compare_epub", + "result": { + "type": "vm_file", + "dest": "Pass Through.epub", + "path": "/home/user/Documents/Novels/Pass Through/Pass Through.epub" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=14W0R5s0f6jMuDW9kjJMBmJsgDkyLl-aZ&export=download", + "dest": "Pass Through Gold.epub" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/48c46dc7-fe04-4505-ade7-723cba1aa6f6.json b/evaluation_examples/examples/multi_apps/48c46dc7-fe04-4505-ade7-723cba1aa6f6.json new file mode 100644 index 0000000..8c5ef7f --- /dev/null +++ b/evaluation_examples/examples/multi_apps/48c46dc7-fe04-4505-ade7-723cba1aa6f6.json @@ -0,0 +1,110 @@ +{ + "id": "48c46dc7-fe04-4505-ade7-723cba1aa6f6", + "snapshot": "chrome", + "instruction": "I'm currently working on OSWorld project. Every time I need to open the terminal and change to the project directory, open the file manager and change to the project directory, and open relative webpages in chrome browser. This is tedious. Help me to do this automatically. Open project directory in terminal and file manager and open github and online document in chrome browser. That's enough.", + "source": "authors", + "config": [ + { + "type": "execute", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Finance/receipts", "/home/user/Documents/Projects/OSWorld", "/home/user/.local/share/applications"] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["cp", "/usr/share/applications/google-chrome.desktop", "/home/user/.local/share/applications"] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "sed", + "-i.bak", + "-e", + "s#Exec=/usr/bin/google-chrome-stable#& --remote-debugging-port=1337#g", + "/home/user/.local/share/applications/google-chrome.desktop" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["update-desktop-database"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2308.pdf", "url": "https://drive.google.com/uc?id=1azRFXf4A7fvW0S7r9upHvleMEi-92hHM&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2309.pdf", "url": "https://drive.google.com/uc?id=1x-lpHm8U4U7uRPZ74-9wq9KzW2R55ln1&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2310.pdf", "url": "https://drive.google.com/uc?id=1pcrgV9G6NO4ekMEQBiupwXtq6mmke7b_&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2311.pdf", "url": "https://drive.google.com/uc?id=1JzbCK_nIY8X_3QZjnkzTtb-cRoq9zNT-&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2312.pdf", "url": "https://drive.google.com/uc?id=1RqbulzKG_HeYb1GZmLABOzlohlFg02UU&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/X-receipt-2312.pdf", "url": "https://drive.google.com/uc?id=1QzWjNzvNosG_yQr7VVonvYb3cUYF5f3u&export=download"}, + {"path": "/home/user/Documents/Finance/tally_book.xlsx", "url": "https://drive.google.com/uc?id=13yuLhBPmouoWR-DybfgaIbWUOxbY_jhL&export=download"}, + {"path": "/home/user/.projects.tar.xz", "url": "https://drive.google.com/uc?id=1oJcxpjqF474Wm16i1aZc8DlCEfAvc4t_&export=download"} + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.projects.tar.xz", "-C", "/home/user/Documents/Projects/OSWorld"] + } + } + ], + "trajectory": "trajectories/48c46dc7-fe04-4505-ade7-723cba1aa6f6", + "related_apps": ["chrome", "os"], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": ["wmctrl", "-lx"], + "stdout": "wmctrl.out" + } + } + ], + "func": ["check_list", "is_expected_tabs"], + "result": [ + { + "type": "cache_file", + "path": "wmctrl.out" + }, + { + "type": "open_tabs_info" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": [ + "\\borg\\.gnome\\.Nautilus\\.Org\\.gnome\\.Nautilus\\b.*\\bOSWorld\\b", + "gnome-terminal-server\\.Gnome-terminal\\b.*~/Documents/Projects/OSWorld\\b" + ] + } + }, + { + "type": "rule", + "rules": { + "type": "url", + "urls": ["https://github.com", "https://docs.python.org/3/"] + } + } + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/788b3701-3ec9-4b67-b679-418bfa726c22.json b/evaluation_examples/examples/multi_apps/788b3701-3ec9-4b67-b679-418bfa726c22.json new file mode 100644 index 0000000..4b1da9f --- /dev/null +++ b/evaluation_examples/examples/multi_apps/788b3701-3ec9-4b67-b679-418bfa726c22.json @@ -0,0 +1,57 @@ +{ + "id": "788b3701-3ec9-4b67-b679-418bfa726c22", + "snapshot": "chrome", + "instruction": "I'm following a small tale set updating on https://github.com/4th-year-in-tsinghua/4th-year-in-tsinghua. I have already downloaded several chapters to my computer for reading and archiving. There should be some updates and help me to download the next chapter I haven't downloaded yet. Save it to my novel collection folder.", + "source": "authors", + "config": [ + { + "type": "execute", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Finance/receipts", "/home/user/Documents/Projects/OSWorld", "/home/user/Documents/Novels/4th Year in Tsinghua"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2308.pdf", "url": "https://drive.google.com/uc?id=1azRFXf4A7fvW0S7r9upHvleMEi-92hHM&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2309.pdf", "url": "https://drive.google.com/uc?id=1x-lpHm8U4U7uRPZ74-9wq9KzW2R55ln1&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2310.pdf", "url": "https://drive.google.com/uc?id=1pcrgV9G6NO4ekMEQBiupwXtq6mmke7b_&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2311.pdf", "url": "https://drive.google.com/uc?id=1JzbCK_nIY8X_3QZjnkzTtb-cRoq9zNT-&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2312.pdf", "url": "https://drive.google.com/uc?id=1RqbulzKG_HeYb1GZmLABOzlohlFg02UU&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/X-receipt-2312.pdf", "url": "https://drive.google.com/uc?id=1QzWjNzvNosG_yQr7VVonvYb3cUYF5f3u&export=download"}, + {"path": "/home/user/Documents/Finance/tally_book.xlsx", "url": "https://drive.google.com/uc?id=13yuLhBPmouoWR-DybfgaIbWUOxbY_jhL&export=download"}, + {"path": "/home/user/.projects.tar.xz", "url": "https://drive.google.com/uc?id=1oJcxpjqF474Wm16i1aZc8DlCEfAvc4t_&export=download"}, + {"path": "/home/user/.old-chapters.tar.xz", "url": "https://drive.google.com/uc?id=11y-kLI83sQz6ncmP1-tkIR6mQJ7SfX5-&export=download"} + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.projects.tar.xz", "-C", "/home/user/Documents/Projects/OSWorld"] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.old-chapters.tar.xz", "-C", "/home/user/Documents/Novels/4th Year in Tsinghua"] + } + } + ], + "trajectory": "trajectories/788b3701-3ec9-4b67-b679-418bfa726c22", + "related_apps": ["chrome", "os"], + "evaluator": { + "func": "diff_text_file", + "result": { + "type": "vm_file", + "path": "/home/user/Documents/Novels/4th Year in Tsinghua/早期建筑群.tex", + "dest": "download.tex" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1kiCDbTn7uYfgqbymddC5IKMsvpkihW-s&export=download", + "dest": "real.tex" + } + } +} diff --git a/requirements.txt b/requirements.txt index 6571f11..497b9f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,3 +43,4 @@ beautifulsoup4 dashscope google-generativeai PyYaml +mutagen