From b01acb312ef1f1b54d8714254f7a8f11fd9f2ae5 Mon Sep 17 00:00:00 2001 From: David Chang Date: Mon, 4 Mar 2024 15:19:39 +0800 Subject: [PATCH 1/9] ver Mar4thv2 removed a useless function --- quick_compare_table.py | 51 ------------------------------------------ 1 file changed, 51 deletions(-) delete mode 100644 quick_compare_table.py diff --git a/quick_compare_table.py b/quick_compare_table.py deleted file mode 100644 index 6767f9a..0000000 --- a/quick_compare_table.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/python3 - -from desktop_env.evaluators.metrics import compare_table -import json -import sys -import os.path -from typing import Dict -from typing import Any - -import logging -import datetime - -logger = logging.getLogger() -logger.setLevel(logging.DEBUG) - -datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") - -file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str))) -debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str))) -stdout_handler = logging.StreamHandler(sys.stdout) -sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str))) - -file_handler.setLevel(logging.INFO) -debug_handler.setLevel(logging.DEBUG) -stdout_handler.setLevel(logging.INFO) -sdebug_handler.setLevel(logging.DEBUG) - -formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") -file_handler.setFormatter(formatter) -debug_handler.setFormatter(formatter) -stdout_handler.setFormatter(formatter) -sdebug_handler.setFormatter(formatter) - -stdout_handler.addFilter(logging.Filter("desktopenv")) -sdebug_handler.addFilter(logging.Filter("desktopenv")) - -logger.addHandler(file_handler) -logger.addHandler(debug_handler) -logger.addHandler(stdout_handler) -logger.addHandler(sdebug_handler) - -config_file: str = sys.argv[1] - -with open(config_file) as f: - config: Dict[str, Any] = json.load(f) - -print( compare_table( os.path.join("cache/", config["id"], config["evaluator"]["result"]["dest"]) - , os.path.join("cache/", config["id"], config["evaluator"]["expected"]["dest"]) - , **config["evaluator"]["options"] - ) - ) From 459e247736c0fefa60f8a2580f10e2b4ae8af83c Mon Sep 17 00:00:00 2001 From: David Chang Date: Mon, 4 Mar 2024 23:26:22 +0800 Subject: [PATCH 2/9] ver Mar4thv3 some new multi_app configs --- desktop_env/evaluators/getters/misc.py | 6 +- desktop_env/evaluators/metrics/general.py | 58 +++++++------ .../b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 2 +- .../e2392362-125e-4f76-a2ee-524b183a3412.json | 86 +++++++++++++++++++ .../f5c13cdd-205c-4719-a562-348ae5cd1d91.json | 71 +++++++++++++++ .../12086550-11c0-466b-b367-1d9e75b3910e.json | 12 +-- requirements.txt | 1 + 7 files changed, 202 insertions(+), 34 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json create mode 100644 evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json diff --git a/desktop_env/evaluators/getters/misc.py b/desktop_env/evaluators/getters/misc.py index db04aea..976db19 100644 --- a/desktop_env/evaluators/getters/misc.py +++ b/desktop_env/evaluators/getters/misc.py @@ -1,5 +1,5 @@ import logging -from typing import TypeVar +from typing import TypeVar, Dict from datetime import datetime, timedelta logger = logging.getLogger("desktopenv.getters.misc") @@ -74,13 +74,13 @@ relativeTime_to_IntDay = { "first monday four months later": "special" } -def get_rule(env, config: R) -> R: +def get_rule(env, config: Dict[str, R]) -> R: """ Returns the rule as-is. """ return config["rules"] -def get_rule_relativeTime(env, config: R) -> R: +def get_rule_relativeTime(env, config: Dict[str, R]) -> R: """ According to the rule definded in funciton "apply_rules_to_timeFormat", convert the relative time to absolute time. config: diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 4458a69..26a8e3c 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -1,6 +1,7 @@ import csv import functools import json +import yaml import operator import re import sqlite3 @@ -132,11 +133,11 @@ _accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org" } -def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: +def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float: """ Args: result (str): XML of GNOME Accessibility Tree - rules (Dict[str, Any]): dict like + rules (List[Dict[str, Any]]): list of dict like { "selectors": list of str as CSS selectors, will be connected by ", " to form a composite selector. Only one from `selectors` and @@ -154,30 +155,33 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: """ at: _Element = lxml.etree.fromstring(result) - if "xpath" in rules: - elements: List[_Element] = at.xpath(rules["xpath"], namespaces=_accessibility_ns_map) - elif "selectors" in rules: - selector = CSSSelector(", ".join(rules["selectors"]), namespaces=_accessibility_ns_map) - elements: List[_Element] = selector(at) - else: - raise ValueError("At least one of xpath and selectors is required") + total_match_score = 1. + for r in rules: + if "xpath" in r: + elements: List[_Element] = at.xpath(r["xpath"], namespaces=_accessibility_ns_map) + elif "selectors" in r: + selector = CSSSelector(", ".join(r["selectors"]), namespaces=_accessibility_ns_map) + elements: List[_Element] = selector(at) + else: + raise ValueError("At least one of xpath and selectors is required") - if len(elements) == 0: - print("no elements") - return 0. + if len(elements) == 0: + print("no elements") + return 0. - if "text" in rules: - match_func: Callable[[str], Number] = functools.partial(operator.eq if rules["exact"] \ - else (lambda a, b: fuzz.ratio(a, b) / 100.) - , rules["text"] - ) - match_score: Number = 0 - for elm in elements: - match_score = max(match_score, match_func(elm.text or None)) - else: - match_score = 1. + if "text" in r: + match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \ + else (lambda a, b: fuzz.ratio(a, b) / 100.) + , r["text"] + ) + match_score: Number = 0 + for elm in elements: + match_score = max(match_score, match_func(elm.text or None)) + else: + match_score = 1. + total_match_score *= match_score - return float(match_score) + return float(total_match_score) # def check_existence(result: str, *args) -> float: @@ -189,7 +193,7 @@ def run_sqlite3(result: str, rules: Dict[str, Any]) -> float: return float(cursor.fetchone()[0] or 0) -def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]]) -> float: +def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]], is_yaml: bool = False) -> float: """ Args: result (str): path to json file @@ -204,6 +208,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str ], "unexpect": page-tab[name=\"About Profiles\"]" - ] - } + "rules": [ + { + "selectors": [ + "application[name=Thunderbird] page-tab-list[attr|id=\"tabmail-tabs\"]>page-tab[name=\"About Profiles\"]" + ] + } + ] }, "func": "check_accessibility_tree" } diff --git a/requirements.txt b/requirements.txt index a6082f9..6571f11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,3 +42,4 @@ func-timeout beautifulsoup4 dashscope google-generativeai +PyYaml From f21d6851089c7251889a01ea4d460e880cc60646 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 6 Mar 2024 15:05:47 +0800 Subject: [PATCH 3/9] Update examples --- .../337d318b-aa07-4f4f-b763-89d9a2dd013f.json | 49 +++++++++ .../82e3c869-49f6-4305-a7ce-f3e64a0618e7.json | 89 +++++++++++++++ .../8e116af7-7db7-4e35-a68b-b0939c066c78.json | 54 ++++++++++ .../deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json | 101 ++++++++++++++++++ 4 files changed, 293 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json create mode 100644 evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json create mode 100644 evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json create mode 100644 evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json diff --git a/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json new file mode 100644 index 0000000..d2d4193 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/337d318b-aa07-4f4f-b763-89d9a2dd013f.json @@ -0,0 +1,49 @@ +{ + "id": "337d318b-aa07-4f4f-b763-89d9a2dd013f", + "snapshot": "libreoffice_calc", + "instruction": "Cross-check the invoices with the bank statements and identify any discrepancies. Then pull out the invoices that don't match the statements and put them in the \"problematic\" folder.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/invoice TII-20220301-90.pdf", + "url": "https://drive.google.com/uc?id=13y1Dkh9dJUaWeMSk6pg_UY-R7K4bhAJM&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # GES-20220215-82.pdf", + "url": "https://drive.google.com/uc?id=1zZYJQVpbGmqT_eH0x9Z5O7WoIFrQo3sN&export=download" + }, + { + "path": "/home/user/Desktop/Invoice # 243729.pdf", + "url": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download" + }, + { + "path": "/home/user/Desktop/Bank-Statement.pdf", + "url": "https://drive.google.com/uc?id=1-KS6p0aip56iPmH4okhXZhLgqVwrcjfw&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/337d318b-aa07-4f4f-b763-89d9a2dd013f", + "related_apps": [ + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/problematic/Invoice # 243729.pdf", + "dest": "Invoice # 243729.pdf" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1a7fnWpsAdFQkFWl2BqAA_gTwlZ6nvGAx&export=download", + "dest": "Invoice # 243729 Gold.pdf" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json new file mode 100644 index 0000000..deda04e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json @@ -0,0 +1,89 @@ +{ + "id": "82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "snapshot": "libreoffice_calc", + "instruction": "Please sift through the folder with all the event photos taken by our photographer. I need you to extract the photos featuring the presenters and place them in a separate folder named 'presenter'. Then, compress this folder into a zip file so I can easily share it with others later.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/IDS LLM seminar/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00659.jpg", + "url": "https://drive.google.com/uc?id=1NjnSab2aEtJytYajM9FqeXsLm4ItxTsJ&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00657.jpg", + "url": "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00574.jpg", + "url": "https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00554.jpg", + "url": "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00495.jpg", + "url": "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + }, + { + "path": "/home/user/Desktop/IDS LLM seminar/DSC00454.jpg", + "url": "https://drive.google.com/uc?id=1t9L7fVQVxjovTQufetlogulIctn7DF_L&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/82e3c869-49f6-4305-a7ce-f3e64a0618e7", + "related_apps": [ + "os", + "image" + ], + "evaluator": { + "func": "compare_image_list", + "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/presenter/DSC00657.jpg", + "/home/user/Desktop/presenter/DSC00574.jpg", + "/home/user/Desktop/presenter/DSC00554.jpg", + "/home/user/Desktop/presenter/DSC00495.jpg" + ], + "dest": [ + "DSC00657.jpg", + "DSC00574.jpg", + "DSC00554.jpg", + "DSC00495.jpg" + ], + "multi": "true" + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1untYr5vS_wCVd3-5cfdUegBBILT4tHzF&export=download", + "https://drive.google.com/uc?id=1V3g4z2QhIHx48LbeaNx31nMosxYPgtzY&export=download", + "https://drive.google.com/uc?id=1DNz3iZmNk6-dazqXilOFkRpgUahU57zd&export=download", + "https://drive.google.com/uc?id=1_crKe2tdI4Kjg0Xop8vHo7yN9KACJTJF&export=download" + ], + "dest": [ + "DSC00657_gold.jpg", + "DSC00574_gold.jpg", + "DSC00554_gold.jpg", + "DSC00495_gold.jpg" + ], + "multi": "true" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json new file mode 100644 index 0000000..1214c6d --- /dev/null +++ b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json @@ -0,0 +1,54 @@ +{ + "id": "8e116af7-7db7-4e35-a68b-b0939c066c78", + "snapshot": "libreoffice_calc", + "instruction": "Please update my bookkeeping sheet with the recent transactions from the provided folder, detailing my expenses over the past few days.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/my_bookkeeping.xlsx", + "url": "https://drive.google.com/uc?id=1QOSpTZPFzFZeC0tng4Gfws544LFln836&export=download" + }, + { + "path": "/home/user/Desktop/receipt_0.jpeg", + "url": "https://drive.google.com/uc?id=1b0BRc-BzXObVCUEonJfRbDsrgxZugj3U&export=download" + }, + { + "path": "/home/user/Desktop/receipt_1.jpg", + "url": "https://drive.google.com/uc?id=1S-JBDqwEf7Z_JXDItK_F4BOHgScTjlyN&export=download" + }, + { + "path": "/home/user/Desktop/receipt_2.jpg", + "url": "https://drive.google.com/uc?id=1Ys2abZi9_0y8sxuj2vCbC0OhjC6YdrC-&export=download" + }, + { + "path": "/home/user/Desktop/receipt_3.pdf", + "url": "https://drive.google.com/uc?id=1sKvBbGDpmUkv891xTqX7w5dtEvchQahd&export=download" + }, + { + "path": "/home/user/Desktop/receipt_4.jpg", + "url": "https://drive.google.com/uc?id=1kW7xH5bc2jRaKGDKHDrgSehTrPgkyzkc&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78", + "related_apps": [ + "", + "" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} diff --git a/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json new file mode 100644 index 0000000..0037591 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json @@ -0,0 +1,101 @@ +{ + "id": "deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "snapshot": "libreoffice_calc", + "instruction": "Find a paper list of all the new foundation language models issued on 11st Oct. 2023 via arxiv daily, and organize it into the sheet I opened.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/New Large Language Models.xlsx", + "url": "https://drive.google.com/uc?id=1NJFAUDzatd5TbBqXeCy3-ok4BWj-xayT&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/New Large Language Models.xlsx" + } + } + ], + "trajectory": "trajectories/deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "related_apps": [ + "libreoffice_calc", + "chrome", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "New Large Language Models.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/New Large Language Models.xlsx", + "dest": "New Large Language Models.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1BHOyjFo72b74YKWTqPMaoNvCzICkos-G&export=download", + "dest": "New Large Language Models Gold.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "B2:B5", + "C2:C5" + ], + "type": "exact_match" + }, + { + "range": [ + "A2:A5" + ], + "type": "fuzzy_match", + "threshold": 90, + "ignore_case": true + } + ] + } + ] + } + } +} From 5817403e2e9d443d5b5ecc59e59cb6e41d0e074d Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 15:06:08 +0800 Subject: [PATCH 4/9] ver Mar6th three new tasks --- .../415ef462-bed3-493a-ac36-ca8c6d23bf1b.json | 147 ++++++++++++++++++ .../e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json | 59 +++++++ .../f8369178-fafe-40c2-adc4-b9b08a125456.json | 31 ++++ 3 files changed, 237 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json create mode 100644 evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json create mode 100644 evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json diff --git a/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json new file mode 100644 index 0000000..f3365fb --- /dev/null +++ b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json @@ -0,0 +1,147 @@ +{ + "id": "415ef462-bed3-493a-ac36-ca8c6d23bf1b", + "snapshot": "thunderbird", + "instruction": "Save the AWS invoice of December from the email. I have moved that email to local \"Bills\" folder. Save it to the my receipts folder. Keep the file name pattern and update a record to my tally book.", + "source": "authors", + "config": [ + { + "type": "execute", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Finance/receipts", "/home/user/Documents/Projects"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2308.pdf", "url": "https://drive.google.com/uc?id=1azRFXf4A7fvW0S7r9upHvleMEi-92hHM&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2309.pdf", "url": "https://drive.google.com/uc?id=1x-lpHm8U4U7uRPZ74-9wq9KzW2R55ln1&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2310.pdf", "url": "https://drive.google.com/uc?id=1pcrgV9G6NO4ekMEQBiupwXtq6mmke7b_&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2311.pdf", "url": "https://drive.google.com/uc?id=1JzbCK_nIY8X_3QZjnkzTtb-cRoq9zNT-&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/X-receipt-2312.pdf", "url": "https://drive.google.com/uc?id=1QzWjNzvNosG_yQr7VVonvYb3cUYF5f3u&export=download"}, + {"path": "/home/user/Documents/Finance/tally_book.xlsx", "url": "https://drive.google.com/uc?id=13yuLhBPmouoWR-DybfgaIbWUOxbY_jhL&export=download"}, + {"path": "/home/user/.projects.tar.xz", "url": "https://drive.google.com/uc?id=1oJcxpjqF474Wm16i1aZc8DlCEfAvc4t_&export=download"}, + { + "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.projects.tar.xz", "-C", "/home/user/Documents/Projects"] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xzv", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": ["thunderbird"] + } + } + ], + "trajectory": "trajectories/415ef462-bed3-493a-ac36-ca8c6d23bf1b", + "related_apps": ["thunderbird", "libreoffice_calc", "os"], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "tally_book.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/.aws-invoice-2312.pdf", "url": "https://drive.google.com/uc?id=1RqbulzKG_HeYb1GZmLABOzlohlFg02UU&export=download"} + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["diff", ".aws-invoice-2312.pdf", "/home/user/Documents/Finance/receipts/aws-invoice-2312.pdf"], + "stdout": "diff.out" + } + } + ], + "func": ["compare_table", "check_list"], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Documents/Finance/tally_book.xlsx", + "dest": "tally_book.xlsx" + }, + { + "type": "cache_file", + "path": "diff.out" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1x8m-korGI1PhJm8PAQVTlWYKneK4WKvn&export=download", + "dest": "tally_book_gt.xlsx" + }, + { + "type": "rule", + "rules": { + "unexpect": [ + ".+" + ] + } + } + ], + "options": [ + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + }, + {} + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json b/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json new file mode 100644 index 0000000..0d86766 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json @@ -0,0 +1,59 @@ +{ + "id": "e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56", + "snapshot": "libreoffice_writer", + "instruction": "Install LanguageTool extension for my LibreOffice", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": ["libreoffice", "--writer"] + } + } + ], + "trajectory": "trajectories/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56", + "related_apps": ["chrome", "libreoffice", "os"], + "evaluator": { + "postconfig": [ + { + "type": "command", + "parameters": { + "command": ["grep", "-nHr", "languagetool", "/home/user/.config/libreoffice/4/user/uno_packages/cache/uno_packages/"], + "stdout": "grep.out" + } + }, + { + "type": "command", + "parameters": { + "command": ["apt", "list", "--installed"], + "stdout": "apt.out" + } + } + ], + "func": ["check_list", "check_list"], + "result": [ + { + "type": "cache_file", + "path": "grep.out" + }, + { + "type": "cache_file", + "path": "apt.out" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": ["org\\.openoffice\\.languagetool\\.oxt"] + } + }, + { + "type": "rule", + "rules": { + "expect": ["openjdk-\\d+-(jre|jdk)"] + } + } + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json b/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json new file mode 100644 index 0000000..0a56921 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json @@ -0,0 +1,31 @@ +{ + "id": "f8369178-fafe-40c2-adc4-b9b08a125456", + "snapshot": "chrome", + "instruction": "Help me to install Orchis theme from gnome-look.org and change to it for my GNOME desktop.", + "source": "https://itsfoss.com/install-switch-themes-gnome-shell", + "config": [], + "trajectory": "trajectories/f8369178-fafe-40c2-adc4-b9b08a125456", + "related_apps": ["chrome", "os"], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": ["gsettings", "get", "org.gnome.desktop.interface", "gtk-theme"], + "stdout": "gsettings.out" + } + } + ], + "func": "check_list", + "result": { + "type": "cache_file", + "path": "gsettings.out" + }, + "expected": { + "type": "rule", + "rules": { + "expect": ["Orchis"] + } + } + } +} From f72b788cdcc22a0583b9f806162399cfaf2cc328 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 15:21:09 +0800 Subject: [PATCH 5/9] ver Mar6th fixed bug in sheet_fuzzy option of compare_table --- desktop_env/evaluators/metrics/table.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index a35c13b..4a58e50 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -12,6 +12,7 @@ import pandas as pd from openpyxl import Workbook from openpyxl.cell.cell import Cell from openpyxl.worksheet.cell_range import MultiCellRange +from openpyxl.utils import get_column_letter from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.worksheet.worksheet import Worksheet @@ -208,8 +209,10 @@ def compare_table(result: str, expected: str = None, **options) -> float: for rl in r["rules"]: for rng in MultiCellRange(rl["range"]): for cdn in rng.cells: - value1: str = str(read_cell_value(*sheet1, cdn)) - value2: str = str(read_cell_value(*sheet2, cdn)) + coordinate: str = "{:}{:d}".format(get_column_letter(cdn[1]), cdn[0]) + value1: str = str(read_cell_value(*sheet1, coordinate)) + value2: str = str(read_cell_value(*sheet2, coordinate)) + logger.debug("%s: %s vs %s", cdn, value1, value2) for rplc in rl.get("normalization", []): value1 = value1.replace(rplc[0], rplc[1]) @@ -230,11 +233,11 @@ def compare_table(result: str, expected: str = None, **options) -> float: if rl["type"]=="includes": metric: bool = value1 in value2 - if rl["type"]=="includes_by": + elif rl["type"]=="includes_by": metric: bool = value2 in value1 - if rl["type"]=="fuzzy_match": + elif rl["type"]=="fuzzy_match": metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.) - if rl["type"]=="exact_match": + elif rl["type"]=="exact_match": metric: bool = value1==value2 total_metric = total_metric and metric From e045eee901294c409e05eaa0e14c3e448baaddfa Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 16:16:38 +0800 Subject: [PATCH 6/9] ver Mar6thv2 updated read_cell_value --- desktop_env/evaluators/metrics/utils.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 4515cd5..a67b6bb 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -311,14 +311,15 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: , namespaces=_xlsx_ns_imapping ) logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell)) - if "@t" not in cell["c"]: + try: + if "@t" not in cell["c"] or cell["c"]["@t"] == "n": + return float(cell["c"]["v"]) + if cell["c"]["@t"] == "s": + return shared_strs[int(cell["c"]["v"])] + if cell["c"]["@t"] == "str": + return cell["c"]["v"] + except ValueError: return None - if cell["c"]["@t"] == "s": - return shared_strs[int(cell["c"]["v"])] - if cell["c"]["@t"] == "n": - return float(cell["c"]["v"]) - if cell["c"]["@t"] == "str": - return cell["c"]["v"] # }}} read_cell_value # From 806ee50e72bf4cd82c231fd6de22ff2a0c98bdeb Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 16:24:17 +0800 Subject: [PATCH 7/9] ver Mar6th fixed bug --- desktop_env/evaluators/metrics/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index a67b6bb..a7f1f80 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -318,7 +318,7 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: return shared_strs[int(cell["c"]["v"])] if cell["c"]["@t"] == "str": return cell["c"]["v"] - except ValueError: + except KeyError, ValueError: return None # }}} read_cell_value # From 80ad7e53c42468488e02a0d9ede4bd81fa9b623a Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 16:29:54 +0800 Subject: [PATCH 8/9] ver Mar6thv2 fixed bug --- desktop_env/evaluators/metrics/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index a7f1f80..b57de00 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -318,7 +318,7 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: return shared_strs[int(cell["c"]["v"])] if cell["c"]["@t"] == "str": return cell["c"]["v"] - except KeyError, ValueError: + except (KeyError, ValueError): return None # }}} read_cell_value # From b79aae4f66b12a741540b600cc06a7d8ebcf2454 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 6 Mar 2024 19:52:48 +0800 Subject: [PATCH 9/9] Update examples --- .../185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json | 96 +++++++++ .../2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json | 102 +++++++++ .../3a93cae4-ad3e-403e-8c12-65303b271818.json | 26 +++ .../767a3271-56db-4745-ac5d-846ef05e6fe5.json | 48 +++-- .../8e116af7-7db7-4e35-a68b-b0939c066c78.json | 195 +++++++++++++++++- .../d28853f4-500a-4749-b9dc-79c3576e759b.json | 26 +++ 6 files changed, 464 insertions(+), 29 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json create mode 100644 evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json create mode 100644 evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json create mode 100644 evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json diff --git a/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json b/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json new file mode 100644 index 0000000..30d5c38 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json @@ -0,0 +1,96 @@ +{ + "id": "185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "snapshot": "libreoffice_calc", + "instruction": "Transfer the data from our 'Employee Performance Evaluation Summary' Excel sheet into our standardized PDF evaluation forms. Each employee's evaluation data should be accurately filled into the designated fields of the PDF form. It's crucial that the final PDF documents retain a uniform and professional look, ready for distribution to our staff or for filing purposes. Furthermore, please ensure that each PDF file is named according to the employee's name as it appears in the Excel document. This will greatly streamline our evaluation process and enhance our efficiency in managing employee performance records. Oh, use \"√\" as mark on characters.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/Employee Performance Evaluation Summary.xlsx", + "url": "https://drive.google.com/uc?id=1uOzi66bzO_WUnoS4Oqsodrd7_YPLatEk&export=download" + }, + { + "path": "/home/user/Desktop/review_template.pdf", + "url": "https://drive.google.com/uc?id=1YJ4RPGFUuS48tBh31gBerA16JSMw498w&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Employee Performance Evaluation Summary.xlsx" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/review_template.pdf" + } + } + ], + "trajectory": "trajectories/185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "related_apps": [ + "libreoffice_calc", + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1kZM90nA1krRmV9ug5_BBe8VlrZRVLiLK&export=download", + "https://drive.google.com/uc?id=1zyLzYYThwyit9ciXpfNfPFlBomolOauY&export=download", + "https://drive.google.com/uc?id=1gMT7JBftuymajMAO5rwksORpeVq3uGmH&export=download", + "https://drive.google.com/uc?id=1x0DdtUSZyBifl1tGIWlWKn255WusJeR4&export=download", + "https://drive.google.com/uc?id=1UAcG32WO8XCXElcanjGwbSpJwFuyOkts&export=download", + "https://drive.google.com/uc?id=1PRgryg7Y5evKnDG2LPtAttVp9qAf5VyZ&export=download", + "https://drive.google.com/uc?id=1JxEDriCS2W7BQLdkIgxu_WFCRa9ib4D7&export=download" + ], + "dest": [ + "Alex Lee_Gold.pdf", + "David Wilson_Gold.pdf", + "Emily Johnson_Gold.pdf", + "John Doe_Gold.pdf", + "Linda Green_Gold.pdf", + "Michael Brown_Gold.pdf", + "Sophia Carter_Gold.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + }, + "expected": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/Alex Lee.pdf", + "/home/user/Desktop/David Wilson.pdf", + "/home/user/Desktop/Emily Johnson.pdf", + "/home/user/Desktop/John Doe.pdf", + "/home/user/Desktop/Linda Green.pdf", + "/home/user/Desktop/Michael Brown.pdf", + "/home/user/Desktop/Sophia Carter.pdf" + ], + "dest": [ + "Alex Lee.pdf", + "David Wilson.pdf", + "Emily Johnson.pdf", + "John Doe.pdf", + "Linda Green.pdf", + "Michael Brown.pdf", + "Sophia Carter.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + } + } +} diff --git a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json new file mode 100644 index 0000000..239d695 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json @@ -0,0 +1,102 @@ +{ + "id": "2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "snapshot": "libreoffice_calc", + "instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. If it turns out that the current formatting does not align with APA 7 standards, I would greatly appreciate your assistance in making the necessary adjustments to comply with those guidelines. ", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/students work/", + "/home/user/Desktop/Lec powerpoint/", + "/home/user/Desktop/Grammar test/", + "/home/user/Desktop/Grammar rules PDF/", + "/home/user/Desktop/FDI/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/students work/Zheng He .docx", + "url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download" + }, + { + "path": "/home/user/Desktop/students work/The literature reviews of weekly readings.docx", + "url": "https://drive.google.com/uc?id=18zoZCNtP-wTkxXp2FhH3O_NdLZKVMPIr&export=download" + }, + { + "path": "/home/user/Desktop/students work/The British Justice System.docx", + "url": "https://drive.google.com/uc?id=1z3YHSN4CvC5kN1AwTWB_-plRS4p5GAch&export=download" + }, + { + "path": "/home/user/Desktop/students work/quiz2.docx", + "url": "https://drive.google.com/uc?id=1R5Bii_kvnv_fZVXV-6DMt6Hgq-1gXMo1&export=download" + }, + { + "path": "/home/user/Desktop/students work/quiz.docx", + "url": "https://drive.google.com/uc?id=1PvlGMVX7YkricrjoPRe0e5VQlHeozRPD&export=download" + }, + { + "path": "/home/user/Desktop/students work/Q1&2&3.docx", + "url": "https://drive.google.com/uc?id=1kLQ3lnba6p9lqikHhKDdbqrYagHnZWU_&export=download" + }, + { + "path": "/home/user/Desktop/students work/Photo Ethics in Journalism.docx", + "url": "https://drive.google.com/uc?id=1V6nG6HP_9Kb5KBCRTpaGsRTdPxnJSmRm&export=download" + }, + { + "path": "/home/user/Desktop/students work/cassie.docx", + "url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download" + }, + { + "path": "/home/user/Desktop/students work/case study.docx", + "url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf", + "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "/home/user/Desktop/Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "/home/user/Desktop/Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "related_apps": [ + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } +} diff --git a/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json new file mode 100644 index 0000000..e1f0544 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json @@ -0,0 +1,26 @@ +{ + "id": "3a93cae4-ad3e-403e-8c12-65303b271818", + "snapshot": "libreoffice_calc", + "instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +} diff --git a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json index 9c7a2b7..28542f8 100644 --- a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json +++ b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json @@ -1,27 +1,25 @@ { - "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", - "snapshot": "libreoffice_calc", - "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. Thanks a bunch!", - "source": "authors", - "config": [ - - ], - "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", - "related_apps": [ - "thunderbird", - "libreoffice_calc" - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - - }, - "expected": { - - }, - "options": { - - } - } + "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", + "snapshot": "libreoffice_calc", + "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. Thanks a bunch!", + "source": "authors", + "config": [ + { + } + ], + "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", + "related_apps": [ + "thunderbird", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + }, + "expected": { + }, + "options": { + } + } } diff --git a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json index 1214c6d..ce00111 100644 --- a/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json +++ b/evaluation_examples/examples/multi_apps/8e116af7-7db7-4e35-a68b-b0939c066c78.json @@ -34,21 +34,208 @@ } ] } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/my_bookkeeping.xlsx" + } } ], "trajectory": "trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78", "related_apps": [ - "", - "" + "libreoffice_calc", + "os", + "image", + "pdf" ], "evaluator": { - "postconfig": [], - "func": "", + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "my_bookkeeping.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", "result": { + "type": "vm_file", + "path": "/home/user/Desktop/my_bookkeeping.xlsx", + "dest": "my_bookkeeping.xlsx" }, "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1ygEDdVlkf2ZyqMxJ_ktqo9G_g--rc6co&export=download", + "dest": "my_bookkeeping_gold.xlsx" }, "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "A1:A8", + "B1:B8", + "C1:C8", + "D1:D8", + "E1:E8" + ], + "type": "exact_match" + } + ] + }, + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "C9:C13" + ], + "type": "exact_match", + "ignore_case": true + } + ] + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D9", + "props": { + "value": { + "method": "approx:0.1", + "ref": -186.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3670 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -5.7 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -154.06 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -8.1 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E9", + "props": { + "value": { + "method": "approx:0.1", + "ref": 603.07 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3066.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3072.63 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3226.69 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3234.79 + } + } + } + ] } } } diff --git a/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json b/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json new file mode 100644 index 0000000..0fbac44 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/d28853f4-500a-4749-b9dc-79c3576e759b.json @@ -0,0 +1,26 @@ +{ + "id": "d28853f4-500a-4749-b9dc-79c3576e759b", + "snapshot": "libreoffice_calc", + "instruction": "Hey there! So, I've got this bit of a situation and I'm hoping you can help me out. I've been working on gathering research for this big project at work, and I've ended up with a bunch of PDF files from various sources. Each PDF contains some really crucial information that I need, but here's the kicker - I need all of this info to be in one place where I can easily access and edit it, like in a Word document or a README file.\n\nThe PDFs are a mix of things - some are reports with statistics and findings, others are articles with key insights, and a few are just informational brochures with important dates and details. What I need is for someone to go through each PDF, extract all the relevant information, and then organize it into a single document. I'm talking about making sure that all the stats, insights, dates, and details from each PDF are neatly compiled, so I don't miss anything.\n\nAnd to make things a bit more complicated, some of these PDFs are scanned images of documents, so the text isn't directly selectable. I guess that means you might need to manually type out some parts or find a way to convert the images to text that can be copied.\n\nOnce everything's been compiled into this one document, could you save it as both a Word doc and a README file? I'd like to have it in these two formats so I can easily share it with my team and also have a version that's ready to be uploaded to our project's repository.\n\nOh, and could you make sure to organize the information by the source PDF? Like, maybe start each section with the title of the PDF or a brief description of its contents, followed by all the extracted info from that PDF. This way, it'll be easier for me and the team to trace back to the original sources if we need to.\n\nI know it's a lot to ask, but having all this information consolidated and organized is going to be a huge help for moving forward with the project. Thanks a bunch for helping me tackle this!", + "source": "authors", + "config": [ + + ], + "trajectory": "trajectories/d28853f4-500a-4749-b9dc-79c3576e759b", + "related_apps": [ + + ], + "evaluator": { + "postconfig": [], + "func": "", + "result": { + + }, + "expected": { + + }, + "options": { + + } + } +}