From b01acb312ef1f1b54d8714254f7a8f11fd9f2ae5 Mon Sep 17 00:00:00 2001 From: David Chang Date: Mon, 4 Mar 2024 15:19:39 +0800 Subject: [PATCH 1/4] ver Mar4thv2 removed a useless function --- quick_compare_table.py | 51 ------------------------------------------ 1 file changed, 51 deletions(-) delete mode 100644 quick_compare_table.py diff --git a/quick_compare_table.py b/quick_compare_table.py deleted file mode 100644 index 6767f9a..0000000 --- a/quick_compare_table.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/python3 - -from desktop_env.evaluators.metrics import compare_table -import json -import sys -import os.path -from typing import Dict -from typing import Any - -import logging -import datetime - -logger = logging.getLogger() -logger.setLevel(logging.DEBUG) - -datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") - -file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str))) -debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str))) -stdout_handler = logging.StreamHandler(sys.stdout) -sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str))) - -file_handler.setLevel(logging.INFO) -debug_handler.setLevel(logging.DEBUG) -stdout_handler.setLevel(logging.INFO) -sdebug_handler.setLevel(logging.DEBUG) - -formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") -file_handler.setFormatter(formatter) -debug_handler.setFormatter(formatter) -stdout_handler.setFormatter(formatter) -sdebug_handler.setFormatter(formatter) - -stdout_handler.addFilter(logging.Filter("desktopenv")) -sdebug_handler.addFilter(logging.Filter("desktopenv")) - -logger.addHandler(file_handler) -logger.addHandler(debug_handler) -logger.addHandler(stdout_handler) -logger.addHandler(sdebug_handler) - -config_file: str = sys.argv[1] - -with open(config_file) as f: - config: 
Dict[str, Any] = json.load(f) - -print( compare_table( os.path.join("cache/", config["id"], config["evaluator"]["result"]["dest"]) - , os.path.join("cache/", config["id"], config["evaluator"]["expected"]["dest"]) - , **config["evaluator"]["options"] - ) - ) From 459e247736c0fefa60f8a2580f10e2b4ae8af83c Mon Sep 17 00:00:00 2001 From: David Chang Date: Mon, 4 Mar 2024 23:26:22 +0800 Subject: [PATCH 2/4] ver Mar4thv3 some new multi_app configs --- desktop_env/evaluators/getters/misc.py | 6 +- desktop_env/evaluators/metrics/general.py | 58 +++++++------ .../b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 2 +- .../e2392362-125e-4f76-a2ee-524b183a3412.json | 86 +++++++++++++++++++ .../f5c13cdd-205c-4719-a562-348ae5cd1d91.json | 71 +++++++++++++++ .../12086550-11c0-466b-b367-1d9e75b3910e.json | 12 +-- requirements.txt | 1 + 7 files changed, 202 insertions(+), 34 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json create mode 100644 evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json diff --git a/desktop_env/evaluators/getters/misc.py b/desktop_env/evaluators/getters/misc.py index db04aea..976db19 100644 --- a/desktop_env/evaluators/getters/misc.py +++ b/desktop_env/evaluators/getters/misc.py @@ -1,5 +1,5 @@ import logging -from typing import TypeVar +from typing import TypeVar, Dict from datetime import datetime, timedelta logger = logging.getLogger("desktopenv.getters.misc") @@ -74,13 +74,13 @@ relativeTime_to_IntDay = { "first monday four months later": "special" } -def get_rule(env, config: R) -> R: +def get_rule(env, config: Dict[str, R]) -> R: """ Returns the rule as-is. """ return config["rules"] -def get_rule_relativeTime(env, config: R) -> R: +def get_rule_relativeTime(env, config: Dict[str, R]) -> R: """ According to the rule definded in funciton "apply_rules_to_timeFormat", convert the relative time to absolute time. 
config: diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 4458a69..26a8e3c 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -1,6 +1,7 @@ import csv import functools import json +import yaml import operator import re import sqlite3 @@ -132,11 +133,11 @@ _accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org" } -def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: +def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float: """ Args: result (str): XML of GNOME Accessibility Tree - rules (Dict[str, Any]): dict like + rules (List[Dict[str, Any]]): list of dict like { "selectors": list of str as CSS selectors, will be connected by ", " to form a composite selector. Only one from `selectors` and @@ -154,30 +155,33 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: """ at: _Element = lxml.etree.fromstring(result) - if "xpath" in rules: - elements: List[_Element] = at.xpath(rules["xpath"], namespaces=_accessibility_ns_map) - elif "selectors" in rules: - selector = CSSSelector(", ".join(rules["selectors"]), namespaces=_accessibility_ns_map) - elements: List[_Element] = selector(at) - else: - raise ValueError("At least one of xpath and selectors is required") + total_match_score = 1. + for r in rules: + if "xpath" in r: + elements: List[_Element] = at.xpath(r["xpath"], namespaces=_accessibility_ns_map) + elif "selectors" in r: + selector = CSSSelector(", ".join(r["selectors"]), namespaces=_accessibility_ns_map) + elements: List[_Element] = selector(at) + else: + raise ValueError("At least one of xpath and selectors is required") - if len(elements) == 0: - print("no elements") - return 0. + if len(elements) == 0: + print("no elements") + return 0. 
- if "text" in rules: - match_func: Callable[[str], Number] = functools.partial(operator.eq if rules["exact"] \ - else (lambda a, b: fuzz.ratio(a, b) / 100.) - , rules["text"] - ) - match_score: Number = 0 - for elm in elements: - match_score = max(match_score, match_func(elm.text or None)) - else: - match_score = 1. + if "text" in r: + match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \ + else (lambda a, b: fuzz.ratio(a, b) / 100.) + , r["text"] + ) + match_score: Number = 0 + for elm in elements: + match_score = max(match_score, match_func(elm.text or None)) + else: + match_score = 1. + total_match_score *= match_score - return float(match_score) + return float(total_match_score) # def check_existence(result: str, *args) -> float: @@ -189,7 +193,7 @@ def run_sqlite3(result: str, rules: Dict[str, Any]) -> float: return float(cursor.fetchone()[0] or 0) -def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]]) -> float: +def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]], is_yaml: bool = False) -> float: """ Args: result (str): path to json file @@ -204,6 +208,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str ], "unexpect": page-tab[name=\"About Profiles\"]" - ] - } + "rules": [ + { + "selectors": [ + "application[name=Thunderbird] page-tab-list[attr|id=\"tabmail-tabs\"]>page-tab[name=\"About Profiles\"]" + ] + } + ] }, "func": "check_accessibility_tree" } diff --git a/requirements.txt b/requirements.txt index a6082f9..6571f11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,3 +42,4 @@ func-timeout beautifulsoup4 dashscope google-generativeai +PyYaml From 5817403e2e9d443d5b5ecc59e59cb6e41d0e074d Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 15:06:08 +0800 Subject: [PATCH 3/4] ver Mar6th three new tasks --- .../415ef462-bed3-493a-ac36-ca8c6d23bf1b.json | 147 ++++++++++++++++++ 
.../e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json | 59 +++++++ .../f8369178-fafe-40c2-adc4-b9b08a125456.json | 31 ++++ 3 files changed, 237 insertions(+) create mode 100644 evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json create mode 100644 evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json create mode 100644 evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json diff --git a/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json new file mode 100644 index 0000000..f3365fb --- /dev/null +++ b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json @@ -0,0 +1,147 @@ +{ + "id": "415ef462-bed3-493a-ac36-ca8c6d23bf1b", + "snapshot": "thunderbird", + "instruction": "Save the AWS invoice of December from the email. I have moved that email to the local \"Bills\" folder. Save it to my receipts folder.
Keep the file name pattern and update a record to my tally book.", + "source": "authors", + "config": [ + { + "type": "execute", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Finance/receipts", "/home/user/Documents/Projects"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2308.pdf", "url": "https://drive.google.com/uc?id=1azRFXf4A7fvW0S7r9upHvleMEi-92hHM&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2309.pdf", "url": "https://drive.google.com/uc?id=1x-lpHm8U4U7uRPZ74-9wq9KzW2R55ln1&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2310.pdf", "url": "https://drive.google.com/uc?id=1pcrgV9G6NO4ekMEQBiupwXtq6mmke7b_&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/aws-invoice-2311.pdf", "url": "https://drive.google.com/uc?id=1JzbCK_nIY8X_3QZjnkzTtb-cRoq9zNT-&export=download"}, + {"path": "/home/user/Documents/Finance/receipts/X-receipt-2312.pdf", "url": "https://drive.google.com/uc?id=1QzWjNzvNosG_yQr7VVonvYb3cUYF5f3u&export=download"}, + {"path": "/home/user/Documents/Finance/tally_book.xlsx", "url": "https://drive.google.com/uc?id=13yuLhBPmouoWR-DybfgaIbWUOxbY_jhL&export=download"}, + {"path": "/home/user/.projects.tar.xz", "url": "https://drive.google.com/uc?id=1oJcxpjqF474Wm16i1aZc8DlCEfAvc4t_&export=download"}, + { + "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["tar", "-xJvf", "/home/user/.projects.tar.xz", "-C", "/home/user/Documents/Projects"] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xzv", + "--recursive-unlink", + "-f", + 
"/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": ["thunderbird"] + } + } + ], + "trajectory": "trajectories/415ef462-bed3-493a-ac36-ca8c6d23bf1b", + "related_apps": ["thunderbird", "libreoffice_calc", "os"], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "tally_book.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/.aws-invoice-2312.pdf", "url": "https://drive.google.com/uc?id=1RqbulzKG_HeYb1GZmLABOzlohlFg02UU&export=download"} + ] + } + }, + { + "type": "execute", + "parameters": { + "command": ["diff", ".aws-invoice-2312.pdf", "/home/user/Documents/Finance/receipts/aws-invoice-2312.pdf"], + "stdout": "diff.out" + } + } + ], + "func": ["compare_table", "check_list"], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Documents/Finance/tally_book.xlsx", + "dest": "tally_book.xlsx" + }, + { + "type": "cache_file", + "path": "diff.out" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1x8m-korGI1PhJm8PAQVTlWYKneK4WKvn&export=download", + "dest": "tally_book_gt.xlsx" + }, + { + "type": "rule", + "rules": { + "unexpect": [ + ".+" + ] + } + } + ], + "options": [ + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + }, + {} + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json b/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json new file mode 100644 index 0000000..0d86766 --- /dev/null +++ 
b/evaluation_examples/examples/multi_apps/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56.json @@ -0,0 +1,59 @@ +{ + "id": "e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56", + "snapshot": "libreoffice_writer", + "instruction": "Install LanguageTool extension for my LibreOffice", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": ["libreoffice", "--writer"] + } + } + ], + "trajectory": "trajectories/e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56", + "related_apps": ["chrome", "libreoffice", "os"], + "evaluator": { + "postconfig": [ + { + "type": "command", + "parameters": { + "command": ["grep", "-nHr", "languagetool", "/home/user/.config/libreoffice/4/user/uno_packages/cache/uno_packages/"], + "stdout": "grep.out" + } + }, + { + "type": "command", + "parameters": { + "command": ["apt", "list", "--installed"], + "stdout": "apt.out" + } + } + ], + "func": ["check_list", "check_list"], + "result": [ + { + "type": "cache_file", + "path": "grep.out" + }, + { + "type": "cache_file", + "path": "apt.out" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": ["org\\.openoffice\\.languagetool\\.oxt"] + } + }, + { + "type": "rule", + "rules": { + "expect": ["openjdk-\\d+-(jre|jdk)"] + } + } + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json b/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json new file mode 100644 index 0000000..0a56921 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/f8369178-fafe-40c2-adc4-b9b08a125456.json @@ -0,0 +1,31 @@ +{ + "id": "f8369178-fafe-40c2-adc4-b9b08a125456", + "snapshot": "chrome", + "instruction": "Help me to install Orchis theme from gnome-look.org and change to it for my GNOME desktop.", + "source": "https://itsfoss.com/install-switch-themes-gnome-shell", + "config": [], + "trajectory": "trajectories/f8369178-fafe-40c2-adc4-b9b08a125456", + "related_apps": ["chrome", "os"], + "evaluator": { + "postconfig": [ + { + 
"type": "execute", + "parameters": { + "command": ["gsettings", "get", "org.gnome.desktop.interface", "gtk-theme"], + "stdout": "gsettings.out" + } + } + ], + "func": "check_list", + "result": { + "type": "cache_file", + "path": "gsettings.out" + }, + "expected": { + "type": "rule", + "rules": { + "expect": ["Orchis"] + } + } + } +} From f72b788cdcc22a0583b9f806162399cfaf2cc328 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 6 Mar 2024 15:21:09 +0800 Subject: [PATCH 4/4] ver Mar6th fixed bug in sheet_fuzzy option of compare_table --- desktop_env/evaluators/metrics/table.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index a35c13b..4a58e50 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -12,6 +12,7 @@ import pandas as pd from openpyxl import Workbook from openpyxl.cell.cell import Cell from openpyxl.worksheet.cell_range import MultiCellRange +from openpyxl.utils import get_column_letter from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.worksheet.worksheet import Worksheet @@ -208,8 +209,10 @@ def compare_table(result: str, expected: str = None, **options) -> float: for rl in r["rules"]: for rng in MultiCellRange(rl["range"]): for cdn in rng.cells: - value1: str = str(read_cell_value(*sheet1, cdn)) - value2: str = str(read_cell_value(*sheet2, cdn)) + coordinate: str = "{:}{:d}".format(get_column_letter(cdn[1]), cdn[0]) + value1: str = str(read_cell_value(*sheet1, coordinate)) + value2: str = str(read_cell_value(*sheet2, coordinate)) + logger.debug("%s: %s vs %s", cdn, value1, value2) for rplc in rl.get("normalization", []): value1 = value1.replace(rplc[0], rplc[1]) @@ -230,11 +233,11 @@ def compare_table(result: str, expected: str = None, **options) -> float: if rl["type"]=="includes": metric: bool = value1 in value2 - if rl["type"]=="includes_by": + elif 
rl["type"]=="includes_by": metric: bool = value2 in value1 - if rl["type"]=="fuzzy_match": + elif rl["type"]=="fuzzy_match": metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.) - if rl["type"]=="exact_match": + elif rl["type"]=="exact_match": metric: bool = value1==value2 total_metric = total_metric and metric