From 33ace6937b051801adf58fc527b13a72ff53f527 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 28 Feb 2024 22:35:04 +0800 Subject: [PATCH 1/5] ver Feb28th a new multi app task --- init a web extension project with web tool --- branch-config/filelist | 1 + desktop_env/evaluators/metrics/__init__.py | 3 +- desktop_env/evaluators/metrics/general.py | 24 +++- desktop_env/evaluators/metrics/utils.py | 4 +- .../74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json | 107 ++++++++++++++++++ quick_evaluate.py | 77 +++++++++++++ 6 files changed, 209 insertions(+), 7 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json create mode 100644 quick_evaluate.py diff --git a/branch-config/filelist b/branch-config/filelist index b386813..aec119b 100644 --- a/branch-config/filelist +++ b/branch-config/filelist @@ -12,3 +12,4 @@ experiment_screenshot_seeact.py experiment_screenshot_som.py quick_compare_table.py +quick_evaluate.py diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 5a7025c..ed0dd86 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -57,7 +57,8 @@ from .general import ( is_in_list, fuzzy_match, check_include_exclude, - check_direct_json_object + check_direct_json_object, + diff_text_file ) from .gimp import ( check_brightness_decrease_and_structure_sim, diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 550c54b..4458a69 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -12,6 +12,7 @@ import lxml.etree from lxml.cssselect import CSSSelector from lxml.etree import _Element from rapidfuzz import fuzz +import difflib from .utils import _match_record, _match_value_to_rule @@ -45,7 +46,15 @@ def is_in_list(result, rules) -> float: else: return 0. +def diff_text_file(result: str, expect: str) -> float: + if result is None: + return 0. + with open(result) as f: + result_lines: List[str] = f.read().splitlines() + with open(expect) as f: + expected_lines: List[str] = f.read().splitlines() + return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio() def fuzzy_match(result, rules) -> float: expect = rules["expected"] @@ -212,14 +221,21 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str for r in expect_rules: value = result for k in r["key"]: - value = value[k] + try: + value = value[k] + except KeyError: + return 0. metric = metric and _match_value_to_rule(value, r) for r in unexpect_rules: value = result for k in r["key"]: - value = value[k] + try: + value = value[k] + except KeyError: + value = None + break metric = metric and not _match_value_to_rule(value, r) - return metric + return float(metric) def check_direct_json_object(result, rules)->float: @@ -238,4 +254,4 @@ def check_direct_json_object(result, rules)->float: expected_value = expected_json.get(key) if expected_value != result.get(key): return 0. - return 1.0 \ No newline at end of file + return 1.0 diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 6f444f8..0cb8513 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -587,7 +587,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: bool """ - if rule["method"].startswith("re"): + if rule["method"].startswith("re"): # re.FLAGs flags: List[str] = rule["method"].split(".")[1:] flags: Iterable[re.RegexFlag] = (getattr(re, fl) for fl in flags) flag: re.RegexFlag = functools.reduce(operator.or_, flags, re.RegexFlag(0)) @@ -600,7 +600,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: , "ge", "gt" }: return getattr(operator, rule["method"])(value, rule["ref"]) - if rule["method"].startswith("approx"): + if rule["method"].startswith("approx"): # approx:THRESHOLD threshold: float = float(rule["method"].split(":")[1]) logger.debug("Approx: TH%f, REF%f, VAL%s", threshold, rule["ref"], repr(value)) try: diff --git a/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json new file mode 100644 index 0000000..731020a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json @@ -0,0 +1,107 @@ +{ + "id": "74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "snapshot": "chrome", + "instruction": "Help me to get an initial setup of web extension with help of https://webext.eu . Tag the extension as \"happy-extension v0.0.1\". Leave description as blank for now. A background script and browser action is needed, while other features are not. Place the auto-generated folder under folder \"~/Projects\".", + "source": "authors", + "config": [], + "trajectory": "trajectories/74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "related_apps": [ + "chrome", + "os" + ], + "evaluator": { + "func": [ + "check_json", + "diff_text_file", + "diff_text_file", + "diff_text_file", + "diff_text_file" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/manifest.json", + "dest": "manifest.json" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/background_script.js", + "dest": "background_script.js" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/index.html", + "dest": "index.html" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/style.css", + "dest": "style.css" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/script.js", + "dest": "script.js" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": [ + { + "key": ["name"], + "method": "eq", + "ref": "happy-extension" + }, + { + "key": ["version"], + "method": "eq", + "ref": "0.0.1" + }, + { + "key": ["background", "scripts"], + "method": "eq", + "ref": ["background_script.js"] + }, + { + "key": ["browser_action", "default_icon"], + "method": "eq", + "ref": {"64": "icons/icon.png"} + }, + { + "key": ["browser_action", "default_popup"], + "method": "eq", + "ref": "browserAction/index.html" + }, + { + "key": ["browser_action", "default_title"], + "method": "eq", + "ref": "happy-extension" + } + ] + } + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1t5Llhn6seDUXVs-eILu6CjwFEQL9Z5Qm&export=download", + "dest": "background_script.js" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=19fMAsWd6q4ElLdOceJ-otHbxRJA_pc_U&export=download", + "dest": "index.html" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1fwfiRPjdug8uh6z23RFO1JtlGH_L_Hl_&export=download", + "dest": "style.css" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=14YYnhCfRtHQNk8M4fBPaUQeteoFMGBsA&export=download", + "dest": "script.js" + } + ] + } +} diff --git a/quick_evaluate.py b/quick_evaluate.py new file mode 100644 index 0000000..99aafaa --- /dev/null +++ b/quick_evaluate.py @@ -0,0 +1,77 @@ +import datetime +import json +import logging +import os +import sys +import time +import argparse +from desktop_env.envs.desktop_env import DesktopEnv + +# Logger Configs {{{ # +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) + +datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + +file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8") +debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8") +stdout_handler = logging.StreamHandler(sys.stdout) +sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8") + +file_handler.setLevel(logging.INFO) +debug_handler.setLevel(logging.DEBUG) +stdout_handler.setLevel(logging.INFO) +sdebug_handler.setLevel(logging.DEBUG) + +formatter = logging.Formatter( + fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") +file_handler.setFormatter(formatter) +debug_handler.setFormatter(formatter) +stdout_handler.setFormatter(formatter) +sdebug_handler.setFormatter(formatter) + +stdout_handler.addFilter(logging.Filter("desktopenv")) +sdebug_handler.addFilter(logging.Filter("desktopenv")) + +logger.addHandler(file_handler) +logger.addHandler(debug_handler) +logger.addHandler(stdout_handler) +logger.addHandler(sdebug_handler) +# }}} Logger Configs # + +logger = logging.getLogger("desktopenv.main") + + +def human_agent(): + """ + Runs the Gym environment with human input. + """ + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--path', type=str, required=True, help="Path to the virtual machine .vmx file.") + parser.add_argument('-s', '--snapshot', type=str, help="Name of the snapshot to restore.") + parser.add_argument('-e', '--example', type=str, help="Path to the example json file.") + args = parser.parse_args(sys.argv[1:]) + + example_path = args.example if args.example is not None and os.path.exists(args.example) else \ + 'evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json' + with open(example_path, "r") as f: + example = json.load(f) + # change to your customized snapshot + if args.snapshot is not None: example["snapshot"] = args.snapshot + + assert os.path.exists(args.path), "The specified path to the .vmx file does not exist." + env = DesktopEnv( + path_to_vm=args.path, + action_space="computer_13", + task_config=example + ) + + result = env.evaluate() + logger.info("Result: %.2f", result) + + # env.close() + logger.info("Environment closed.") + + +if __name__ == "__main__": + human_agent() From f95458dfd33a12111e1dd452b620adeb3022dec7 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 29 Feb 2024 12:07:26 +0800 Subject: [PATCH 2/5] ver Feb29th fixed a bug in load_charts --- desktop_env/evaluators/metrics/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 0cb8513..4515cd5 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -124,10 +124,14 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An value_str: str = ser.val.numRef.f elif hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f"): value_str: str = ser.val.strRef.f + else: + value_str: str = "" if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f"): categ_str: str = ser.cat.numRef.f elif hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f"): categ_str: str = ser.cat.strRef.f + else: + categ_str: str = "" series.append("{:},{:}".format(value_str, categ_str)) series: str = ";".join(series) From 6fef1cfb8fd957c42fd852ab22322174da618afc Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 29 Feb 2024 20:05:03 +0800 Subject: [PATCH 3/5] ver Feb29thv2 updated a task config --- .../sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json index ad9c17f..4c2e0a1 100644 --- a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json +++ b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json @@ -1,7 +1,7 @@ { "id": "30e3e107-1cfb-46ee-a755-2cd080d7ba6a", "snapshot": "libreoffice_calc", - "instruction": "Please create a new sheet. Merge cells A1:C1 in the new sheet and write \"Demographic Profile\" with blue fill and bold white text. Then I want to create three pivot tables to show the percentage of Sex, Civil Status , and Highest Educational Attainment.", + "instruction": "Please create a new sheet. Merge cells A1:C1 in the new sheet and write \"Demographic Profile\" with blue (#0000ff) fill and bold white text. Then I want to create three pivot tables to show the percentage of Sex, Civil Status , and Highest Educational Attainment. They should be stacked one by one in the new sheet, each separated with a blank line.", "source": "SheetCopilot@9", "config": [ { From 7566555eb32d9d230ca9221048050c25bffed1e1 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 29 Feb 2024 22:56:40 +0800 Subject: [PATCH 4/5] ver Feb29thv3 updated evaluation for a sheetcopilot task --- .../30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json index 4c2e0a1..8536295 100644 --- a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json +++ b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json @@ -71,11 +71,6 @@ }, "options": { "rules": [ - { - "type": "sheet_data", - "sheet_idx0": "RNSheet2", - "sheet_idx1": "ENSheet2" - }, { "type": "pivot_table", "sheet_idx0": "RNSheet2", @@ -103,6 +98,10 @@ "font_bold": { "method": "eq", "ref": true + }, + "value": { + "method": "eq", + "ref": "Demographic Profile" } } } From 58e21ab87171ff1595c5ecf47d9c7200aed020e7 Mon Sep 17 00:00:00 2001 From: David Chang Date: Fri, 1 Mar 2024 13:35:52 +0800 Subject: [PATCH 5/5] ver Mar1st the multi_app task proposed by tao --- desktop_env/evaluators/metrics/table.py | 1 - .../7e287123-70ca-47b9-8521-47db09b69b14.json | 96 +++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 24e098e..f593524 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -122,7 +122,6 @@ def compare_table(result: str, expected: str = None, **options) -> float: worksheetr_names: List[str] = pdworkbookr.sheet_names if expected is not None: - xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected) pdworkbooke = pd.ExcelFile(expected) worksheete_names: List[str] = pdworkbooke.sheet_names diff --git a/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json b/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json new file mode 100644 index 0000000..a398bd4 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json @@ -0,0 +1,96 @@ +{ + "id": "7e287123-70ca-47b9-8521-47db09b69b14", + "snapshot": "libreoffice_calc", + "instruction": "I am an assistant professor of CS at HKU, I want to apply for the General Research Fund next year, I need to get some insights, so I need you to help me to organise the data. First please help me to organise the pass rate of the GRF applications of the CS departments of each school for the past five years in percentage form with 2 decimal digits in a table, which I can use subsequently. Set the headers as \"Year\", \"#Applied\", \"#Supported\", and \"Success Rate\". The materials are saved under Documents/Fundings. And please save the result table as \"GRF-p5y.xlsx\" on my desktop.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Fundings/ecs", "/home/user/Documents/Fundings/grf"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Fundings/ecs/ecs15.pdf", "url": "https://drive.google.com/uc?id=1FTiT3mLlkehe2yWVdSMWr1w4ltLtQZUy&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs16.pdf", "url": "https://drive.google.com/uc?id=1DMzZyhDey3lDsQ7fcPiJm9AYGECZms3q&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs17.pdf", "url": "https://drive.google.com/uc?id=1TgAWk7FiV8fNrG2L3-Eu7BDccqtXebXY&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs23.pdf", "url": "https://drive.google.com/uc?id=11DVxH4eRjECUxZNbUhYNJhgD0Y5WoN8r&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs22.pdf", "url": "https://drive.google.com/uc?id=17IgyJADA65F40kH79S90QgEzPa7IERXx&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs21.pdf", "url": "https://drive.google.com/uc?id=1kB4eFfLisPXKOirGUHbbcOyf73t7MVqL&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs20.pdf", "url": "https://drive.google.com/uc?id=179j9tD1xRSgd9COM7rzErO6FLYO2sc_a&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs19.pdf", "url": "https://drive.google.com/uc?id=1ogZUTfKjvQhg58GXtVUxe1U8VvHU-3ap&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs18.pdf", "url": "https://drive.google.com/uc?id=1MfuvRhAnhMEMbxn5js2ffWqMHJx2fsd2&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/customer-information-sheet-for-inward-payments-to-hong-kong.pdf", "url": "https://drive.google.com/uc?id=1s-H3an7HLBM9ku6d6Hcdj1qkSwKAHngU&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf15.pdf", "url": "https://drive.google.com/uc?id=1rRQXo9XHnCVTG8XqNAv0SJwPTW36MMbm&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf16.pdf", "url": "https://drive.google.com/uc?id=18ljRqkdyXEZ464E0dpKjaEa2NFexyw3I&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf17.pdf", "url": "https://drive.google.com/uc?id=1VrqOnyhpOkMpyIJ6YMrAhixpahjYonOd&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf18.pdf", "url": "https://drive.google.com/uc?id=182CLDUr372-jpAiY4YvSbGNXF9TsWxzA&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf19.pdf", "url": "https://drive.google.com/uc?id=1YkJtjlklKN0NmLiI2Hi4f_dKtTm5SPxT&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf20.pdf", "url": "https://drive.google.com/uc?id=1a7Uc7VCMlEX6fy-5oqE6i1YLitBe7gaf&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf21.pdf", "url": "https://drive.google.com/uc?id=1s8km4Wle4lc5PkbUQfivBFK0IJQgxMiB&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf22.pdf", "url": "https://drive.google.com/uc?id=1HTEb1uK7LNvbVyeXgO8WemCPPgiASKiy&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf23.pdf", "url": "https://drive.google.com/uc?id=1XM-jZlfu_i4waDZHb8Z6Vr5b3LgULTtP&export=download"} + ] + } + } + ], + "trajectory": "trajectories/7e287123-70ca-47b9-8521-47db09b69b14", + "related_apps": [ + "libreoffice_calc", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "libreoffice", + "--convert-to", + "csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1", + "--outdir", + "/home/user/Desktop", + "/home/user/Desktop/GRF-p5y.xlsx" + ] + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/GRF-p5y.xlsx", + "/home/user/Desktop/GRF-p5y-Sheet1.csv" + ], + "dest": [ + "GRF-p5y.xlsx", + "GRF-p5y-Sheet1.csv" + ], + "multi": true + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1fDM4Y-WuFCnfksPLgynj-WSmzbqn2TcV&export=download", + "https://drive.google.com/uc?id=1waThupubGOJop0FU0b0yhT6QnjNYkLGy&export=download" + ], + "dest": [ + "GRF-p5y-gt.xlsx", + "GRF-p5y-gt-Sheet1.csv" + ], + "multi": true + }, + "options": { + "rules": [ + { + "type": "sheet_print", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1" + } + ] + } + } +}