def diff_text_file(result: str, expect: str) -> float:
    """Score how similar two text files are, compared line by line.

    Args:
        result: Path of the file to be checked, or ``None`` when no file
            is available.
        expect: Path of the reference file.

    Returns:
        ``0.`` when ``result`` is ``None``; otherwise the
        ``difflib.SequenceMatcher`` ratio of the two files' line lists, a
        float in ``[0, 1]`` that is ``1.`` when the line lists are equal.

    Raises:
        OSError: If one of the files cannot be opened.
        UnicodeDecodeError: If one of the files is not valid UTF-8.
    """
    if result is None:
        return 0.

    # Decode explicitly: relying on the locale's default encoding would make
    # the score (or a decoding crash) depend on the machine running the check.
    with open(result, encoding="utf-8") as f:
        result_lines = f.read().splitlines()
    with open(expect, encoding="utf-8") as f:
        expected_lines = f.read().splitlines()

    # autojunk=False: the default heuristic ignores lines that occur often in
    # a sequence of 200+ items, which can push the ratio of two nearly
    # identical files with many repeated lines down to 0.
    matcher = difflib.SequenceMatcher(a=result_lines, b=expected_lines, autojunk=False)
    return matcher.ratio()
metric = metric and _match_value_to_rule(value, r) for r in unexpect_rules: value = result for k in r["key"]: - value = value[k] + try: + value = value[k] + except KeyError: + value = None + break metric = metric and not _match_value_to_rule(value, r) - return metric + return float(metric) def check_direct_json_object(result, rules)->float: @@ -238,4 +254,4 @@ def check_direct_json_object(result, rules)->float: expected_value = expected_json.get(key) if expected_value != result.get(key): return 0. - return 1.0 \ No newline at end of file + return 1.0 diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 6f444f8..0cb8513 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -587,7 +587,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: bool """ - if rule["method"].startswith("re"): + if rule["method"].startswith("re"): # re.FLAGs flags: List[str] = rule["method"].split(".")[1:] flags: Iterable[re.RegexFlag] = (getattr(re, fl) for fl in flags) flag: re.RegexFlag = functools.reduce(operator.or_, flags, re.RegexFlag(0)) @@ -600,7 +600,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: , "ge", "gt" }: return getattr(operator, rule["method"])(value, rule["ref"]) - if rule["method"].startswith("approx"): + if rule["method"].startswith("approx"): # approx:THRESHOLD threshold: float = float(rule["method"].split(":")[1]) logger.debug("Approx: TH%f, REF%f, VAL%s", threshold, rule["ref"], repr(value)) try: diff --git a/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json new file mode 100644 index 0000000..731020a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json @@ -0,0 +1,107 @@ +{ + "id": "74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "snapshot": "chrome", + 
"instruction": "Help me to get an initial setup of web extension with help of https://webext.eu . Tag the extension as \"happy-extension v0.0.1\". Leave description as blank for now. A background script and browser action is needed, while other features are not. Place the auto-generated folder under folder \"~/Projects\".", + "source": "authors", + "config": [], + "trajectory": "trajectories/74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "related_apps": [ + "chrome", + "os" + ], + "evaluator": { + "func": [ + "check_json", + "diff_text_file", + "diff_text_file", + "diff_text_file", + "diff_text_file" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/manifest.json", + "dest": "manifest.json" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/background_script.js", + "dest": "background_script.js" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/index.html", + "dest": "index.html" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/style.css", + "dest": "style.css" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/script.js", + "dest": "script.js" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": [ + { + "key": ["name"], + "method": "eq", + "ref": "happy-extension" + }, + { + "key": ["version"], + "method": "eq", + "ref": "0.0.1" + }, + { + "key": ["background", "scripts"], + "method": "eq", + "ref": ["background_script.js"] + }, + { + "key": ["browser_action", "default_icon"], + "method": "eq", + "ref": {"64": "icons/icon.png"} + }, + { + "key": ["browser_action", "default_popup"], + "method": "eq", + "ref": "browserAction/index.html" + }, + { + "key": ["browser_action", "default_title"], + "method": "eq", + "ref": "happy-extension" + } + ] + } + }, + { + "type": "cloud_file", + "path": 
"https://drive.google.com/uc?id=1t5Llhn6seDUXVs-eILu6CjwFEQL9Z5Qm&export=download", + "dest": "background_script.js" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=19fMAsWd6q4ElLdOceJ-otHbxRJA_pc_U&export=download", + "dest": "index.html" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1fwfiRPjdug8uh6z23RFO1JtlGH_L_Hl_&export=download", + "dest": "style.css" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=14YYnhCfRtHQNk8M4fBPaUQeteoFMGBsA&export=download", + "dest": "script.js" + } + ] + } +} diff --git a/quick_evaluate.py b/quick_evaluate.py new file mode 100644 index 0000000..99aafaa --- /dev/null +++ b/quick_evaluate.py @@ -0,0 +1,77 @@ +import datetime +import json +import logging +import os +import sys +import time +import argparse +from desktop_env.envs.desktop_env import DesktopEnv + +# Logger Configs {{{ # +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) + +datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + +file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8") +debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8") +stdout_handler = logging.StreamHandler(sys.stdout) +sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8") + +file_handler.setLevel(logging.INFO) +debug_handler.setLevel(logging.DEBUG) +stdout_handler.setLevel(logging.INFO) +sdebug_handler.setLevel(logging.DEBUG) + +formatter = logging.Formatter( + fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") +file_handler.setFormatter(formatter) +debug_handler.setFormatter(formatter) +stdout_handler.setFormatter(formatter) +sdebug_handler.setFormatter(formatter) + +stdout_handler.addFilter(logging.Filter("desktopenv")) 
sdebug_handler.addFilter(logging.Filter("desktopenv"))

logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #

logger = logging.getLogger("desktopenv.main")


def human_agent():
    """
    Evaluate one task example on a virtual machine and log the score.

    Command-line options (read from ``sys.argv``):
        -p/--path      Path to the virtual machine .vmx file (required).
        -s/--snapshot  Snapshot name; overrides the example's "snapshot".
        -e/--example   Path to the example json file. A default
                       libreoffice_writer example is used when the option
                       is omitted or the file does not exist.

    The example is passed to ``DesktopEnv`` as its task config and the value
    returned by ``env.evaluate()`` is logged. This function does not call
    ``env.reset()``, ``env.step()`` or ``env.close()`` itself.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', type=str, required=True, help="Path to the virtual machine .vmx file.")
    parser.add_argument('-s', '--snapshot', type=str, help="Name of the snapshot to restore.")
    parser.add_argument('-e', '--example', type=str, help="Path to the example json file.")
    args = parser.parse_args(sys.argv[1:])

    # Validate up front. parser.error() is used instead of assert because
    # asserts are stripped under `python -O`; it prints the usage and exits.
    if not os.path.exists(args.path):
        parser.error("The specified path to the .vmx file does not exist.")

    default_example = \
        'evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json'
    if args.example is not None and os.path.exists(args.example):
        example_path = args.example
    else:
        if args.example is not None:
            # Do not switch tasks silently when the requested file is missing.
            logger.warning("Example file %s does not exist; falling back to %s",
                           args.example, default_example)
        example_path = default_example

    with open(example_path, "r", encoding="utf-8") as f:
        example = json.load(f)
    # change to your customized snapshot
    if args.snapshot is not None:
        example["snapshot"] = args.snapshot

    env = DesktopEnv(
        path_to_vm=args.path,
        action_space="computer_13",
        task_config=example
    )

    result = env.evaluate()
    logger.info("Result: %.2f", result)

    # env.close() is left disabled, as in the original script.
    # NOTE(review): presumably so the VM keeps running after evaluation - confirm.
    # env.close()
    logger.info("Evaluation finished; environment was not closed.")


if __name__ == "__main__":
    human_agent()