diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 5a7025c..ed0dd86 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -57,7 +57,8 @@ from .general import ( is_in_list, fuzzy_match, check_include_exclude, - check_direct_json_object + check_direct_json_object, + diff_text_file ) from .gimp import ( check_brightness_decrease_and_structure_sim, diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 550c54b..4458a69 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -12,6 +12,7 @@ import lxml.etree from lxml.cssselect import CSSSelector from lxml.etree import _Element from rapidfuzz import fuzz +import difflib from .utils import _match_record, _match_value_to_rule @@ -45,7 +46,15 @@ def is_in_list(result, rules) -> float: else: return 0. +def diff_text_file(result: str, expect: str) -> float: + if result is None: + return 0. + with open(result) as f: + result_lines: List[str] = f.read().splitlines() + with open(expect) as f: + expected_lines: List[str] = f.read().splitlines() + return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio() def fuzzy_match(result, rules) -> float: expect = rules["expected"] @@ -212,14 +221,21 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str for r in expect_rules: value = result for k in r["key"]: - value = value[k] + try: + value = value[k] + except KeyError: + return 0. metric = metric and _match_value_to_rule(value, r) for r in unexpect_rules: value = result for k in r["key"]: - value = value[k] + try: + value = value[k] + except KeyError: + value = None + break metric = metric and not _match_value_to_rule(value, r) - return metric + return float(metric) def check_direct_json_object(result, rules)->float: @@ -238,4 +254,4 @@ def check_direct_json_object(result, rules)->float: expected_value = expected_json.get(key) if expected_value != result.get(key): return 0. - return 1.0 \ No newline at end of file + return 1.0 diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 24e098e..f593524 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -122,7 +122,6 @@ def compare_table(result: str, expected: str = None, **options) -> float: worksheetr_names: List[str] = pdworkbookr.sheet_names if expected is not None: - xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected) pdworkbooke = pd.ExcelFile(expected) worksheete_names: List[str] = pdworkbooke.sheet_names diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 7036080..4515cd5 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -591,7 +591,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: bool """ - if rule["method"].startswith("re"): + if rule["method"].startswith("re"): # re.FLAGs flags: List[str] = rule["method"].split(".")[1:] flags: Iterable[re.RegexFlag] = (getattr(re, fl) for fl in flags) flag: re.RegexFlag = functools.reduce(operator.or_, flags, re.RegexFlag(0)) @@ -604,7 +604,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: , "ge", "gt" }: return getattr(operator, rule["method"])(value, rule["ref"]) - if rule["method"].startswith("approx"): + if rule["method"].startswith("approx"): # approx:THRESHOLD threshold: float = float(rule["method"].split(":")[1]) logger.debug("Approx: TH%f, REF%f, VAL%s", threshold, rule["ref"], repr(value)) try: diff --git a/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json new file mode 100644 index 0000000..731020a --- /dev/null +++ b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json @@ -0,0 +1,107 @@ +{ + "id": "74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "snapshot": "chrome", + "instruction": "Help me to get an initial setup of web extension with help of https://webext.eu . Tag the extension as \"happy-extension v0.0.1\". Leave description as blank for now. A background script and browser action is needed, while other features are not. Place the auto-generated folder under folder \"~/Projects\".", + "source": "authors", + "config": [], + "trajectory": "trajectories/74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "related_apps": [ + "chrome", + "os" + ], + "evaluator": { + "func": [ + "check_json", + "diff_text_file", + "diff_text_file", + "diff_text_file", + "diff_text_file" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/manifest.json", + "dest": "manifest.json" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/background_script.js", + "dest": "background_script.js" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/index.html", + "dest": "index.html" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/style.css", + "dest": "style.css" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/script.js", + "dest": "script.js" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": [ + { + "key": ["name"], + "method": "eq", + "ref": "happy-extension" + }, + { + "key": ["version"], + "method": "eq", + "ref": "0.0.1" + }, + { + "key": ["background", "scripts"], + "method": "eq", + "ref": ["background_script.js"] + }, + { + "key": ["browser_action", "default_icon"], + "method": "eq", + "ref": {"64": "icons/icon.png"} + }, + { + "key": ["browser_action", "default_popup"], + "method": "eq", + "ref": "browserAction/index.html" + }, + { + "key": ["browser_action", "default_title"], + "method": "eq", + "ref": "happy-extension" + } + ] + } + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1t5Llhn6seDUXVs-eILu6CjwFEQL9Z5Qm&export=download", + "dest": "background_script.js" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=19fMAsWd6q4ElLdOceJ-otHbxRJA_pc_U&export=download", + "dest": "index.html" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1fwfiRPjdug8uh6z23RFO1JtlGH_L_Hl_&export=download", + "dest": "style.css" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=14YYnhCfRtHQNk8M4fBPaUQeteoFMGBsA&export=download", + "dest": "script.js" + } + ] + } +} diff --git a/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json b/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json new file mode 100644 index 0000000..a398bd4 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14.json @@ -0,0 +1,96 @@ +{ + "id": "7e287123-70ca-47b9-8521-47db09b69b14", + "snapshot": "libreoffice_calc", + "instruction": "I am an assistant professor of CS at HKU, I want to apply for the General Research Fund next year, I need to get some insights, so I need you to help me to organise the data. First please help me to organise the pass rate of the GRF applications of the CS departments of each school for the past five years in percentage form with 2 decimal digits in a table, which I can use subsequently. Set the headers as \"Year\", \"#Applied\", \"#Supported\", and \"Success Rate\". The materials are saved under Documents/Fundings. And please save the result table as \"GRF-p5y.xlsx\" on my desktop.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": ["mkdir", "-p", "/home/user/Documents/Fundings/ecs", "/home/user/Documents/Fundings/grf"] + } + }, + { + "type": "download", + "parameters": { + "files": [ + {"path": "/home/user/Documents/Fundings/ecs/ecs15.pdf", "url": "https://drive.google.com/uc?id=1FTiT3mLlkehe2yWVdSMWr1w4ltLtQZUy&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs16.pdf", "url": "https://drive.google.com/uc?id=1DMzZyhDey3lDsQ7fcPiJm9AYGECZms3q&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs17.pdf", "url": "https://drive.google.com/uc?id=1TgAWk7FiV8fNrG2L3-Eu7BDccqtXebXY&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs23.pdf", "url": "https://drive.google.com/uc?id=11DVxH4eRjECUxZNbUhYNJhgD0Y5WoN8r&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs22.pdf", "url": "https://drive.google.com/uc?id=17IgyJADA65F40kH79S90QgEzPa7IERXx&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs21.pdf", "url": "https://drive.google.com/uc?id=1kB4eFfLisPXKOirGUHbbcOyf73t7MVqL&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs20.pdf", "url": "https://drive.google.com/uc?id=179j9tD1xRSgd9COM7rzErO6FLYO2sc_a&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs19.pdf", "url": "https://drive.google.com/uc?id=1ogZUTfKjvQhg58GXtVUxe1U8VvHU-3ap&export=download"}, + {"path": "/home/user/Documents/Fundings/ecs/ecs18.pdf", "url": "https://drive.google.com/uc?id=1MfuvRhAnhMEMbxn5js2ffWqMHJx2fsd2&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/customer-information-sheet-for-inward-payments-to-hong-kong.pdf", "url": "https://drive.google.com/uc?id=1s-H3an7HLBM9ku6d6Hcdj1qkSwKAHngU&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf15.pdf", "url": "https://drive.google.com/uc?id=1rRQXo9XHnCVTG8XqNAv0SJwPTW36MMbm&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf16.pdf", "url": "https://drive.google.com/uc?id=18ljRqkdyXEZ464E0dpKjaEa2NFexyw3I&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf17.pdf", "url": "https://drive.google.com/uc?id=1VrqOnyhpOkMpyIJ6YMrAhixpahjYonOd&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf18.pdf", "url": "https://drive.google.com/uc?id=182CLDUr372-jpAiY4YvSbGNXF9TsWxzA&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf19.pdf", "url": "https://drive.google.com/uc?id=1YkJtjlklKN0NmLiI2Hi4f_dKtTm5SPxT&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf20.pdf", "url": "https://drive.google.com/uc?id=1a7Uc7VCMlEX6fy-5oqE6i1YLitBe7gaf&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf21.pdf", "url": "https://drive.google.com/uc?id=1s8km4Wle4lc5PkbUQfivBFK0IJQgxMiB&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf22.pdf", "url": "https://drive.google.com/uc?id=1HTEb1uK7LNvbVyeXgO8WemCPPgiASKiy&export=download"}, + {"path": "/home/user/Documents/Fundings/grf/grf23.pdf", "url": "https://drive.google.com/uc?id=1XM-jZlfu_i4waDZHb8Z6Vr5b3LgULTtP&export=download"} + ] + } + } + ], + "trajectory": "trajectories/7e287123-70ca-47b9-8521-47db09b69b14", + "related_apps": [ + "libreoffice_calc", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "libreoffice", + "--convert-to", + "csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1", + "--outdir", + "/home/user/Desktop", + "/home/user/Desktop/GRF-p5y.xlsx" + ] + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/GRF-p5y.xlsx", + "/home/user/Desktop/GRF-p5y-Sheet1.csv" + ], + "dest": [ + "GRF-p5y.xlsx", + "GRF-p5y-Sheet1.csv" + ], + "multi": true + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1fDM4Y-WuFCnfksPLgynj-WSmzbqn2TcV&export=download", + "https://drive.google.com/uc?id=1waThupubGOJop0FU0b0yhT6QnjNYkLGy&export=download" + ], + "dest": [ + "GRF-p5y-gt.xlsx", + "GRF-p5y-gt-Sheet1.csv" + ], + "multi": true + }, + "options": { + "rules": [ + { + "type": "sheet_print", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1" + } + ] + } + } +}