diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 1494872..200b56f 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -11,7 +11,7 @@ from .table import compare_table from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \ compare_videos from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions -from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3 +from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3, check_json from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed from .impress import check_slide_numbers_color, compare_pptx_files, check_for_two_lines diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 5b5492e..b0433c3 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -1,4 +1,5 @@ import csv +import json import functools import operator import re @@ -11,7 +12,7 @@ from lxml.cssselect import CSSSelector from lxml.etree import _Element from rapidfuzz import fuzz -from .utils import _match_record +from .utils import _match_record, _match_value_to_rule import sqlite3 @@ -153,3 +154,44 @@ def run_sqlite3(result: str, rules: Dict[str, Any]) -> float: connection: sqlite3.Connection = sqlite3.connect(result) cursor: sqlite3.Cursor = connection.execute(rules["sql"]) return float(cursor.fetchone()[0] or 0) + +def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str]]]]) -> float: + """ + Args: + result (str): path to json file + rules (Dict[str, List[Dict[str, Union[List[str], str]]]]): dict like + { + "expect": [ + { + "key": list of str + "method": str + "ref": something + } + ], + "unexpect": bool: - # function _match_pref {{{ # - if rule["method"].startswith("re"): - flags: List[str] = rule["method"].split(".")[1:] - flags: Iterable[re.RegexFlag] = (getattr(re, fl) for fl in flags) - flag: re.RegexFlag = functools.reduce(operator.or_, flags, re.RegexFlag(0)) - logger.debug("REFLAG: %s", repr(flag)) - - match_: Optional[Match[str]] = re.search(rule["ref"], value, flag) - return match_ is not None - if rule["method"] in { "eq", "ne" - , "le", "lt" - , "ge", "gt" - }: - return getattr(operator, rule["method"])(value, rule["ref"]) - raise NotImplementedError() - # }}} function _match_pref # - _pref_pattern: Pattern[str] = re.compile(r'^user_pref\("(?P(?:[^"]|\\")+)\", (?P.+)\);$'); def check_thunderbird_prefs(result: str, rule: Dict[str, Dict[str, Dict[str, Any]]]): """ @@ -148,9 +129,9 @@ def check_thunderbird_filter(result: str, rules: Dict[str, List[Dict[str, str]]] return float(all(expect_metrics) and unexpect_metric) if __name__ == "__main__": - import lxml.etree - from lxml.cssselect import CSSSelector - from lxml.etree import _Element + #import lxml.etree + #from lxml.cssselect import CSSSelector + #from lxml.etree import _Element #xml = "../../任务数据/Thunderbird/vertical-card-view.xml" #xml = "../../任务数据/Thunderbird/vertical-table-view.xml" diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 2ab945c..4e5e990 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -1,8 +1,11 @@ import logging import zipfile -from typing import Any -from typing import Dict, List, Set +from typing import Any, TypeVar, Union, Iterable, Optional +from typing import Dict, List, Set, Match from urllib.parse import urlparse, urlunparse +import re +import functools +import operator import lxml.cssselect import lxml.etree @@ -13,6 +16,8 @@ from openpyxl import Workbook from openpyxl.chart._chart import ChartBase from openpyxl.worksheet.worksheet import Worksheet +V = TypeVar("Value") + logger = logging.getLogger("desktopenv.metrics.utils") _xlsx_namespaces = [("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main") @@ -133,6 +138,34 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]: def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool: return all(k in item and item[k] == val for k, val in pattern.items()) +def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: + """ + Args: + value (V): value to match + rule (Dict[str, Union[str, V]]): rule dict like + { + "method": str + "ref": V as ref value + } + + Returns: + bool + """ + + if rule["method"].startswith("re"): + flags: List[str] = rule["method"].split(".")[1:] + flags: Iterable[re.RegexFlag] = (getattr(re, fl) for fl in flags) + flag: re.RegexFlag = functools.reduce(operator.or_, flags, re.RegexFlag(0)) + logger.debug("REFLAG: %s", repr(flag)) + + match_: Optional[Match[str]] = re.search(rule["ref"], value, flag) + return match_ is not None + if rule["method"] in { "eq", "ne" + , "le", "lt" + , "ge", "gt" + }: + return getattr(operator, rule["method"])(value, rule["ref"]) + raise NotImplementedError() def are_lists_equal(list1, list2, comparison_func): # First check if both lists have the same length diff --git a/evaluation_examples/examples/thunderbird/480bcfea-d68f-4aaa-a0a9-2589ef319381.json b/evaluation_examples/examples/thunderbird/480bcfea-d68f-4aaa-a0a9-2589ef319381.json new file mode 100644 index 0000000..7e95acb --- /dev/null +++ b/evaluation_examples/examples/thunderbird/480bcfea-d68f-4aaa-a0a9-2589ef319381.json @@ -0,0 +1,80 @@ +{ + "id": "480bcfea-d68f-4aaa-a0a9-2589ef319381", + "snapshot": "thunderbird", + "instruction": "I've got a bunch of email accounts in Thunderbird, and it's a hassle to check them one by one. Can you show me how to set up a unified inbox so I can see all my emails in one place?", + "source": "https://www.reddit.com/r/Thunderbird/comments/182dg5p/unified_inbox_howto/", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xzv", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/480bcfea-d68f-4aaa-a0a9-2589ef319381", + "related_apps": [ + "thunderbird" + ], + "evaluator": { + "postconfig": [ + { + "type": "command", + "parameters": { + "command": ["wmctrl", "-xFc", "Mail.thunderbird"], + "until": { + "returncode": 1 + } + } + } + ], + "result": { + "type": "vm_file", + "path": "/home/user/.thunderbird/t5q2a5hp.default-release/xulstore.json", + "dest": "xulstore.json" + }, + "func": "check_json", + "expected": { + "type": "rule", + "rules": { + "expect": [ + { + "key": [ + "chrome://messenger/content/messenger.xhtml", + "folderTree", + "mode" + ], + "method": "re", + "ref": "\\bsmart\\b" + } + ] + } + } + } +}