diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index a960c92..7704866 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -242,6 +242,9 @@ class SetupController: logger.error("An error occurred while trying to send the request: %s", e) traceback.print_exc() + def _command_setup(self, command: List[str], stdout: str = "", stderr: str = ""): + self._execute_setup(command, stdout, stderr) + def _act_setup(self, action_seq: List[Union[Dict[str, Any], str]]): # TODO raise NotImplementedError() diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py index 2c59035..2d77eba 100644 --- a/desktop_env/evaluators/getters/file.py +++ b/desktop_env/evaluators/getters/file.py @@ -1,4 +1,5 @@ from typing import Dict +from typing import Optional import os import requests @@ -27,7 +28,7 @@ def get_cloud_file(env, config: Dict[str, str]) -> str: return _path -def get_vm_file(env, config: Dict[str, str]) -> str: +def get_vm_file(env, config: Dict[str, str]) -> Optional[str]: """ Config: path (str): absolute path on the VM to fetch @@ -37,6 +38,8 @@ def get_vm_file(env, config: Dict[str, str]) -> str: _path = os.path.join(env.cache_dir, config["dest"]) file = env.controller.get_file(config["path"]) + if file is None: + return None with open(_path, "wb") as f: f.write(file) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index e8f313b..1beb03e 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -6,4 +6,4 @@ from .docs import is_first_line_centered, check_file_exists, compare_contains_im from .pdf import check_pdf_pages from .libreoffice import check_libre_locale #from .vlc import is_vlc_playing -from .general import check_csv, check_accessibility_tree +from .general import check_csv, check_accessibility_tree, check_list diff --git a/desktop_env/evaluators/metrics/general.py 
b/desktop_env/evaluators/metrics/general.py index 8ede586..427198c 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -4,13 +4,14 @@ import lxml.etree from lxml.etree import _Element from lxml.cssselect import CSSSelector -from typing import Dict, List +from typing import Dict, List, Pattern from typing import Callable, Any from numbers import Number import operator from rapidfuzz import fuzz import functools +import re def _match_record(pattern: Dict[str, str], item: Dict[str, str]) -> float: return all(k in item and item[k]==val for k, val in pattern.items()) @@ -37,7 +38,33 @@ def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float: for rcd in reader: for i, r in enumerate(rules.get("expect", [])): expect_metrics[i] = expect_metrics[i] or _match_record(r, rcd) - unexpect_metric = unexpect_metric and all(_match_record(r, rcd) for r in rules.get("unexpect", [])) + unexpect_metric = unexpect_metric and not any(_match_record(r, rcd) for r in rules.get("unexpect", [])) + return float(all(expect_metrics) and unexpect_metric) + +def check_list(result: str, rules: Dict[str, List[str]]) -> float: + """ + Args: + result (str): path to list file + rules (Dict[str, List[str]]): dict like + { + "expect": list of str as regexes + "unexpect": list of str as regexes + } + + Returns: + float + """ + + expect_patterns: List[Pattern[str]] = [re.compile(ptt) for ptt in rules.get("expect", [])] + unexpect_patterns: List[Pattern[str]] = [re.compile(ptt) for ptt in rules.get("unexpect", [])] + + expect_metrics = [False] * len(expect_patterns) + unexpect_metric = True + with open(result) as f: + for l in f: + for i, r in enumerate(expect_patterns): + expect_metrics[i] = expect_metrics[i] or (r.search(l) is not None) + unexpect_metric = unexpect_metric and all(r.search(l) is None for r in unexpect_patterns) return float(all(expect_metrics) and unexpect_metric) _accessibility_ns_map = { "st": 
"uri:deskat:state.at-spi.gnome.org" @@ -93,3 +120,6 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float: match_score = 1. return float(match_score) + +#def check_existence(result: str, *args) -> float: + #return 1. - (result is None) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 34ef0b0..25b55f3 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -31,6 +31,9 @@ def compare_table(actual: str, expected: str, **options) -> float: float: the score """ + if actual is None: + return 0. + df1 = pd.read_excel(expected) df2 = pd.read_excel(actual) metric: bool = df1.equals(df2) @@ -71,6 +74,9 @@ def compare_table(actual: str, expected: str, **options) -> float: return float(metric) def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float: + if result is None: + return 0. + # workbook: Workbook = openpyxl.load_workbook(filename=result) workbook = pd.ExcelFile(result) worksheet_names: List[str] = workbook.sheet_names @@ -109,10 +115,16 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float: return float(passes) def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float: + if result is None: + return 0. + worksheet: Worksheet = openpyxl.load_workbook(filename=result).active return float(worksheet.freeze_panes == rules["position"]) def check_xlsx_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float: + if result is None: + return 0. + worksheet = openpyxl.load_workbook(filename=result).active zoom_scale: Number = worksheet.sheet_view.zoomScale or 100. 
return float( getattr(operator, rules["relation"])( zoom_scale diff --git a/evaluation_examples/examples/thunderbird/06fe7178-4491-4589-810f-2e2bc9502122.json b/evaluation_examples/examples/thunderbird/06fe7178-4491-4589-810f-2e2bc9502122.json index ea4b832..969b193 100644 --- a/evaluation_examples/examples/thunderbird/06fe7178-4491-4589-810f-2e2bc9502122.json +++ b/evaluation_examples/examples/thunderbird/06fe7178-4491-4589-810f-2e2bc9502122.json @@ -1,12 +1,70 @@ { "id": "06fe7178-4491-4589-810f-2e2bc9502122", "snapshot": "thunderbird", - "instruction": "Could you help me back up all the email files in my profile to PROFILE_DIR?", + "instruction": "Could you help me back up all the email files in my profile to ~/emails.bak? Please save them in eml format.", "source": "https://www.quora.com/How-do-I-backup-email-files-in-Mozilla-Thunderbird", - "config": [], - "trajectory": "trajectories/", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xzv", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/06fe7178-4491-4589-810f-2e2bc9502122", "related_apps": [ "thunderbird" ], - "evaluator": "evaluation_dir" + "evaluator": { + "postconfig": [ + { + "type": "command", + "parameters": { + "command": ["ls", "-R", "/home/user/emails.bak"], + "stdout": "emails.bak.ls" + } + } + ], + "func": "check_list", + "result": { + "type": "cache_file", + "path": "emails.bak.ls" + }, + "expected": { + "type": "rule", + "rules": 
{ + "expect": [ + "歡迎使用新的 Outlook.com 帳戶.*\\.eml", + "A Test E-mail.*\\.eml" + ] + } + } + } } diff --git a/main.py b/main.py index b89cf79..b128766 100644 --- a/main.py +++ b/main.py @@ -44,9 +44,9 @@ def human_agent(): Runs the Gym environment with human input. """ - with open("evaluation_examples/examples/thunderbird/12086550-11c0-466b-b367-1d9e75b3910e.json", "r") as f: + with open("evaluation_examples/examples/thunderbird/06fe7178-4491-4589-810f-2e2bc9502122.json", "r") as f: example = json.load(f) - example["snapshot"] = "Snapshot 9" + example["snapshot"] = "Snapshot 11" env = DesktopEnv( path_to_vm="../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx" , action_space="computer_13" diff --git a/requirements.txt b/requirements.txt index d97aedd..4898f1e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ python-docx python-pptx pypdf PyGetWindow +rapidfuzz