diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index f62a303..659db62 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -20,6 +20,7 @@ class SetupController: def __init__(self, vm_ip: str, cache_dir: str): self.vm_ip: str = vm_ip self.http_server: str = f"http://{vm_ip}:5000" + self.http_server_setup_root: str = f"http://{vm_ip}:5000/setup" self.cache_dir: str = cache_dir def reset_cache_dir(self, cache_dir: str): diff --git a/desktop_env/evaluators/getters/general.py b/desktop_env/evaluators/getters/general.py index 22a20c7..1c1ce4c 100644 --- a/desktop_env/evaluators/getters/general.py +++ b/desktop_env/evaluators/getters/general.py @@ -1,7 +1,5 @@ -from typing import Dict - import os -import requests +from typing import Dict def get_string(env, config: Dict[str, str]) -> str: @@ -12,12 +10,13 @@ def get_string(env, config: Dict[str, str]) -> str: return config["string"] + def get_command_line(env, config: Dict[str, str]) -> str: """ Config: string (str) """ - + f = os.popen(config["command"]) - - return f.read() \ No newline at end of file + + return f.read() diff --git a/desktop_env/evaluators/getters/misc.py b/desktop_env/evaluators/getters/misc.py index f4c7bf2..b6b933a 100644 --- a/desktop_env/evaluators/getters/misc.py +++ b/desktop_env/evaluators/getters/misc.py @@ -1,6 +1,5 @@ import logging from typing import TypeVar -#from typing import Dict, List logger = logging.getLogger("desktopenv.getters.misc") @@ -13,6 +12,7 @@ def get_rule(env, config: R) -> R: """ return config["rules"] + def get_accessibility_tree(env, *args) -> str: accessibility_tree: str = env.controller.get_accessibility_tree() logger.debug("AT@eval: %s", accessibility_tree) diff --git a/desktop_env/evaluators/getters/replay.py b/desktop_env/evaluators/getters/replay.py index a0d1d62..67ad0d3 100644 --- a/desktop_env/evaluators/getters/replay.py +++ b/desktop_env/evaluators/getters/replay.py @@ -1,7 +1,7 @@ from typing import List, Dict, Any -def get_replay(env, trajectory: List[Dict[str, Any]]) -> None: +def get_replay(env, trajectory: List[Dict[str, Any]]) -> None: def parse(action): if action["type"] == "hotkey": keys = "', '".join(action["param"]) @@ -16,4 +16,4 @@ def get_replay(env, trajectory: List[Dict[str, Any]]) -> None: return f"pyautogui.press('{key}')" for action in trajectory: - env.controller.execute_python_command(parse(action)) \ No newline at end of file + env.controller.execute_python_command(parse(action)) diff --git a/desktop_env/evaluators/getters/vscode.py b/desktop_env/evaluators/getters/vscode.py index 00d280a..5072ec6 100644 --- a/desktop_env/evaluators/getters/vscode.py +++ b/desktop_env/evaluators/getters/vscode.py @@ -1,17 +1,18 @@ -from typing import Dict from typing import Any -from replay import get_replay -from file import get_vm_file +from typing import Dict + +from .file import get_vm_file +from .replay import get_replay + def get_vscode_config(env, config: Dict[str, Any]) -> str: - trajectory = [{"type": "hotkey", "param": ["command", "shift", "p"]}, - {"type": "typewrite", "param": "OpenProject"}, - {"type": "press", "param": "enter"}] + {"type": "typewrite", "param": "OpenProject"}, + {"type": "press", "param": "enter"}] get_replay(env, trajectory) return get_vm_file(env, { - "path": config["path"], - "dest": config["dest"] - }) \ No newline at end of file + "path": config["path"], + "dest": config["dest"] + }) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 44c55e9..67b8925 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -13,4 +13,4 @@ from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, co from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3 from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter - +from .vscode import compare_text_file, compare_config, compare_answer diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 48d2c03..9f2d940 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -1,12 +1,14 @@ -import xml.etree.ElementTree as ET +import logging import os +import xml.etree.ElementTree as ET from typing import List, Dict, Any + from docx import Document from docx.enum.text import WD_PARAGRAPH_ALIGNMENT -import logging logger = logging.getLogger("desktopenv.metric.docs") + def find_default_font(config_file_path, rules): """Find the default font in LibreOffice Writer.""" default_font = None diff --git a/desktop_env/evaluators/metrics/libreoffice.py b/desktop_env/evaluators/metrics/libreoffice.py index 4ca07de..441d932 100644 --- a/desktop_env/evaluators/metrics/libreoffice.py +++ b/desktop_env/evaluators/metrics/libreoffice.py @@ -1,37 +1,38 @@ -import lxml.cssselect -from lxml.etree import _Element as Element -import lxml.etree import fnmatch - from typing import Dict, List +import lxml.cssselect +import lxml.etree +from lxml.etree import _Element as Element + _libconf_namespaces = [("oor", "http://openoffice.org/2001/registry")] _libconf_ns_mapping = dict(_libconf_namespaces) -_setup_locale_selector = lxml.cssselect.CSSSelector( 'item[oor|path$=L10N]>prop[oor|name=ooSetupSystemLocale]>value' - , namespaces=_libconf_ns_mapping - ) -_locale_selector = lxml.cssselect.CSSSelector( 'item[oor|path$=L10N]>prop[oor|name=ooLocale]>value' - , namespaces=_libconf_ns_mapping - ) +_setup_locale_selector = lxml.cssselect.CSSSelector('item[oor|path$=L10N]>prop[oor|name=ooSetupSystemLocale]>value', + namespaces=_libconf_ns_mapping) +_locale_selector = lxml.cssselect.CSSSelector('item[oor|path$=L10N]>prop[oor|name=ooLocale]>value', + namespaces=_libconf_ns_mapping) + + def check_libre_locale(config_file: str, rules: Dict[str, List[str]]) -> float: config: Element = lxml.etree.parse(config_file).getroot() setup_locale_setting: List[Element] = _setup_locale_selector(config) locale_setting: List[Element] = _locale_selector(config) - setup_locale_setting: str = setup_locale_setting[0].text\ - if len(setup_locale_setting)>0\ - else locale_setting[0].text + setup_locale_setting: str = setup_locale_setting[0].text \ + if len(setup_locale_setting) > 0 \ + else locale_setting[0].text - return float( any( fnmatch.fnmatchcase(setup_locale_setting, ptn)\ - for ptn in rules["locale_set"] + return float(any(fnmatch.fnmatchcase(setup_locale_setting, ptn) \ + for ptn in rules["locale_set"] ) - ) + ) + if __name__ == "__main__": path1 = "../../任务数据/LibreOffice Calc/registrymodifications.ru.xcu" - print( check_libre_locale( path1, { "locale_set": [ "ru-*", "de-*", "fr-*" - , "pt-*", "es-*", "it-*" - ] - } + print(check_libre_locale(path1, {"locale_set": ["ru-*", "de-*", "fr-*" + , "pt-*", "es-*", "it-*" + ] + } ) - ) + ) diff --git a/desktop_env/evaluators/metrics/pdf.py b/desktop_env/evaluators/metrics/pdf.py index 51c79f3..d607733 100644 --- a/desktop_env/evaluators/metrics/pdf.py +++ b/desktop_env/evaluators/metrics/pdf.py @@ -1,13 +1,11 @@ -from pypdf import PdfReader import operator - -from typing import Dict from typing import Any +from typing import Dict + +from pypdf import PdfReader + def check_pdf_pages(pdf_file: str, rules: Dict[str, Any]) -> float: reader = PdfReader(pdf_file) nb_pages: int = len(reader.pages) - return float( getattr(operator, rules["relation"])( nb_pages - , rules["ref_value"] - ) - ) + return float(getattr(operator, rules["relation"])(nb_pages, rules["ref_value"])) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 25b55f3..03daef5 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -1,18 +1,19 @@ -import pandas as pd +import logging +import operator +from numbers import Number +from typing import Any, Union +from typing import Dict, List + import openpyxl +import pandas as pd from openpyxl import Workbook from openpyxl.worksheet.worksheet import Worksheet from .utils import load_charts, load_sparklines -import operator -from typing import Dict, List -from typing import Any, Union -from numbers import Number - -import logging logger = logging.getLogger("desktopenv.metric.table") + def compare_table(actual: str, expected: str, **options) -> float: """ Args: @@ -44,28 +45,28 @@ def compare_table(actual: str, expected: str, **options) -> float: workbook1: Workbook = openpyxl.load_workbook(actual) workbook2: Workbook = openpyxl.load_workbook(expected) - if ftr=="sparkline": + if ftr == "sparkline": sp1 = load_sparklines(actual) sp2 = load_sparklines(expected) new_metric: bool = sp1 == sp2 logger.debug("Sparkline Metric: {:}".format(new_metric)) - elif ftr=="chart": + elif ftr == "chart": charts1 = load_charts(workbook1, **options) charts2 = load_charts(workbook2, **options) new_metric: bool = charts1 == charts2 logger.debug("Chart Metric: {:}".format(new_metric)) - elif ftr=="number_format": - number_formats1: List[str] = [ c.number_format.lower()\ - for col in workbook1.active.iter_cols()\ - for c in col\ - if c.data_type=="n" - ] - number_formats2: List[str] = [ c.number_format.lower()\ - for col in workbook2.active.iter_cols()\ - for c in col\ - if c.data_type=="n" - ] - new_metric: bool = number_formats1==number_formats2 + elif ftr == "number_format": + number_formats1: List[str] = [c.number_format.lower() \ + for col in workbook1.active.iter_cols() \ + for c in col \ + if c.data_type == "n" + ] + number_formats2: List[str] = [c.number_format.lower() \ + for col in workbook2.active.iter_cols() \ + for c in col \ + if c.data_type == "n" + ] + new_metric: bool = number_formats1 == number_formats2 logger.debug("Number Format Metric: {:}".format(new_metric)) else: raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr)) @@ -73,6 +74,7 @@ def compare_table(actual: str, expected: str, **options) -> float: return float(metric) + def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float: if result is None: return 0. @@ -114,6 +116,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float: return float(passes) + def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float: if result is None: return 0. @@ -121,16 +124,18 @@ def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float: worksheet: Worksheet = openpyxl.load_workbook(filename=result).active return float(worksheet.freeze_panes == rules["position"]) + def check_xlsx_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float: if result is None: return 0. worksheet = openpyxl.load_workbook(filename=result).active zoom_scale: Number = worksheet.sheet_view.zoomScale or 100. - return float( getattr(operator, rules["relation"])( zoom_scale + return float(getattr(operator, rules["relation"])(zoom_scale , rules["ref_value"] ) - ) + ) + if __name__ == '__main__': # path1 = "" @@ -168,51 +173,51 @@ if __name__ == '__main__': # ] # print(check_sheet_list(path1, rule)) - #path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx" - #path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx" - #print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"])) + # path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx" + # path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx" + # print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"])) - #path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx" - #path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx" - #path1 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot.xlsx" - #path2 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot_gold.xlsx" - #workbook1: Workbook = openpyxl.load_workbook(filename=path1) - #worksheet1: Worksheet = workbook1.active - #import itertools - #for col, r in itertools.product( ['A', 'B'] - #, range(1, 20) - #): - #position: str = "{:}{:d}".format(col, r) - #print(worksheet1[position]) - #print(worksheet1[position].value) - #print(worksheet1[position].number_format) - #workbook2: Workbook = openpyxl.load_workbook(filename=path2) - #worksheet2: Worksheet = workbook2.active - #for col, r in itertools.product( ['A', 'B'] - #, range(1, 20) - #): - #position: str = "{:}{:d}".format(col, r) - #print(worksheet2[position]) - #print(worksheet2[position].value) - #print(worksheet2[position].number_format) - #print(compare_table(path1, path2, features=["number_format"])) + # path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx" + # path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx" + # path1 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot.xlsx" + # path2 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot_gold.xlsx" + # workbook1: Workbook = openpyxl.load_workbook(filename=path1) + # worksheet1: Worksheet = workbook1.active + # import itertools + # for col, r in itertools.product( ['A', 'B'] + # , range(1, 20) + # ): + # position: str = "{:}{:d}".format(col, r) + # print(worksheet1[position]) + # print(worksheet1[position].value) + # print(worksheet1[position].number_format) + # workbook2: Workbook = openpyxl.load_workbook(filename=path2) + # worksheet2: Worksheet = workbook2.active + # for col, r in itertools.product( ['A', 'B'] + # , range(1, 20) + # ): + # position: str = "{:}{:d}".format(col, r) + # print(worksheet2[position]) + # print(worksheet2[position].value) + # print(worksheet2[position].number_format) + # print(compare_table(path1, path2, features=["number_format"])) - #path1 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells_gold.xlsx" - #path2 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells.xlsx" - #workbook1: Workbook = openpyxl.load_workbook(filename=path1) - #worksheet1: Worksheet = workbook1.active - #print(worksheet1.sheet_view.zoomScale) - #print(type(worksheet1.sheet_view.zoomScale)) -# - #import os - #import os.path - #for wb in filter( lambda f: f.endswith(".xlsx") - #, os.listdir("../../任务数据/LibreOffice Calc/") - #): - #path = os.path.join("../../任务数据/LibreOffice Calc/", wb) - #print(wb, openpyxl.load_workbook(filename=path).active.sheet_view.zoomScale) - #print(check_zoom(path1, {"relation": "lt", "ref_value": 100})) - #print(check_zoom(path2, {"relation": "lt", "ref_value": 100})) + # path1 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells_gold.xlsx" + # path2 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells.xlsx" + # workbook1: Workbook = openpyxl.load_workbook(filename=path1) + # worksheet1: Worksheet = workbook1.active + # print(worksheet1.sheet_view.zoomScale) + # print(type(worksheet1.sheet_view.zoomScale)) + # + # import os + # import os.path + # for wb in filter( lambda f: f.endswith(".xlsx") + # , os.listdir("../../任务数据/LibreOffice Calc/") + # ): + # path = os.path.join("../../任务数据/LibreOffice Calc/", wb) + # print(wb, openpyxl.load_workbook(filename=path).active.sheet_view.zoomScale) + # print(check_zoom(path1, {"relation": "lt", "ref_value": 100})) + # print(check_zoom(path2, {"relation": "lt", "ref_value": 100})) path1 = "../../任务数据/LibreOffice Calc/Padding_Decimals_In_Formular_gold.xlsx" data_frame: pd.DataFrame = pd.read_excel(path1) diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index a76910b..053bc75 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -1,3 +1,6 @@ +from typing import Dict + + def compare_text_file(actual: str, expected: str, **options) -> float: """ Args: @@ -7,7 +10,7 @@ def compare_text_file(actual: str, expected: str, **options) -> float: Return: float: the score """ - + with open(actual) as f1: actual_text = f1.read() with open(expected) as f2: @@ -17,24 +20,17 @@ def compare_text_file(actual: str, expected: str, **options) -> float: return 1.0 return 0.0 -def compare_config(actual: str, expected: str, **options) -> float: - """ - Args: - actual (str): path to result text file - expected (str): gold string - Return: - float: the score - """ - +def compare_config(actual: str, rules: Dict, **options) -> float: with open(actual) as f1: actual_text = f1.read() - if actual_text == expected: + if actual_text == rules['expect']: return 1.0 return 0.0 -def compare_answer(actual: str, expected: str, **options) -> float: + +def compare_answer(actual: str, rules: Dict, **options) -> float: """ Args: actual (str): result string @@ -44,11 +40,8 @@ def compare_answer(actual: str, expected: str, **options) -> float: float: the score """ - if actual == expected: + if actual == rules['expect']: return 1.0 - + # TODO: can use text embedding to get non-zero return return 0.0 - -if __name__ == '__main__': - print(compare_text_file("README.md", "README.md")) \ No newline at end of file diff --git a/evaluation_examples/examples/vs_code/0ed39f63-6049-43d4-ba4d-5fa2fe04a951.json b/evaluation_examples/examples/vs_code/0ed39f63-6049-43d4-ba4d-5fa2fe04a951.json index c61b8bd..cbbfd16 100644 --- a/evaluation_examples/examples/vs_code/0ed39f63-6049-43d4-ba4d-5fa2fe04a951.json +++ b/evaluation_examples/examples/vs_code/0ed39f63-6049-43d4-ba4d-5fa2fe04a951.json @@ -16,9 +16,9 @@ } }, { - "type": "open", + "type": "launch", "parameters": { - "path": "Desktop/vscode_replace_text.txt" + "command": ["code", "Desktop/vscode_replace_text.txt"] } } ], diff --git a/evaluation_examples/examples/vs_code/53ad5833-3455-407b-bbc6-45b4c79ab8fb.json b/evaluation_examples/examples/vs_code/53ad5833-3455-407b-bbc6-45b4c79ab8fb.json index 030a2ac..fb0d892 100644 --- a/evaluation_examples/examples/vs_code/53ad5833-3455-407b-bbc6-45b4c79ab8fb.json +++ b/evaluation_examples/examples/vs_code/53ad5833-3455-407b-bbc6-45b4c79ab8fb.json @@ -4,6 +4,12 @@ "instruction": "Could you help me open the project at /home/user/project?", "source": "https://www.youtube.com/watch?v=VqCgcpAypFQ", "config": [ + { + "type": "launch", + "parameters": { + "command": ["code"] + } + }, { "type": "command", "parameters": { @@ -18,8 +24,10 @@ "evaluator": { "func": "compare_config", "expected": { - "type": "string", - "string": "project" + "type": "rule", + "rules": { + "expect": "project" + } }, "result": { "type": "vscode_config", diff --git a/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json b/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json index 7975f75..b3201d8 100644 --- a/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json +++ b/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json @@ -29,8 +29,10 @@ "evaluator": { "func": "compare_config", "expected": { - "type": "string", - "string": "100" + "type": "rule", + "rules": { + "expect": "100" + } }, "result": { "type": "vscode_config",