ver Feb28th

Add a new multi-app task: initialize a web extension project with a web tool
2024-02-28 22:35:04 +08:00
parent b2e5218c42
commit 33ace6937b
6 changed files with 209 additions and 7 deletions
--- a/branch-config/filelist
+++ b/branch-config/filelist
@@ -12,3 +12,4 @@ experiment_screenshot_seeact.py
 experiment_screenshot_som.py

 quick_compare_table.py
+quick_evaluate.py
--- a/desktop_env/evaluators/metrics/init.py
+++ b/desktop_env/evaluators/metrics/init.py
@@ -57,7 +57,8 @@ from .general import (
    is_in_list,
    fuzzy_match,
    check_include_exclude,
-    check_direct_json_object
+    check_direct_json_object,
+    diff_text_file
 )
 from .gimp import (
    check_brightness_decrease_and_structure_sim,
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -12,6 +12,7 @@ import lxml.etree
 from lxml.cssselect import CSSSelector
 from lxml.etree import _Element
 from rapidfuzz import fuzz
+import difflib

 from .utils import _match_record, _match_value_to_rule

@@ -45,7 +46,15 @@ def is_in_list(result, rules) -> float:
    else:
        return 0.

+def diff_text_file(result: str, expect: str) -> float:
+    """
+    Scores how similar two text files are, compared line by line.
+
+    Args:
+        result (str): path to the file under evaluation; None is scored
+          as 0.
+        expect (str): path to the reference file
+
+    Returns:
+        float: difflib.SequenceMatcher ratio of the two lists of lines, in
+          [0, 1]; 1. means the line sequences are identical
+    """
+    # no result file was provided: count it as a complete mismatch
+    if result is None:
+        return 0.

+    # both files are opened in text mode with the default encoding;
+    # splitlines() drops the line terminators before the comparison
+    with open(result) as f:
+        result_lines: List[str] = f.read().splitlines()
+    with open(expect) as f:
+        expected_lines: List[str] = f.read().splitlines()
+    # whole lines are the unit of comparison: a line either matches or not
+    return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio()

 def fuzzy_match(result, rules) -> float:
    expect = rules["expected"]
@@ -212,14 +221,21 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
    for r in expect_rules:
        value = result
        for k in r["key"]:
-            value = value[k]
+            try:
+                value = value[k]
+            except KeyError:
+                return 0.
        metric = metric and _match_value_to_rule(value, r)
    for r in unexpect_rules:
        value = result
        for k in r["key"]:
-            value = value[k]
+            try:
+                value = value[k]
+            except KeyError:
+                value = None
+                break
        metric = metric and not _match_value_to_rule(value, r)
-    return metric
+    return float(metric)


 def check_direct_json_object(result, rules)->float:
@@ -238,4 +254,4 @@ def check_direct_json_object(result, rules)->float:
        expected_value = expected_json.get(key)
        if expected_value != result.get(key):
            return 0.
-    return 1.0
+    return 1.0
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -587,7 +587,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
        bool
    """

-    if rule["method"].startswith("re"):
+    if rule["method"].startswith("re"): # re.FLAGs
        flags: List[str] = rule["method"].split(".")[1:]
        flags: Iterable[re.RegexFlag] = (getattr(re, fl) for fl in flags)
        flag: re.RegexFlag = functools.reduce(operator.or_, flags, re.RegexFlag(0))
@@ -600,7 +600,7 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
        , "ge", "gt"
                          }:
        return getattr(operator, rule["method"])(value, rule["ref"])
-    if rule["method"].startswith("approx"):
+    if rule["method"].startswith("approx"): # approx:THRESHOLD
        threshold: float = float(rule["method"].split(":")[1])
        logger.debug("Approx: TH%f, REF%f, VAL%s", threshold, rule["ref"], repr(value))
        try:
--- a/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json
+++ b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json
@@ -0,0 +1,107 @@
+{
+	"id": "74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
+	"snapshot": "chrome",
+	"instruction": "Help me to get an initial setup of web extension with help of https://webext.eu . Tag the extension as \"happy-extension v0.0.1\". Leave description as blank for now. A background script and browser action is needed, while other features are not. Place the auto-generated folder under folder \"~/Projects\".",
+	"source": "authors",
+	"config": [],
+	"trajectory": "trajectories/74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
+	"related_apps": [
+		"chrome",
+		"os"
+	],
+	"evaluator": {
+		"func": [
+			"check_json",
+			"diff_text_file",
+			"diff_text_file",
+			"diff_text_file",
+			"diff_text_file"
+		],
+		"result": [
+			{
+				"type": "vm_file",
+				"path": "/home/user/Projects/happy-extension/manifest.json",
+				"dest": "manifest.json"
+			},
+			{
+				"type": "vm_file",
+				"path": "/home/user/Projects/happy-extension/background_script.js",
+				"dest": "background_script.js"
+			},
+			{
+				"type": "vm_file",
+				"path": "/home/user/Projects/happy-extension/browserAction/index.html",
+				"dest": "index.html"
+			},
+			{
+				"type": "vm_file",
+				"path": "/home/user/Projects/happy-extension/browserAction/style.css",
+				"dest": "style.css"
+			},
+			{
+				"type": "vm_file",
+				"path": "/home/user/Projects/happy-extension/browserAction/script.js",
+				"dest": "script.js"
+			}
+		],
+		"expected": [
+			{
+				"type": "rule",
+				"rules": {
+					"expect": [
+						{
+							"key": ["name"],
+							"method": "eq",
+							"ref": "happy-extension"
+						},
+						{
+							"key": ["version"],
+							"method": "eq",
+							"ref": "0.0.1"
+						},
+						{
+							"key": ["background", "scripts"],
+							"method": "eq",
+							"ref": ["background_script.js"]
+						},
+						{
+							"key": ["browser_action", "default_icon"],
+							"method": "eq",
+							"ref": {"64": "icons/icon.png"}
+						},
+						{
+							"key": ["browser_action", "default_popup"],
+							"method": "eq",
+							"ref": "browserAction/index.html"
+						},
+						{
+							"key": ["browser_action", "default_title"],
+							"method": "eq",
+							"ref": "happy-extension"
+						}
+					]
+				}
+			},
+			{
+				"type": "cloud_file",
+				"path": "https://drive.google.com/uc?id=1t5Llhn6seDUXVs-eILu6CjwFEQL9Z5Qm&export=download",
+				"dest": "background_script.js"
+			},
+			{
+				"type": "cloud_file",
+				"path": "https://drive.google.com/uc?id=19fMAsWd6q4ElLdOceJ-otHbxRJA_pc_U&export=download",
+				"dest": "index.html"
+			},
+			{
+				"type": "cloud_file",
+				"path": "https://drive.google.com/uc?id=1fwfiRPjdug8uh6z23RFO1JtlGH_L_Hl_&export=download",
+				"dest": "style.css"
+			},
+			{
+				"type": "cloud_file",
+				"path": "https://drive.google.com/uc?id=14YYnhCfRtHQNk8M4fBPaUQeteoFMGBsA&export=download",
+				"dest": "script.js"
+			}
+		]
+	}
+}
--- a/quick_evaluate.py
+++ b/quick_evaluate.py
@@ -0,0 +1,77 @@
+import datetime
+import json
+import logging
+import os
+import sys
+import time
+import argparse
+from desktop_env.envs.desktop_env import DesktopEnv
+
+#  Logger Configs {{{ # 
+# The root logger is set to DEBUG so that every record reaches the handlers;
+# each handler below then applies its own level threshold.
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+# timestamp shared by the names of all log files created by this run
+datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
+
+# NOTE(review): nothing here creates the "logs" directory -- presumably it
+# must already exist in the working directory; confirm
+file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
+debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
+stdout_handler = logging.StreamHandler(sys.stdout)
+sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
+
+# normal-*.log and stdout receive INFO and above; debug-*.log and
+# sdebug-*.log receive DEBUG and above
+file_handler.setLevel(logging.INFO)
+debug_handler.setLevel(logging.DEBUG)
+stdout_handler.setLevel(logging.INFO)
+sdebug_handler.setLevel(logging.DEBUG)
+
+# the format string embeds ANSI colour escape sequences; the same formatter
+# is attached to the file handlers as well as to stdout
+formatter = logging.Formatter(
+    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
+file_handler.setFormatter(formatter)
+debug_handler.setFormatter(formatter)
+stdout_handler.setFormatter(formatter)
+sdebug_handler.setFormatter(formatter)
+
+# stdout and sdebug-*.log only accept records from the "desktopenv" logger
+# hierarchy; the other two handlers accept records from every logger
+stdout_handler.addFilter(logging.Filter("desktopenv"))
+sdebug_handler.addFilter(logging.Filter("desktopenv"))
+
+logger.addHandler(file_handler)
+logger.addHandler(debug_handler)
+logger.addHandler(stdout_handler)
+logger.addHandler(sdebug_handler)
+#  }}} Logger Configs # 
+
+# rebind the module-level name from the root logger to a logger inside the
+# "desktopenv" hierarchy so that this script's messages pass the filters above
+logger = logging.getLogger("desktopenv.main")
+
+
+def human_agent():
+    """
+    Builds a DesktopEnv for one example and logs its evaluation result.
+
+    Command line arguments:
+        -p/--path: path to the virtual machine .vmx file (required)
+        -s/--snapshot: snapshot name that overrides the example's "snapshot"
+        -e/--example: path to the example json file
+
+    This function itself performs no reset or step on the environment; it
+    calls env.evaluate() directly after constructing the environment.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-p', '--path', type=str, required=True, help="Path to the virtual machine .vmx file.")
+    parser.add_argument('-s', '--snapshot', type=str, help="Name of the snapshot to restore.")
+    parser.add_argument('-e', '--example', type=str, help="Path to the example json file.")
+    args = parser.parse_args(sys.argv[1:])
+
+    # fall back to a default libreoffice_writer example when --example is
+    # omitted or points to a path that does not exist (no warning is emitted)
+    example_path = args.example if args.example is not None and os.path.exists(args.example) else \
+        'evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json'
+    with open(example_path, "r") as f:
+        example = json.load(f)
+    # change to your customized snapshot
+    if args.snapshot is not None: example["snapshot"] = args.snapshot
+
+    # the .vmx path is checked only after the example has been loaded
+    assert os.path.exists(args.path), "The specified path to the .vmx file does not exist."
+    env = DesktopEnv(
+        path_to_vm=args.path,
+        action_space="computer_13",
+        task_config=example
+    )
+
+    # the result is formatted with %.2f, so a numeric score is expected
+    result = env.evaluate()
+    logger.info("Result: %.2f", result)
+
+    # NOTE(review): the close call below is commented out, so the message
+    # "Environment closed." is logged although the environment is left open
+    # env.close()
+    logger.info("Environment closed.")
+
+
+# run the evaluation only when executed as a script, not on import
+if __name__ == "__main__":
+    human_agent()