Merge branch 'zdy'

2024-01-05 15:55:41 +08:00
parent 3cbb57f24c eeb8a120d6
commit 26b7d9010d
19 changed files with 522 additions and 180 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,4 @@ tags-opts
 snapshots
 branch_flag
 branch-config
+*.syncthing.*.tmp
--- a/desktop_env/controllers/python.py
+++ b/desktop_env/controllers/python.py
@@ -3,6 +3,8 @@ from typing import Any, Dict
 import requests
 from desktop_env.envs.actions import KEYBOARD_KEYS

+import logging
+logger = logging.getLogger("desktopenv.pycontroller")

 class PythonController:
    def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
@@ -17,7 +19,7 @@ class PythonController:
        if response.status_code == 200:
            return response.content
        else:
-            print("Failed to get screenshot. Status code:", response.status_code)
+            logger.error("Failed to get screenshot. Status code: %d", response.status_code)
            return None

    def get_file(self, file_path: str):
@@ -26,10 +28,10 @@ class PythonController:
        """
        response = requests.post(self.http_server + "/file", data={"file_path": file_path})
        if response.status_code == 200:
-            print("File downloaded successfully")
+            logger.info("File downloaded successfully")
            return response.content
        else:
-            print("Failed to get file. Status code:", response.status_code)
+            logger.error("Failed to get file. Status code: %d", response.status_code)
            return None

    def execute_python_command(self, command: str) -> None:
@@ -38,7 +40,7 @@ class PythonController:
        It can be used to execute the pyautogui commands, or... any other python command. who knows?
        """
        command = self.pkgs_prefix.format(command=command)
-        payload = json.dumps({"command": command})
+        payload = json.dumps({"command": command, "shell": True})
        headers = {
            'Content-Type': 'application/json'
        }
@@ -46,12 +48,12 @@ class PythonController:
        try:
            response = requests.post(self.http_server + "/execute", headers=headers, data=payload)
            if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
            else:
-                print("Failed to execute command. Status code:", response.status_code)
+                logger.error("Failed to execute command. Status code: %d", response.status_code)
            return response.json()
        except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to execute the command:", e)
+            logger.error("An error occurred while trying to execute the command: %s", e)

    def execute_action(self, action: Dict[str, Any]):
        """
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -6,8 +6,12 @@ import uuid
 import os.path

 from typing import Dict, List
-from typing import Any
+from typing import Any, Union

+import logging
+logger = logging.getLogger("desktopenv.setup")
+
+import traceback

 class SetupController:
    def __init__(self, http_server: str, cache_dir: str):
@@ -47,31 +51,32 @@ class SetupController:
        # self._open_setup(config)
        # can add other setup steps

-    def _command_setup(self, command: str):
-        """
-        Directly send a command into the virtual machine os for setting up.
-        """
-        payload = json.dumps({"command": command})
-        headers = {
-            'Content-Type': 'application/json'
-        }
-        timeout = 5
-        timout_whitelist = ["vlc"]
-
-        try:
-
-            response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
-            if response.status_code == 200:
-                print("Command executed successfully:", response.text)
-            else:
-                print("Failed to execute command. Status code:", response.status_code)
-        except requests.exceptions.Timeout as e:
-            if command in timout_whitelist:
-                print("Command executed successfully:", command)
-            else:
-                print("An error occurred while trying to execute the command:", e)
-        except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to execute the command:", e)
+    # ZDY_COMMENT: merged with launch
+    #def _command_setup(self, command: str):
+        #"""
+        #Directly send a command into the virtual machine os for setting up.
+        #"""
+        #payload = json.dumps({"command": command})
+        #headers = {
+            #'Content-Type': 'application/json'
+        #}
+        #timeout = 5
+        #timout_whitelist = ["vlc"]
+#
+        #try:
+#
+            #response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
+            #if response.status_code == 200:
+                #print("Command executed successfully:", response.text)
+            #else:
+                #print("Failed to execute command. Status code:", response.status_code)
+        #except requests.exceptions.Timeout as e:
+            #if command in timout_whitelist:
+                #print("Command executed successfully:", command)
+            #else:
+                #print("An error occurred while trying to execute the command:", e)
+        #except requests.exceptions.RequestException as e:
+            #print("An error occurred while trying to execute the command:", e)

    def _download_setup(self, files: List[Dict[str, str]]):
        """
@@ -110,12 +115,12 @@ class SetupController:
                            for chunk in response.iter_content(chunk_size=8192):
                                if chunk:
                                    f.write(chunk)
-                        print("File downloaded successfully")
+                        logger.info("File downloaded successfully")
                        downloaded = True
                        break

                    except requests.RequestException as e:
-                        print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
+                        logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
                if not downloaded:
                    raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")

@@ -129,17 +134,18 @@ class SetupController:
                "file_data": (os.path.basename(path), open(cache_path, "rb"))
            })
            headers = {"Content-Type": form.content_type}
-            print(form.content_type)
+            logger.debug(form.content_type)

            # send request to server to upload file
            try:
+                logger.debug("REQUEST ADDRESS: %s", self.http_server_setup_root + "/upload")
                response = requests.post(self.http_server_setup_root + "/upload", headers=headers, data=form)
                if response.status_code == 200:
-                    print("Command executed successfully:", response.text)
+                    logger.info("Command executed successfully: %s", response.text)
                else:
-                    print("Failed to upload file. Status code:", response.text)
+                    logger.error("Failed to upload file. Status code: %s", response.text)
            except requests.exceptions.RequestException as e:
-                print("An error occurred while trying to send the request:", e)
+                logger.error("An error occurred while trying to send the request: %s", e)

    def _change_wallpaper_setup(self, path: str):
        # if not config:
@@ -160,14 +166,14 @@ class SetupController:
        try:
            response = requests.post(self.http_server_setup_root + "/change_wallpaper", headers=headers, data=payload)
            if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
            else:
-                print("Failed to change wallpaper. Status code:", response.text)
+                logger.error("Failed to change wallpaper. Status code: %s", response.text)
        except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to send the request:", e)
+            logger.error("An error occurred while trying to send the request: %s", e)

    def _tidy_desktop_setup(self, **config):
-        raise NotImplementedError
+        raise NotImplementedError()

    def _open_setup(self, path: str):
        # if not config:
@@ -187,8 +193,63 @@ class SetupController:
        try:
            response = requests.post(self.http_server_setup_root + "/open_file", headers=headers, data=payload)
            if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
            else:
-                print("Failed to open file. Status code:", response.text)
+                logger.error("Failed to open file. Status code: %s", response.text)
        except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to send the request:", e)
+            logger.error("An error occurred while trying to send the request: %s", e)
+
+    def _launch_setup(self, command: List[str]):
+        """
+        Post a launch command to the "/launch" endpoint under
+        `self.http_server_setup_root`. Request failures are logged, not raised.
+
+        Args:
+            command (List[str]): command and its arguments
+
+        Raises:
+            ValueError: if `command` is empty
+        """
+
+        if not command:
+            # ValueError is still an Exception, so existing handlers keep working
+            raise ValueError("Empty command to launch.")
+
+        payload = json.dumps({"command": command})
+        headers = {"Content-Type": "application/json"}
+
+        try:
+            response = requests.post(self.http_server_setup_root + "/launch", headers=headers, data=payload)
+            if response.status_code == 200:
+                logger.info("Command executed successfully: %s", response.text)
+            else:
+                # report the real status code next to the response body
+                logger.error( "Failed to launch application. Status code: %d, response: %s"
+                            , response.status_code
+                            , response.text
+                            )
+        except requests.exceptions.RequestException as e:
+            logger.error("An error occurred while trying to send the request: %s", e)
+
+    def _execute_setup(self, command: List[str], stdout: str = "", stderr: str = ""):
+        """
+        Post a command to the "/execute" endpoint under
+        `self.http_server_setup_root` and optionally save its outputs.
+        Request failures are logged, not raised.
+
+        Args:
+            command (List[str]): command and its arguments
+            stdout (str): if non-empty, file name relative to the cache dir
+              that receives the "output" field of the response
+            stderr (str): if non-empty, file name relative to the cache dir
+              that receives the "error" field of the response
+
+        Raises:
+            ValueError: if `command` is empty
+        """
+
+        if not command:
+            # ValueError is still an Exception, so existing handlers keep working
+            raise ValueError("Empty command to execute.")
+
+        payload = json.dumps({"command": command})
+        headers = {"Content-Type": "application/json"}
+
+        try:
+            response = requests.post(self.http_server_setup_root + "/execute", headers=headers, data=payload)
+            if response.status_code == 200:
+                results: Dict[str, str] = response.json()
+                # explicit encoding so the saved files do not depend on the host locale
+                if stdout:
+                    with open(os.path.join(self.cache_dir, stdout), "w", encoding="utf-8") as f:
+                        f.write(results["output"])
+                if stderr:
+                    with open(os.path.join(self.cache_dir, stderr), "w", encoding="utf-8") as f:
+                        f.write(results["error"])
+                logger.info( "Command executed successfully: %s -> %s"
+                           , " ".join(command)
+                           , response.text
+                           )
+            else:
+                logger.error("Failed to execute command. Status code: %s", response.text)
+        except requests.exceptions.RequestException as e:
+            # exc_info sends the traceback through the logger instead of raw stderr
+            logger.error("An error occurred while trying to send the request: %s", e, exc_info=True)
+
+    def _act_setup(self, action_seq: List[Union[Dict[str, Any], str]]):
+        """
+        Placeholder for a setup step taking a sequence of actions; not
+        implemented yet.
+
+        Args:
+            action_seq (List[Union[Dict[str, Any], str]]): actions given as
+              dicts or strings; their exact format is not defined here yet
+
+        Raises:
+            NotImplementedError: always
+        """
+        # TODO
+        raise NotImplementedError()
+    def _replay_setup(self, trajectory: str):
+        """
+        Placeholder for a setup step taking a replay trajectory; not
+        implemented yet.
+
+        Args:
+            trajectory (str): path to the replay trajectory file
+
+        Raises:
+            NotImplementedError: always
+        """
+
+        # TODO
+        raise NotImplementedError()
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -17,10 +17,12 @@ from desktop_env.controllers.setup import SetupController
 # from desktop_env.evaluators import eval_funcs
 from desktop_env.evaluators import metrics, getters

+import logging
+logger = logging.getLogger("desktopenv.env")
+
 Metric = Callable[[Any, Any], float]
 Getter = Callable[[gym.Env, Dict[str, Any]], Any]

-
 def _execute_command(command: List[str]) -> None:
    if command[:4] == ["vmrun", "-T", "ws", "start"]:
        p = subprocess.Popen(command)
@@ -68,22 +70,11 @@ class DesktopEnv(gym.Env):
        self.cache_dir_base: str = cache_dir

        # task-aware stuffs
-        self.snapshot_path = task_config["snapshot"]  # todo: handling the logic of snapshot directory
-        self.task_id: str = task_config["id"]
-        self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id)
-        os.makedirs(self.cache_dir, exist_ok=True)
-        self.instruction = task_config["instruction"]
-        self.config = task_config["config"]
-
-        self.evaluator = task_config["evaluator"]
-        self.metric: Metric = getattr(metrics, self.evaluator["func"])
-        self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
-        self.expected_getter: Getter = getattr(getters, "get_{:}".format(
-            self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None
-        self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
+        # todo: handling the logic of snapshot directory
+        self._set_task_info(task_config)

        # Initialize emulator and controller
-        print("Initializing...")
+        logger.info("Initializing...")
        self._start_emulator()
        self.vm_ip = self._get_vm_ip()
        self.host = f"http://{self.vm_ip}:5000"
@@ -110,26 +101,26 @@ class DesktopEnv(gym.Env):
                output: List[str] = output.splitlines()
                # if self.path_to_vm.lstrip("~/") in output:
                if self.path_to_vm in output:
-                    print("VM is running.")
+                    logger.info("VM is running.")
                    break
                else:
-                    print("Starting VM...")
+                    logger.info("Starting VM...")
                    _execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm])
                    time.sleep(3)
            except subprocess.CalledProcessError as e:
-                print(f"Error executing command: {e.output.decode().strip()}")
+                logger.error(f"Error executing command: {e.output.decode().strip()}")

    def _get_vm_ip(self):
        max_retries = 10
-        print("Getting IP Address...")
+        logger.info("Getting IP Address...")
        for _ in range(max_retries):
            try:
                output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip()
-                print(f"IP address: {output}")
+                logger.info(f"IP address: {output}")
                return output
            except:
                time.sleep(5)
-                print("Retrying...")
+                logger.info("Retrying...")
        raise Exception("Failed to get VM IP address!")

    def _save_state(self):
@@ -152,52 +143,54 @@ class DesktopEnv(gym.Env):
        screenshot_image_path = self._get_screenshot()
        return screenshot_image_path

+    def _set_task_info(self, task_config: Dict[str, Any]):
+        """
+        Load the task-specific attributes from a task config.
+
+        Reads the "snapshot", "id", "instruction", "config" and "evaluator"
+        entries, creates the per-task cache dir under `self.cache_dir_base`
+        and looks up the metric and getter functions named by the evaluator.
+
+        Args:
+            task_config (Dict[str, Any]): task config dict
+        """
+
+        self.snapshot_path = task_config["snapshot"]
+
+        # the cache dir of a task is named after the task id
+        task_id: str = task_config["id"]
+        self.task_id = task_id
+        self.cache_dir: str = os.path.join(self.cache_dir_base, task_id)
+        os.makedirs(self.cache_dir, exist_ok=True)
+
+        self.instruction = task_config["instruction"]
+        self.config = task_config["config"]
+
+        evaluator = task_config["evaluator"]
+        self.evaluator = evaluator
+        self.metric: Metric = getattr(metrics, evaluator["func"])
+        self.result_getter: Getter = getattr(getters, "get_{:}".format(evaluator["result"]["type"]))
+
+        # the getter of the expected state is optional
+        if "expected" in evaluator:
+            self.expected_getter: Getter = getattr(getters, "get_{:}".format(evaluator["expected"]["type"]))
+        else:
+            self.expected_getter = None
+        self.metric_options: Dict[str, Any] = evaluator.get("options", {})
+
    def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None):
-        print("Resetting environment...")
+        logger.info("Resetting environment...")

-        print("Switching task...")
+        logger.info("Switching task...")
        if task_config is not None:
-            self.snapshot_path = task_config["snapshot"]
-            self.task_id = task_config["id"]
-            self.cache_dir = os.path.join(self.cache_dir_base, self.task_id)
-            os.makedirs(self.cache_dir, exist_ok=True)
-            self.instruction = task_config["instruction"]
-            self.config = task_config["config"]
-
-            self.evaluator = task_config["evaluator"]
-            self.metric: Metric = getattr(metrics, self.evaluator["func"])
-            self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
-            self.expected_getter: Getter = getattr(getters, "get_{:}".format(
-                self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None
-            self.metric_options = self.evaluator.get("options", {})
-
+            self._set_task_info(task_config)
            self.setup_controller.reset_cache_dir(self.cache_dir)

-        print("Setting counters...")
+        logger.info("Setting counters...")
        self._traj_no += 1
        self._step_no = 0
        self.action_history.clear()

-        print("Setup new temp dir...")
+        logger.info("Setup new temp dir...")
        self.tmp_dir = tempfile.mkdtemp(
            prefix="{:d}.{:}.".format(self._traj_no, self.task_id),
            dir=self.tmp_dir_base
        )
        os.makedirs(os.path.join(self.tmp_dir, "screenshots"))

-        print("Reverting to snapshot to {}...".format(self.snapshot_path))
+        logger.info("Reverting to snapshot to {}...".format(self.snapshot_path))
        _execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
        time.sleep(5)

-        print("Starting emulator...")
+        logger.info("Starting emulator...")
        self._start_emulator()
-        print("Emulator started.")
+        logger.info("Emulator started.")

-        print("Setting up environment...")
+        logger.info("Setting up environment...")
        self.setup_controller.setup(self.config)

        time.sleep(5)
-        print("Environment setup complete.")
+        logger.info("Environment setup complete.")

        observation = self._get_obs()
        return observation
@@ -229,6 +222,9 @@ class DesktopEnv(gym.Env):
        """
        Evaluate whether the task is successfully completed.
        """
+
+        self.setup_controller.setup(self.evaluator["postconfig"])
+
        result_state = self.result_getter(self, self.evaluator["result"])
        expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \
            else None
--- a/desktop_env/evaluators/getters/init.py
+++ b/desktop_env/evaluators/getters/init.py
@@ -1,3 +1,3 @@
-from .file import get_cloud_file, get_vm_file
+from .file import get_cloud_file, get_vm_file, get_cache_file
 from .misc import get_rule
 from .vlc import get_vlc_playing_info
--- a/desktop_env/evaluators/getters/file.py
+++ b/desktop_env/evaluators/getters/file.py
@@ -41,3 +41,13 @@ def get_vm_file(env, config: Dict[str, str]) -> str:
        f.write(file)

    return _path
+
+def get_cache_file(env, config: Dict[str, str]) -> str:
+    """
+    Config:
+        path (str): relative path in cache dir
+
+    Returns:
+        str: `config["path"]` joined onto `env.cache_dir`
+
+    Raises:
+        FileNotFoundError: if that path does not exist
+    """
+
+    _path = os.path.join(env.cache_dir, config["path"])
+    # explicit check rather than `assert`, which is stripped under `python -O`
+    if not os.path.exists(_path):
+        raise FileNotFoundError("Cache file not found: {:}".format(_path))
+    return _path
--- a/desktop_env/evaluators/getters/misc.py
+++ b/desktop_env/evaluators/getters/misc.py
@@ -1,8 +1,66 @@
 from typing import TypeVar

+import platform
+import subprocess
+import ctypes
+import os
+
+import logging
+logger = logging.getLogger("desktopenv.getters.misc")
+
 R = TypeVar("Rule")
 def get_rule(env, config: R) -> R:
    """
    Returns the rule as-is.
    """
    return config["rules"]
+
+
+def get_desktop_path():
+    """
+    Build the Desktop directory path of the current login user.
+
+    Returns:
+        str: "C:\\Users\\<user>\\Desktop" on Windows, "/Users/<user>/Desktop"
+          on macOS, "/home/<user>/Desktop" on Linux
+
+    Raises:
+        Exception: if the operating system is none of the three above
+    """
+    username = os.getlogin()  # Get the current username
+    system = platform.system()
+    if system == "Windows":
+        # "C:\\" rather than "C:": joining onto a bare drive letter gives the
+        # drive-relative path "C:Users\..." instead of "C:\Users\..."
+        return os.path.join("C:\\", "Users", username, "Desktop")
+    elif system == "Darwin":  # macOS is identified as 'Darwin'
+        return os.path.join("/Users", username, "Desktop")
+    elif system == "Linux":
+        return os.path.join("/home", username, "Desktop")
+    else:
+        raise Exception("Unsupported operating system")
+
+
+def get_wallpaper():
+    """
+    Query the current desktop wallpaper of the machine this code runs on.
+
+    Returns:
+        Optional[str]: on Windows the value reported by SystemParametersInfoW;
+          on macOS the output of the osascript query; on Linux the gsettings
+          "picture-uri" value with "file://" and quotes removed. None if the
+          macOS or Linux query fails, and the string "Unsupported OS" on any
+          other system.
+    """
+    def get_wallpaper_windows():
+        SPI_GETDESKWALLPAPER = 0x73
+        MAX_PATH = 260
+        buffer = ctypes.create_unicode_buffer(MAX_PATH)
+        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
+        return buffer.value
+
+    def get_wallpaper_macos():
+        script = """
+        tell application "System Events" to tell every desktop to get picture
+        """
+        # stderr has to be piped as well, otherwise communicate() always
+        # returns None for it and a failure can never be noticed
+        process = subprocess.Popen( ['osascript', '-e', script]
+                                  , stdout=subprocess.PIPE
+                                  , stderr=subprocess.PIPE
+                                  )
+        output, error = process.communicate()
+        if process.returncode != 0:
+            logger.error("Error: %s", error.decode('utf-8', errors='replace').strip())
+            return None
+        return output.strip().decode('utf-8')
+
+    def get_wallpaper_linux():
+        try:
+            output = subprocess.check_output(["gsettings", "get", "org.gnome.desktop.background", "picture-uri"])
+            return output.decode('utf-8').strip().replace('file://', '').replace("'", "")
+        except Exception as e:
+            logger.error("Error: %s", e)
+            return None
+
+    os_name = platform.system()
+    if os_name == 'Windows':
+        return get_wallpaper_windows()
+    elif os_name == 'Darwin':
+        return get_wallpaper_macos()
+    elif os_name == 'Linux':
+        return get_wallpaper_linux()
+    else:
+        return "Unsupported OS"
--- a/desktop_env/evaluators/metrics/init.py
+++ b/desktop_env/evaluators/metrics/init.py
@@ -6,3 +6,4 @@ from .docs import is_first_line_centered, check_file_exists, compare_contains_im
 from .pdf import check_pdf_pages
 from .libreoffice import check_libre_locale
 from .vlc import is_vlc_playing
+from .general import check_csv
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -5,6 +5,9 @@ import sqlite3

 from playwright.sync_api import sync_playwright

+import logging
+logger = logging.getLogger("desktopenv.metrics.chrome")
+
 """
 WARNING: 
 1. Functions from this script assume that no account is registered on Chrome, otherwise the default file path needs to be changed.
@@ -36,7 +39,7 @@ def get_default_search_engine():
                                                                                                      'Google')
        return search_engine
    except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
        return "Google"


@@ -61,7 +64,7 @@ def get_cookie_data():

        return cookies
    except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
        return None


@@ -85,7 +88,7 @@ def get_bookmarks():
        return bookmarks

    except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
        return None


@@ -117,7 +120,7 @@ def get_extensions_installed_from_shop():
                            manifest = json.load(file)
                            manifests.append(manifest)
                        except json.JSONDecodeError:
-                            print(f"Error reading {manifest_path}")
+                            logger.error(f"Error reading {manifest_path}")
    return manifests


--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -4,6 +4,8 @@ from typing import List, Dict, Any
 from docx import Document
 from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

+import logging
+logger = logging.getLogger("desktopenv.metric.docs")

 def find_default_font(config_file_path, rules):
    """Find the default font in LibreOffice Writer."""
@@ -23,7 +25,7 @@ def find_default_font(config_file_path, rules):
                for value in prop.findall('value', namespace):
                    default_font = value.text
    except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")

    return 1 if default_font == expected_font else 0

@@ -192,4 +194,4 @@ def compare_contains_image(docx_file1, docx_file2):
 # print(result)

 # config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
-# print(find_default_font("Ani", config_path))
+# print(find_default_font("Ani", config_path))
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -1,57 +1,30 @@
-import platform
-import subprocess
-import ctypes
-import os
+import csv
+from typing import Dict, List

+def _match_record(pattern: Dict[str, str], item: Dict[str, str]) -> bool:
+    """
+    Tell whether `item` contains every key of `pattern` with an equal value.
+    An empty `pattern` matches any `item`.
+    """
+    return all(k in item and item[k]==val for k, val in pattern.items())

-# todo: move to getter module
-def get_desktop_path():
-    username = os.getlogin()  # Get the current username
-    if platform.system() == "Windows":
-        return os.path.join("C:", "Users", username, "Desktop")
-    elif platform.system() == "Darwin":  # macOS is identified as 'Darwin'
-        return os.path.join("/Users", username, "Desktop")
-    elif platform.system() == "Linux":
-        return os.path.join("/home", username, "Desktop")
-    else:
-        raise Exception("Unsupported operating system")
-
-
-def get_wallpaper():
-    def get_wallpaper_windows():
-        SPI_GETDESKWALLPAPER = 0x73
-        MAX_PATH = 260
-        buffer = ctypes.create_unicode_buffer(MAX_PATH)
-        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
-        return buffer.value
-
-    def get_wallpaper_macos():
-        script = """
-        tell application "System Events" to tell every desktop to get picture
-        """
-        process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE)
-        output, error = process.communicate()
-        if error:
-            print("Error:", error)
-        else:
-            return output.strip().decode('utf-8')
-
-    def get_wallpaper_linux():
-        try:
-            output = subprocess.check_output(["gsettings", "get", "org.gnome.desktop.background", "picture-uri"])
-            return output.decode('utf-8').strip().replace('file://', '').replace("'", "")
-        except Exception as e:
-            print("Error:", e)
-            return None
-
-    os_name = platform.system()
-    if os_name == 'Windows':
-        return get_wallpaper_windows()
-    elif os_name == 'Darwin':
-        return get_wallpaper_macos()
-    elif os_name == 'Linux':
-        return get_wallpaper_linux()
-    else:
-        return "Unsupported OS"
+def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
+    """
+    Check the records of a csv file against expected and unexpected patterns.
+
+    Args:
+        result (str): path to csv file
+        rules (Dict[str, List[Dict[str, str]]]): dict like
+          {
+            "expect": [{key: value}]
+            "unexpect": [{key: value}]
+          }

+    Returns:
+        float: 1. if each "expect" pattern matches at least one record and
+          no "unexpect" pattern matches any record, otherwise 0.
+    """

+    expect_rules = rules.get("expect", [])
+    unexpect_rules = rules.get("unexpect", [])
+
+    expect_metrics = [False] * len(expect_rules)
+    unexpect_metric = True
+    # newline="" lets the csv module handle line endings inside quoted fields
+    with open(result, newline="") as f:
+        reader = csv.DictReader(f)
+
+        for rcd in reader:
+            for i, r in enumerate(expect_rules):
+                expect_metrics[i] = expect_metrics[i] or _match_record(r, rcd)
+            # an unexpected pattern must not show up in any record
+            unexpect_metric = unexpect_metric and not any(_match_record(r, rcd) for r in unexpect_rules)
+    return float(all(expect_metrics) and unexpect_metric)
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -10,6 +10,8 @@ from typing import Dict, List
 from typing import Any, Union
 from numbers import Number

+import logging
+logger = logging.getLogger("desktopenv.metric.table")

 def compare_table(actual: str, expected: str, **options) -> float:
    """
@@ -32,7 +34,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
    df1 = pd.read_excel(expected)
    df2 = pd.read_excel(actual)
    metric: bool = df1.equals(df2)
-    print("Normal Contents Metric: {:}".format(metric))
+    logger.debug("Normal Contents Metric: {:}".format(metric))

    features: List[str] = options.get("features", [])
    for ftr in features:
@@ -43,12 +45,12 @@ def compare_table(actual: str, expected: str, **options) -> float:
            sp1 = load_sparklines(actual)
            sp2 = load_sparklines(expected)
            new_metric: bool = sp1 == sp2
-            print("Sparkline Metric: {:}".format(new_metric))
+            logger.debug("Sparkline Metric: {:}".format(new_metric))
        elif ftr=="chart":
            charts1 = load_charts(workbook1, **options)
            charts2 = load_charts(workbook2, **options)
            new_metric: bool = charts1 == charts2
-            print("Chart Metric: {:}".format(new_metric))
+            logger.debug("Chart Metric: {:}".format(new_metric))
        elif ftr=="number_format":
            number_formats1: List[str] = [ c.number_format.lower()\
                                           for col in workbook1.active.iter_cols()\
@@ -61,7 +63,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
                                            if c.data_type=="n"
                                         ]
            new_metric: bool = number_formats1==number_formats2
-            print("Number Format Metric: {:}".format(new_metric))
+            logger.debug("Number Format Metric: {:}".format(new_metric))
        else:
            raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
        metric = metric and new_metric
@@ -79,7 +81,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
            expected_name: str = worksheet_names[r["sheet_idx"]]
            actual_name: str = r["sheet_name"]
            metric: bool = expected_name == actual_name
-            print("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric))
+            logger.debug("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric))
            passes = passes and metric
        elif r["type"] == "sheet_data":
            if isinstance(r["sheet_idx0"], int):
@@ -99,7 +101,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
                sheet_idx: int = int(sheet_idx)
                df2: pd.DataFrame = pd.read_excel(file_name, sheet_idx)
            metric: bool = df1.equals(df2)
-            print("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
+            logger.debug("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
            passes = passes and metric
        else:
            raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -11,6 +11,9 @@ from openpyxl.chart._chart import ChartBase
 from typing import Dict, List, Set
 from typing import Any

+import logging
+logger = logging.getLogger("desktopenv.metrics.utils")
+
 _xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
                   , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
                   ]
--- a/desktop_env/evaluators/metrics/vlc.py
+++ b/desktop_env/evaluators/metrics/vlc.py
@@ -5,6 +5,8 @@ import pygetwindow as gw
 import pyautogui
 from typing import Dict

+import logging
+logger = logging.getLogger("desktopenv.metrics.vlc")

 def get_vlc_config(setting_name):
    """
@@ -25,7 +27,7 @@ def get_vlc_config(setting_name):
    config_path = paths.get(os_type)

    if not config_path or not os.path.exists(config_path):
-        print("VLC config file not found for this operating system.")
+        logger.warning("VLC config file not found for this operating system.")
        return None

    try:
@@ -34,7 +36,7 @@ def get_vlc_config(setting_name):
                if line.startswith(setting_name):
                    return line.strip()
    except IOError as e:
-        print(f"Error reading config file: {e}")
+        logger.error(f"Error reading config file: {e}")

    return None

@@ -77,8 +79,8 @@ def is_vlc_fullscreen():

    except IndexError:
        # VLC window not found
-        print("VLC window not found.")
+        logger.error("VLC window not found.")
        return False
    except Exception as e:
-        print(f"An error occurred: {e}")
+        logger.error(f"An error occurred: {e}")
        return False
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -9,22 +9,26 @@ import pyautogui
 # from PIL import ImageGrab, Image
 from PIL import Image
 from flask import Flask, request, jsonify, send_file
+from typing import List

 app = Flask(__name__)

 pyautogui.PAUSE = 0
 pyautogui.DARWIN_CATCH_UP_TIME = 0

+logger = app.logger

+@app.route('/setup/execute', methods=['POST'])
@app.route('/execute', methods=['POST'])
 def execute_command():
    data = request.json
    # The 'command' key in the JSON request should contain the command to be executed.
-    command = data.get('command', '')
+    shell = data.get('shell', False)
+    command = data.get('command', "" if shell else [])

    # Execute the command without any safety checks.
    try:
-        result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True)
        return jsonify({
            'status': 'success',
            'output': result.stdout,
@@ -36,6 +40,21 @@ def execute_command():
            'message': str(e)
        }), 500

+@app.route('/setup/launch', methods=["POST"])
+def launch_app():
+    """Spawn the program given in the JSON body's "command" field and return without waiting for it."""
+    data = request.json
+    # "command" is normally an argv list, but a bare program-name string is tolerated as well.
+    command = data.get("command", [])
+
+    try:
+        subprocess.Popen(command)
+        # str.join on a plain string would space out its characters ("v l c"), so show it as-is.
+        shown = command if isinstance(command, str) else " ".join(command)
+        return "{:} launched successfully".format(shown)
+    except Exception as e:
+        return jsonify({"status": "error", "message": str(e)}), 500
+

@app.route('/screenshot', methods=['GET'])
 def capture_screen_with_cursor():
@@ -77,7 +96,7 @@ def capture_screen_with_cursor():
        # Use the screencapture utility to capture the screen with the cursor
        subprocess.run(["screencapture", "-C", file_path])
    else:
-        print(f"The platform you're using ({user_platform}) is not currently supported")
+        logger.warning(f"The platform you're using ({user_platform}) is not currently supported")

    return send_file(file_path, mimetype='image/png')

@@ -173,7 +192,7 @@ def download_file():
            return "File downloaded successfully"

        except requests.RequestException as e:
-            print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
+            logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")

    return f"Failed to download {url}. No retries left. Error: {e}", 500

--- a/evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
+++ b/evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
@@ -3,10 +3,98 @@
  "snapshot": "thunderbird",
  "instruction": "Help me access my gmail account with address \"xx@gmail.com\" and password \"xxx\"",
  "source": "https://www.wikihow.com/Access-Gmail-With-Mozilla-Thunderbird",
-  "config": [],
+  "config": [
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712",
+            "path": "/home/david/thunderbird-profile.tar.gz"
+          }
+        ]
+      }
+    },
+    {
+      "type": "execute",
+      "parameters": {
+        "command": [
+          "tar",
+          "-xzv",
+          "--recursive-unlink",
+          "-f",
+          "/home/david/thunderbird-profile.tar.gz",
+          "-C",
+          "/home/david/"
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "/usr/bin/thunderbird"
+        ]
+      }
+    }
+  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "thunderbird"
  ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "postconfig": [
+      {
+        "type": "download",
+        "parameters": {
+          "files": [
+            {
+              "url": "https://raw.githubusercontent.com/unode/firefox_decrypt/main/firefox_decrypt.py",
+              "path": "/home/david/firefox_decrypt.py"
+            }
+          ]
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python3",
+            "/home/david/firefox_decrypt.py",
+            "/home/david/.thunderbird",
+            "-n",
+            "-c",
+            "2",
+            "-f",
+            "csv",
+            "-d",
+            ","
+          ],
+          "stdout": "thunderbird-accounts.csv"
+        }
+      }
+    ],
+    "func": "check_csv",
+    "result": {
+      "type": "cache_file",
+      "path": "thunderbird-accounts.csv"
+    },
+    "expected": {
+      "type": "rule",
+      "rules": {
+        "expect": [
+          {
+            "url": "imap://imap.gmail.com",
+            "user": "xx@gmail.com",
+            "password": "xxx"
+          },
+          {
+            "url": "smtp://smtp.gmail.com",
+            "user": "xx@gmail.com",
+            "password": "xxx"
+          }
+        ]
+      }
+    }
+  }
 }
--- a/evaluation_examples/examples/thunderbird/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
+++ b/evaluation_examples/examples/thunderbird/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
@@ -1,12 +1,94 @@
 {
  "id": "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
  "snapshot": "thunderbird",
-  "instruction": "Help remove the account \"xx@xx\"",
+  "instruction": "Help me to remove the account \"anonym-x2024@outlook.com\"",
  "source": "https://www.wikihow.com/Remove-an-Email-Account-from-Thunderbird",
-  "config": [],
-  "trajectory": "trajectories/",
+  "config": [
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712",
+            "path": "/home/david/thunderbird-profile.tar.gz"
+          }
+        ]
+      }
+    },
+    {
+      "type": "execute",
+      "parameters": {
+        "command": [
+          "tar",
+          "-xzv",
+          "--recursive-unlink",
+          "-f",
+          "/home/david/thunderbird-profile.tar.gz",
+          "-C",
+          "/home/david/"
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "/usr/bin/thunderbird"
+        ]
+      }
+    }
+  ],
+  "trajectory": "trajectories/bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
  "related_apps": [
    "thunderbird"
  ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "postconfig": [
+      {
+        "type": "download",
+        "parameters": {
+          "files": [
+            {
+              "url": "https://raw.githubusercontent.com/unode/firefox_decrypt/main/firefox_decrypt.py",
+              "path": "/home/david/firefox_decrypt.py"
+            }
+          ]
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python3",
+            "/home/david/firefox_decrypt.py",
+            "/home/david/.thunderbird",
+            "-n",
+            "-c",
+            "2",
+            "-f",
+            "csv",
+            "-d",
+            ","
+          ],
+          "stdout": "thunderbird-accounts.csv"
+        }
+      }
+    ],
+    "func": "check_csv",
+    "result": {
+      "type": "cache_file",
+      "path": "thunderbird-accounts.csv"
+    },
+    "expected": {
+      "type": "rule",
+      "rules": {
+        "unexpect": [
+          {
+            "url": "imap://outlook.office365.com",
+            "user": "anonym-x2024@outlook.com"
+          }
+        ]
+      }
+    }
+  }
 }
--- a/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
+++ b/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
@@ -16,7 +16,7 @@
      }
    },
    {
-      "type": "command",
+      "type": "launch",
      "parameters": {
        "command": "vlc"
      }
--- a/main.py
+++ b/main.py
@@ -1,13 +1,50 @@
 import json
 from desktop_env.envs.desktop_env import DesktopEnv

+import logging
+import os
+import sys
+import datetime
+
+#  Logger Configs {{{ #
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
+os.makedirs("logs", exist_ok=True)  # FileHandler opens its file immediately and does not create the directory
+file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
+debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
+stdout_handler = logging.StreamHandler(sys.stdout)
+sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))
+# The root logger lets every record through (DEBUG); each handler applies its own threshold.
+file_handler.setLevel(logging.INFO)
+debug_handler.setLevel(logging.DEBUG)
+stdout_handler.setLevel(logging.INFO)
+sdebug_handler.setLevel(logging.DEBUG)
+
+formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
+file_handler.setFormatter(formatter)
+debug_handler.setFormatter(formatter)
+stdout_handler.setFormatter(formatter)
+sdebug_handler.setFormatter(formatter)
+# Only records from the "desktopenv" logger hierarchy reach stdout and the sdebug file.
+stdout_handler.addFilter(logging.Filter("desktopenv"))
+sdebug_handler.addFilter(logging.Filter("desktopenv"))
+
+logger.addHandler(file_handler)
+logger.addHandler(debug_handler)
+logger.addHandler(stdout_handler)
+logger.addHandler(sdebug_handler)
+#  }}} Logger Configs #
+
+logger = logging.getLogger("desktopenv.main")

 def human_agent():
    """
    Runs the Gym environment with human input.
    """

-    with open("evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json", "r") as f:
+    with open("evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json", "r") as f:
        example = json.load(f)
    example["snapshot"] = "base_setup3"

@@ -39,24 +76,26 @@ def human_agent():
        #     "action_type": 0,
        #     "click_type": 3,
        # }
-        print(trajectory[i])
+        logger.info(trajectory[i])

        observation, reward, done, info = env.step(trajectory[i], pause=5)
-        print("Observation:", observation)
-        print("Reward:", reward)
-        print("Info:", info)
+        logger.info("Observation: %s", observation)
+        logger.info("Reward: %.2f", reward)
+        logger.info("Info: %s", info)

-        print("================================\n")
+        logger.info("================================\n")

        if done:
-            print("The episode is done.")
+            logger.info("The episode is done.")
            break

    result = env.evaluate()
-    print("Result:", result)
+    logger.info("Result: %.2f", result)
+
+    #input("PAUSING")

    env.close()
-    print("Environment closed.")
+    logger.info("Environment closed.")


 if __name__ == "__main__":