Merge branch 'zdy'

2024-01-05 15:55:41 +08:00
parent 3cbb57f24c eeb8a120d6
commit 26b7d9010d
19 changed files with 522 additions and 180 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,4 @@ tags-opts
 snapshots
 branch_flag
 branch-config
 *.syncthing.*.tmp
--- a/desktop_env/controllers/python.py
+++ b/desktop_env/controllers/python.py
@@ -3,6 +3,8 @@ from typing import Any, Dict
 import requests
 from desktop_env.envs.actions import KEYBOARD_KEYS
 import logging
 logger = logging.getLogger("desktopenv.pycontroller")
 class PythonController:
    def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
@@ -17,7 +19,7 @@ class PythonController:
        if response.status_code == 200:
            return response.content
        else:
-            print("Failed to get screenshot. Status code:", response.status_code)
+            logger.error("Failed to get screenshot. Status code: %d", response.status_code)
            return None
    def get_file(self, file_path: str):
@@ -26,10 +28,10 @@ class PythonController:
        """
        response = requests.post(self.http_server + "/file", data={"file_path": file_path})
        if response.status_code == 200:
-            print("File downloaded successfully")
+            logger.info("File downloaded successfully")
            return response.content
        else:
-            print("Failed to get file. Status code:", response.status_code)
+            logger.error("Failed to get file. Status code: %d", response.status_code)
            return None
    def execute_python_command(self, command: str) -> None:
@@ -38,7 +40,7 @@ class PythonController:
        It can be used to execute the pyautogui commands, or... any other python command. who knows?
        """
        command = self.pkgs_prefix.format(command=command)
-        payload = json.dumps({"command": command})
+        payload = json.dumps({"command": command, "shell": True})
        headers = {
            'Content-Type': 'application/json'
        }
@@ -46,12 +48,12 @@ class PythonController:
        try:
            response = requests.post(self.http_server + "/execute", headers=headers, data=payload)
            if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
            else:
-                print("Failed to execute command. Status code:", response.status_code)
+                logger.error("Failed to execute command. Status code: %d", response.status_code)
            return response.json()
        except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to execute the command:", e)
+            logger.error("An error occurred while trying to execute the command: %s", e)
    def execute_action(self, action: Dict[str, Any]):
        """
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -6,8 +6,12 @@ import uuid
 import os.path
 from typing import Dict, List
-from typing import Any
+from typing import Any, Union
 import logging
 logger = logging.getLogger("desktopenv.setup")
 import traceback
 class SetupController:
    def __init__(self, http_server: str, cache_dir: str):
@@ -47,31 +51,32 @@ class SetupController:
        # self._open_setup(config)
        # can add other setup steps
-    def _command_setup(self, command: str):
+    # ZDY_COMMENT: merged with launch
-        """
+    #def _command_setup(self, command: str):
-        Directly send a command into the virtual machine os for setting up.
+        #"""
-        """
+        #Directly send a command into the virtual machine os for setting up.
-        payload = json.dumps({"command": command})
+        #"""
-        headers = {
+        #payload = json.dumps({"command": command})
-            'Content-Type': 'application/json'
+        #headers = {
-        }
+            #'Content-Type': 'application/json'
-        timeout = 5
+        #}
-        timout_whitelist = ["vlc"]
+        #timeout = 5
-
+        #timout_whitelist = ["vlc"]
-        try:
+#
-
+        #try:
-            response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
+#
-            if response.status_code == 200:
+            #response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
-                print("Command executed successfully:", response.text)
+            #if response.status_code == 200:
-            else:
+                #print("Command executed successfully:", response.text)
-                print("Failed to execute command. Status code:", response.status_code)
+            #else:
-        except requests.exceptions.Timeout as e:
+                #print("Failed to execute command. Status code:", response.status_code)
-            if command in timout_whitelist:
+        #except requests.exceptions.Timeout as e:
-                print("Command executed successfully:", command)
+            #if command in timout_whitelist:
-            else:
+                #print("Command executed successfully:", command)
-                print("An error occurred while trying to execute the command:", e)
+            #else:
-        except requests.exceptions.RequestException as e:
+                #print("An error occurred while trying to execute the command:", e)
-            print("An error occurred while trying to execute the command:", e)
+        #except requests.exceptions.RequestException as e:
            #print("An error occurred while trying to execute the command:", e)
    def _download_setup(self, files: List[Dict[str, str]]):
        """
@@ -110,12 +115,12 @@ class SetupController:
                            for chunk in response.iter_content(chunk_size=8192):
                                if chunk:
                                    f.write(chunk)
-                        print("File downloaded successfully")
+                        logger.info("File downloaded successfully")
                        downloaded = True
                        break
                    except requests.RequestException as e:
-                        print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
+                        logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
                if not downloaded:
                    raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")
@@ -129,17 +134,18 @@ class SetupController:
                "file_data": (os.path.basename(path), open(cache_path, "rb"))
            })
            headers = {"Content-Type": form.content_type}
-            print(form.content_type)
+            logger.debug(form.content_type)
            # send request to server to upload file
            try:
                logger.debug("REQUEST ADDRESS: %s", self.http_server_setup_root + "/upload")
                response = requests.post(self.http_server_setup_root + "/upload", headers=headers, data=form)
                if response.status_code == 200:
-                    print("Command executed successfully:", response.text)
+                    logger.info("Command executed successfully: %s", response.text)
                else:
-                    print("Failed to upload file. Status code:", response.text)
+                    logger.error("Failed to upload file. Status code: %s", response.text)
            except requests.exceptions.RequestException as e:
-                print("An error occurred while trying to send the request:", e)
+                logger.error("An error occurred while trying to send the request: %s", e)
    def _change_wallpaper_setup(self, path: str):
        # if not config:
@@ -160,14 +166,14 @@ class SetupController:
        try:
            response = requests.post(self.http_server_setup_root + "/change_wallpaper", headers=headers, data=payload)
            if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
            else:
-                print("Failed to change wallpaper. Status code:", response.text)
+                logger.error("Failed to change wallpaper. Status code: %s", response.text)
        except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to send the request:", e)
+            logger.error("An error occurred while trying to send the request: %s", e)
    def _tidy_desktop_setup(self, **config):
-        raise NotImplementedError
+        raise NotImplementedError()
    def _open_setup(self, path: str):
        # if not config:
@@ -187,8 +193,63 @@ class SetupController:
        try:
            response = requests.post(self.http_server_setup_root + "/open_file", headers=headers, data=payload)
            if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
            else:
-                print("Failed to open file. Status code:", response.text)
+                logger.error("Failed to open file. Status code: %s", response.text)
        except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to send the request:", e)
+            logger.error("An error occurred while trying to send the request: %s", e)
    def _launch_setup(self, command: List[str]):
        if not command:
            raise Exception("Empty comman to launch.")
        payload = json.dumps({"command": command})
        headers = {"Content-Type": "application/json"}
        try:
            response = requests.post(self.http_server_setup_root + "/launch", headers=headers, data=payload)
            if response.status_code == 200:
                logger.info("Command executed successfully: %s", response.text)
            else:
                logger.error("Failed to launch application. Status code: %s", response.text)
        except requests.exceptions.RequestException as e:
            logger.error("An error occurred while trying to send the request: %s", e)
    def _execute_setup(self, command: List[str], stdout: str = "", stderr: str = ""):
        if not command:
            raise Exception("Empty comman to launch.")
        payload = json.dumps({"command": command})
        headers = {"Content-Type": "application/json"}
        try:
            response = requests.post(self.http_server_setup_root + "/execute", headers=headers, data=payload)
            if response.status_code == 200:
                results: Dict[str, str] = response.json()
                if stdout:
                    with open(os.path.join(self.cache_dir, stdout), "w") as f:
                        f.write(results["output"])
                if stderr:
                    with open(os.path.join(self.cache_dir, stderr), "w") as f:
                        f.write(results["error"])
                logger.info( "Command executed successfully: %s -> %s"
                           , " ".join(command)
                           , response.text
                           )
            else:
                logger.error("Failed to launch application. Status code: %s", response.text)
        except requests.exceptions.RequestException as e:
            logger.error("An error occurred while trying to send the request: %s", e)
            traceback.print_exc()
    def _act_setup(self, action_seq: List[Union[Dict[str, Any], str]]):
        """
        Placeholder for a setup step driven by a sequence of actions; not
        implemented yet.

        Args:
            action_seq (List[Union[Dict[str, Any], str]]): the actions, each
              given either as a dict or as a str

        Raises:
            NotImplementedError: always
        """
        # TODO
        raise NotImplementedError()
    def _replay_setup(self, trajectory: str):
        """
        Placeholder for a setup step that replays a recorded trajectory; not
        implemented yet.

        Args:
            trajectory (str): path to the replay trajectory file

        Raises:
            NotImplementedError: always
        """
        # TODO
        raise NotImplementedError()
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -17,10 +17,12 @@ from desktop_env.controllers.setup import SetupController
 # from desktop_env.evaluators import eval_funcs
 from desktop_env.evaluators import metrics, getters
 import logging
 logger = logging.getLogger("desktopenv.env")
 Metric = Callable[[Any, Any], float]
 Getter = Callable[[gym.Env, Dict[str, Any]], Any]
 def _execute_command(command: List[str]) -> None:
    if command[:4] == ["vmrun", "-T", "ws", "start"]:
        p = subprocess.Popen(command)
@@ -68,22 +70,11 @@ class DesktopEnv(gym.Env):
        self.cache_dir_base: str = cache_dir
        # task-aware settings
-        self.snapshot_path = task_config["snapshot"]  # todo: handling the logic of snapshot directory
+        # todo: handling the logic of snapshot directory
-        self.task_id: str = task_config["id"]
+        self._set_task_info(task_config)
        self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id)
        os.makedirs(self.cache_dir, exist_ok=True)
        self.instruction = task_config["instruction"]
        self.config = task_config["config"]
        self.evaluator = task_config["evaluator"]
        self.metric: Metric = getattr(metrics, self.evaluator["func"])
        self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
        self.expected_getter: Getter = getattr(getters, "get_{:}".format(
            self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None
        self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
        # Initialize emulator and controller
-        print("Initializing...")
+        logger.info("Initializing...")
        self._start_emulator()
        self.vm_ip = self._get_vm_ip()
        self.host = f"http://{self.vm_ip}:5000"
@@ -110,26 +101,26 @@ class DesktopEnv(gym.Env):
                output: List[str] = output.splitlines()
                # if self.path_to_vm.lstrip("~/") in output:
                if self.path_to_vm in output:
-                    print("VM is running.")
+                    logger.info("VM is running.")
                    break
                else:
-                    print("Starting VM...")
+                    logger.info("Starting VM...")
                    _execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm])
                    time.sleep(3)
            except subprocess.CalledProcessError as e:
-                print(f"Error executing command: {e.output.decode().strip()}")
+                logger.error(f"Error executing command: {e.output.decode().strip()}")
    def _get_vm_ip(self):
        """
        Ask `vmrun getGuestIPAddress` for the address of the VM at
        `self.path_to_vm`, retrying up to 10 times with a 5 second pause
        after each failed attempt.

        Returns:
            str: the stripped output of the vmrun command

        Raises:
            Exception: if every attempt fails
        """

        max_retries = 10
        logger.info("Getting IP Address...")
        for _ in range(max_retries):
            try:
                output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip()
            except Exception:
                # `except Exception` instead of a bare `except` so that
                # KeyboardInterrupt and SystemExit still abort the retry loop
                time.sleep(5)
                logger.info("Retrying...")
            else:
                logger.info("IP address: %s", output)
                return output
        raise Exception("Failed to get VM IP address!")
    def _save_state(self):
@@ -152,52 +143,54 @@ class DesktopEnv(gym.Env):
        screenshot_image_path = self._get_screenshot()
        return screenshot_image_path
    def _set_task_info(self, task_config: Dict[str, Any]):
        """
        Copy the task-specific settings of `task_config` onto the environment
        and create the cache dir of the task.

        Args:
            task_config (Dict[str, Any]): dict read for the keys "snapshot",
              "id", "instruction", "config" and "evaluator"; the evaluator
              entry provides "func", "result" and optionally "expected" and
              "options"
        """

        # identity of the task and its private cache directory
        self.snapshot_path = task_config["snapshot"]
        self.task_id: str = task_config["id"]
        self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id)
        os.makedirs(self.cache_dir, exist_ok=True)

        self.instruction = task_config["instruction"]
        self.config = task_config["config"]

        # evaluation: metric function plus the getters that feed it
        evaluator = task_config["evaluator"]
        self.evaluator = evaluator
        self.metric: Metric = getattr(metrics, evaluator["func"])
        self.result_getter: Getter = getattr(getters, f"get_{evaluator['result']['type']}")
        if "expected" in evaluator:
            self.expected_getter: Getter = getattr(getters, f"get_{evaluator['expected']['type']}")
        else:
            self.expected_getter = None
        self.metric_options: Dict[str, Any] = evaluator.get("options", {})
    def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None):
        """
        Start a new trajectory: optionally switch to another task, revert the
        VM to the task snapshot, restart the emulator and run the task setup.

        Args:
            task_config (Optional[Dict[str, Any]]): config of the task to
              switch to; None keeps the current task
            seed: accepted but not used by this method
            options: accepted but not used by this method

        Returns:
            the observation produced by `self._get_obs()`
        """

        logger.info("Resetting environment...")

        logger.info("Switching task...")
        if task_config is not None:
            # _set_task_info assigns every task attribute (snapshot, id,
            # cache dir, instruction, config, evaluator, metric and getters),
            # so nothing has to be re-assigned here
            self._set_task_info(task_config)
            self.setup_controller.reset_cache_dir(self.cache_dir)

        logger.info("Setting counters...")
        self._traj_no += 1
        self._step_no = 0
        self.action_history.clear()

        logger.info("Setup new temp dir...")
        self.tmp_dir = tempfile.mkdtemp(
            prefix="{:d}.{:}.".format(self._traj_no, self.task_id),
            dir=self.tmp_dir_base
        )
        os.makedirs(os.path.join(self.tmp_dir, "screenshots"))

        logger.info("Reverting to snapshot to %s...", self.snapshot_path)
        _execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
        time.sleep(5)

        logger.info("Starting emulator...")
        self._start_emulator()
        logger.info("Emulator started.")

        logger.info("Setting up environment...")
        self.setup_controller.setup(self.config)

        time.sleep(5)
        logger.info("Environment setup complete.")

        observation = self._get_obs()
        return observation
@@ -229,6 +222,9 @@ class DesktopEnv(gym.Env):
        """
        Evaluate whether the task is successfully completed.
        """
        self.setup_controller.setup(self.evaluator["postconfig"])
        result_state = self.result_getter(self, self.evaluator["result"])
        expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \
            else None
--- a/desktop_env/evaluators/getters/init.py
+++ b/desktop_env/evaluators/getters/init.py
@@ -1,3 +1,3 @@
-from .file import get_cloud_file, get_vm_file
+from .file import get_cloud_file, get_vm_file, get_cache_file
 from .misc import get_rule
 from .vlc import get_vlc_playing_info
--- a/desktop_env/evaluators/getters/file.py
+++ b/desktop_env/evaluators/getters/file.py
@@ -41,3 +41,13 @@ def get_vm_file(env, config: Dict[str, str]) -> str:
        f.write(file)
    return _path
 def get_cache_file(env, config: Dict[str, str]) -> str:
    """
    Resolve a file that is expected to be present in the cache dir of the
    environment.

    Config:
        path (str): relative path in cache dir

    Returns:
        str: `config["path"]` joined onto `env.cache_dir`; the path is
          asserted to exist
    """

    cached_file: str = os.path.join(env.cache_dir, config["path"])
    is_present: bool = os.path.exists(cached_file)
    assert is_present
    return cached_file
--- a/desktop_env/evaluators/getters/misc.py
+++ b/desktop_env/evaluators/getters/misc.py
@@ -1,8 +1,66 @@
 from typing import TypeVar
 import platform
 import subprocess
 import ctypes
 import os
 import logging
 logger = logging.getLogger("desktopenv.getters.misc")
 R = TypeVar("Rule")


 def get_rule(env, config: R) -> R:
    """
    Hand back the "rules" entry of `config` unchanged; `env` is not used.
    """
    rules = config["rules"]
    return rules
 def get_desktop_path():
    """
    Build the conventional Desktop directory path of the current user for
    Windows, macOS or Linux.

    Returns:
        str: absolute path of the Desktop directory

    Raises:
        Exception: if `platform.system()` is none of "Windows", "Darwin" or
          "Linux"
    """

    try:
        username = os.getlogin()  # Get the current username
    except OSError:
        # os.getlogin() fails when the process has no controlling terminal;
        # getpass.getuser() consults the environment and the password database
        import getpass
        username = getpass.getuser()

    system = platform.system()
    if system == "Windows":
        # a bare "C:" would make os.path.join return the drive-relative
        # "C:Users\...", so the root separator is spelled out
        return os.path.join("C:" + os.sep, "Users", username, "Desktop")
    elif system == "Darwin":  # macOS is identified as 'Darwin'
        return os.path.join("/Users", username, "Desktop")
    elif system == "Linux":
        return os.path.join("/home", username, "Desktop")
    else:
        raise Exception("Unsupported operating system")
 def get_wallpaper():
    """
    Query the desktop wallpaper of the machine this code runs on.

    Returns:
        Optional[str]: the wallpaper reported by the OS-specific query; None
          if the macOS or Linux query fails; the literal string
          "Unsupported OS" if `platform.system()` is none of "Windows",
          "Darwin" or "Linux"
    """

    def get_wallpaper_windows():
        # SystemParametersInfoW fills the buffer with the wallpaper path
        SPI_GETDESKWALLPAPER = 0x73
        MAX_PATH = 260
        buffer = ctypes.create_unicode_buffer(MAX_PATH)
        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
        return buffer.value

    def get_wallpaper_macos():
        script = """
        tell application "System Events" to tell every desktop to get picture
        """
        # stderr has to be piped too: without it `error` is always None and
        # a failing osascript call can never be detected
        process = subprocess.Popen( ['osascript', '-e', script]
                                  , stdout=subprocess.PIPE
                                  , stderr=subprocess.PIPE
                                  )
        output, error = process.communicate()
        if process.returncode != 0:
            logger.error("Error: %s", error.decode('utf-8', errors='replace').strip())
            return None
        return output.strip().decode('utf-8')

    def get_wallpaper_linux():
        try:
            output = subprocess.check_output(["gsettings", "get", "org.gnome.desktop.background", "picture-uri"])
            # gsettings prints a quoted file:// URI; reduce it to a plain path
            return output.decode('utf-8').strip().replace('file://', '').replace("'", "")
        except Exception as e:
            logger.error("Error: %s", e)
            return None

    os_name = platform.system()
    if os_name == 'Windows':
        return get_wallpaper_windows()
    elif os_name == 'Darwin':
        return get_wallpaper_macos()
    elif os_name == 'Linux':
        return get_wallpaper_linux()
    else:
        return "Unsupported OS"
--- a/desktop_env/evaluators/metrics/init.py
+++ b/desktop_env/evaluators/metrics/init.py
@@ -6,3 +6,4 @@ from .docs import is_first_line_centered, check_file_exists, compare_contains_im
 from .pdf import check_pdf_pages
 from .libreoffice import check_libre_locale
 from .vlc import is_vlc_playing
 from .general import check_csv
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -5,6 +5,9 @@ import sqlite3
 from playwright.sync_api import sync_playwright
 import logging
 logger = logging.getLogger("desktopenv.metrics.chrome")
 """
 WARNING: 
 1. Functions from this script assume that no account is registered on Chrome, otherwise the default file path needs to be changed.
@@ -36,7 +39,7 @@ def get_default_search_engine():
                                                                                                      'Google')
        return search_engine
    except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
        return "Google"
@@ -61,7 +64,7 @@ def get_cookie_data():
        return cookies
    except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
        return None
@@ -85,7 +88,7 @@ def get_bookmarks():
        return bookmarks
    except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
        return None
@@ -117,7 +120,7 @@ def get_extensions_installed_from_shop():
                            manifest = json.load(file)
                            manifests.append(manifest)
                        except json.JSONDecodeError:
-                            print(f"Error reading {manifest_path}")
+                            logger.error(f"Error reading {manifest_path}")
    return manifests
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -4,6 +4,8 @@ from typing import List, Dict, Any
 from docx import Document
 from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
 import logging
 logger = logging.getLogger("desktopenv.metric.docs")
 def find_default_font(config_file_path, rules):
    """Find the default font in LibreOffice Writer."""
@@ -23,7 +25,7 @@ def find_default_font(config_file_path, rules):
                for value in prop.findall('value', namespace):
                    default_font = value.text
    except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
    return 1 if default_font == expected_font else 0
@@ -192,4 +194,4 @@ def compare_contains_image(docx_file1, docx_file2):
 # print(result)
 # config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
-# print(find_default_font("Ani", config_path))
+# print(find_default_font("Ani", config_path))
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -1,57 +1,30 @@
-import platform
+import csv
-import subprocess
+from typing import Dict, List
 import ctypes
 import os
 def _match_record(pattern: Dict[str, str], item: Dict[str, str]) -> float:
    return all(k in item and item[k]==val for k, val in pattern.items())
-# todo: move to getter module
+def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
-def get_desktop_path():
+    """
-    username = os.getlogin()  # Get the current username
+    Args:
-    if platform.system() == "Windows":
+        result (str): path to csv file
-        return os.path.join("C:", "Users", username, "Desktop")
+        rules (Dict[str, List[Dict[str, str]]]): dict like
-    elif platform.system() == "Darwin":  # macOS is identified as 'Darwin'
+          {
-        return os.path.join("/Users", username, "Desktop")
+            "expect": [{key: value}]
-    elif platform.system() == "Linux":
+            "unexpect": [{key: value}]
-        return os.path.join("/home", username, "Desktop")
+          }
    else:
        raise Exception("Unsupported operating system")
 def get_wallpaper():
    def get_wallpaper_windows():
        SPI_GETDESKWALLPAPER = 0x73
        MAX_PATH = 260
        buffer = ctypes.create_unicode_buffer(MAX_PATH)
        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
        return buffer.value
    def get_wallpaper_macos():
        script = """
        tell application "System Events" to tell every desktop to get picture
        """
        process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE)
        output, error = process.communicate()
        if error:
            print("Error:", error)
        else:
            return output.strip().decode('utf-8')
    def get_wallpaper_linux():
        try:
            output = subprocess.check_output(["gsettings", "get", "org.gnome.desktop.background", "picture-uri"])
            return output.decode('utf-8').strip().replace('file://', '').replace("'", "")
        except Exception as e:
            print("Error:", e)
            return None
    os_name = platform.system()
    if os_name == 'Windows':
        return get_wallpaper_windows()
    elif os_name == 'Darwin':
        return get_wallpaper_macos()
    elif os_name == 'Linux':
        return get_wallpaper_linux()
    else:
        return "Unsupported OS"
    Returns:
        float
    """
    expect_metrics = [False] * len(rules.get("expect", []))
    unexpect_metric = True
    with open(result) as f:
        reader = csv.DictReader(f)
        for rcd in reader:
            for i, r in enumerate(rules.get("expect", [])):
                expect_metrics[i] = expect_metrics[i] or _match_record(r, rcd)
            unexpect_metric = unexpect_metric and all(_match_record(r, rcd) for r in rules.get("unexpect", []))
    return float(all(expect_metrics) and unexpect_metric)
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -10,6 +10,8 @@ from typing import Dict, List
 from typing import Any, Union
 from numbers import Number
 import logging
 logger = logging.getLogger("desktopenv.metric.table")
 def compare_table(actual: str, expected: str, **options) -> float:
    """
@@ -32,7 +34,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
    df1 = pd.read_excel(expected)
    df2 = pd.read_excel(actual)
    metric: bool = df1.equals(df2)
-    print("Normal Contents Metric: {:}".format(metric))
+    logger.debug("Normal Contents Metric: {:}".format(metric))
    features: List[str] = options.get("features", [])
    for ftr in features:
@@ -43,12 +45,12 @@ def compare_table(actual: str, expected: str, **options) -> float:
            sp1 = load_sparklines(actual)
            sp2 = load_sparklines(expected)
            new_metric: bool = sp1 == sp2
-            print("Sparkline Metric: {:}".format(new_metric))
+            logger.debug("Sparkline Metric: {:}".format(new_metric))
        elif ftr=="chart":
            charts1 = load_charts(workbook1, **options)
            charts2 = load_charts(workbook2, **options)
            new_metric: bool = charts1 == charts2
-            print("Chart Metric: {:}".format(new_metric))
+            logger.debug("Chart Metric: {:}".format(new_metric))
        elif ftr=="number_format":
            number_formats1: List[str] = [ c.number_format.lower()\
                                           for col in workbook1.active.iter_cols()\
@@ -61,7 +63,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
                                            if c.data_type=="n"
                                         ]
            new_metric: bool = number_formats1==number_formats2
-            print("Number Format Metric: {:}".format(new_metric))
+            logger.debug("Number Format Metric: {:}".format(new_metric))
        else:
            raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
        metric = metric and new_metric
@@ -79,7 +81,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
            expected_name: str = worksheet_names[r["sheet_idx"]]
            actual_name: str = r["sheet_name"]
            metric: bool = expected_name == actual_name
-            print("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric))
+            logger.debug("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric))
            passes = passes and metric
        elif r["type"] == "sheet_data":
            if isinstance(r["sheet_idx0"], int):
@@ -99,7 +101,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
                sheet_idx: int = int(sheet_idx)
                df2: pd.DataFrame = pd.read_excel(file_name, sheet_idx)
            metric: bool = df1.equals(df2)
-            print("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
+            logger.debug("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
            passes = passes and metric
        else:
            raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -11,6 +11,9 @@ from openpyxl.chart._chart import ChartBase
 from typing import Dict, List, Set
 from typing import Any
 import logging
 logger = logging.getLogger("desktopenv.metrics.utils")
 _xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
                   , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
                   ]
--- a/desktop_env/evaluators/metrics/vlc.py
+++ b/desktop_env/evaluators/metrics/vlc.py
@@ -5,6 +5,8 @@ import pygetwindow as gw
 import pyautogui
 from typing import Dict
 import logging
 logger = logging.getLogger("desktopenv.metrics.vlc")
 def get_vlc_config(setting_name):
    """
@@ -25,7 +27,7 @@ def get_vlc_config(setting_name):
    config_path = paths.get(os_type)
    if not config_path or not os.path.exists(config_path):
-        print("VLC config file not found for this operating system.")
+        logger.warning("VLC config file not found for this operating system.")
        return None
    try:
@@ -34,7 +36,7 @@ def get_vlc_config(setting_name):
                if line.startswith(setting_name):
                    return line.strip()
    except IOError as e:
-        print(f"Error reading config file: {e}")
+        logger.error(f"Error reading config file: {e}")
    return None
@@ -77,8 +79,8 @@ def is_vlc_fullscreen():
    except IndexError:
        # VLC window not found
-        print("VLC window not found.")
+        logger.error("VLC window not found.")
        return False
    except Exception as e:
-        print(f"An error occurred: {e}")
+        logger.error(f"An error occurred: {e}")
        return False
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -9,22 +9,26 @@ import pyautogui
 # from PIL import ImageGrab, Image
 from PIL import Image
 from flask import Flask, request, jsonify, send_file
 from typing import List
 app = Flask(__name__)
 pyautogui.PAUSE = 0
 pyautogui.DARWIN_CATCH_UP_TIME = 0
 logger = app.logger
@app.route('/setup/execute', methods=['POST'])
@app.route('/execute', methods=['POST'])
 def execute_command():
    data = request.json
    # The 'command' key in the JSON request should contain the command to be executed.
-    command = data.get('command', '')
+    shell = data.get('shell', False)
    command = data.get('command', "" if shell else [])
    # Execute the command without any safety checks.
    try:
-        result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True)
        return jsonify({
            'status': 'success',
            'output': result.stdout,
@@ -36,6 +40,21 @@ def execute_command():
            'message': str(e)
        }), 500
@app.route('/setup/launch', methods=["POST"])
 def launch_app():
    data = request.json
    command: List[str] = data.get("command", [])
    try:
        subprocess.Popen(command)
        return "{:} launched successfully".format(" ".join(command))
    except Exception as e:
        return jsonify( { "status": "error"
                        , "message": str(e)
                        }
                      )\
             , 500
@app.route('/screenshot', methods=['GET'])
 def capture_screen_with_cursor():
@@ -77,7 +96,7 @@ def capture_screen_with_cursor():
        # Use the screencapture utility to capture the screen with the cursor
        subprocess.run(["screencapture", "-C", file_path])
    else:
-        print(f"The platform you're using ({user_platform}) is not currently supported")
+        logger.warning(f"The platform you're using ({user_platform}) is not currently supported")
    return send_file(file_path, mimetype='image/png')
@@ -173,7 +192,7 @@ def download_file():
            return "File downloaded successfully"
        except requests.RequestException as e:
-            print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
+            logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
    return f"Failed to download {url}. No retries left. Error: {e}", 500
--- a/evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
+++ b/evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
@@ -3,10 +3,98 @@
  "snapshot": "thunderbird",
  "instruction": "Help me access my gmail account with address \"xx@gmail.com\" and password \"xxx\"",
  "source": "https://www.wikihow.com/Access-Gmail-With-Mozilla-Thunderbird",
-  "config": [],
+  "config": [
    {
      "type": "download",
      "parameters": {
        "files": [
          {
            "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712",
            "path": "/home/david/thunderbird-profile.tar.gz"
          }
        ]
      }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "tar",
          "-xzv",
          "--recursive-unlink",
          "-f",
          "/home/david/thunderbird-profile.tar.gz",
          "-C",
          "/home/david/"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "/usr/bin/thunderbird"
        ]
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "thunderbird"
  ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
    "postconfig": [
      {
        "type": "download",
        "parameters": {
          "files": [
            {
              "url": "https://raw.githubusercontent.com/unode/firefox_decrypt/main/firefox_decrypt.py",
              "path": "/home/david/firefox_decrypt.py"
            }
          ]
        }
      },
      {
        "type": "execute",
        "parameters": {
          "command": [
            "python3",
            "/home/david/firefox_decrypt.py",
            "/home/david/.thunderbird",
            "-n",
            "-c",
            "2",
            "-f",
            "csv",
            "-d",
            ","
          ],
          "stdout": "thunderbird-accounts.csv"
        }
      }
    ],
    "func": "check_csv",
    "result": {
      "type": "cache_file",
      "path": "thunderbird-accounts.csv"
    },
    "expected": {
      "type": "rule",
      "rules": {
        "expect": [
          {
            "url": "imap://imap.gmail.com",
            "user": "xx@gmail.com",
            "password": "xxx"
          },
          {
            "url": "smtp://smtp.gmail.com",
            "user": "xx@gmail.com",
            "password": "xxx"
          }
        ]
      }
    }
  }
 }
--- a/evaluation_examples/examples/thunderbird/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
+++ b/evaluation_examples/examples/thunderbird/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
@@ -1,12 +1,94 @@
 {
  "id": "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
  "snapshot": "thunderbird",
-  "instruction": "Help remove the account \"xx@xx\"",
+  "instruction": "Help me to remove the account \"anonym-x2024@outlook.com\"",
  "source": "https://www.wikihow.com/Remove-an-Email-Account-from-Thunderbird",
-  "config": [],
+  "config": [
-  "trajectory": "trajectories/",
+    {
      "type": "download",
      "parameters": {
        "files": [
          {
            "url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712",
            "path": "/home/david/thunderbird-profile.tar.gz"
          }
        ]
      }
    },
    {
      "type": "execute",
      "parameters": {
        "command": [
          "tar",
          "-xzv",
          "--recursive-unlink",
          "-f",
          "/home/david/thunderbird-profile.tar.gz",
          "-C",
          "/home/david/"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "/usr/bin/thunderbird"
        ]
      }
    }
  ],
  "trajectory": "trajectories/bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
  "related_apps": [
    "thunderbird"
  ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
    "postconfig": [
      {
        "type": "download",
        "parameters": {
          "files": [
            {
              "url": "https://raw.githubusercontent.com/unode/firefox_decrypt/main/firefox_decrypt.py",
              "path": "/home/david/firefox_decrypt.py"
            }
          ]
        }
      },
      {
        "type": "execute",
        "parameters": {
          "command": [
            "python3",
            "/home/david/firefox_decrypt.py",
            "/home/david/.thunderbird",
            "-n",
            "-c",
            "2",
            "-f",
            "csv",
            "-d",
            ","
          ],
          "stdout": "thunderbird-accounts.csv"
        }
      }
    ],
    "func": "check_csv",
    "result": {
      "type": "cache_file",
      "path": "thunderbird-accounts.csv"
    },
    "expected": {
      "type": "rule",
      "rules": {
        "unexpect": [
          {
            "url": "imap://outlook.office365.com",
            "user": "anonym-x2024@outlook.com"
          }
        ]
      }
    }
  }
 }
--- a/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
+++ b/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
@@ -16,7 +16,7 @@
      }
    },
    {
-      "type": "command",
+      "type": "launch",
      "parameters": {
        "command": "vlc"
      }
--- a/main.py
+++ b/main.py
@@ -1,13 +1,50 @@
 import json
 from desktop_env.envs.desktop_env import DesktopEnv
 import logging
 import os
 import sys
 import datetime
 #  Logger Configs {{{ # 
 # The root logger accepts every record; each handler below applies its own level.
 logger = logging.getLogger()
 logger.setLevel(logging.DEBUG)
 datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
 # FileHandler opens its file as soon as it is constructed, so the target
 # directory has to exist before any of the file handlers are created.
 os.makedirs("logs", exist_ok=True)
 # normal-*: INFO and above from every logger.
 file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
 # debug-*: DEBUG and above from every logger.
 debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
 # Console: INFO and above, restricted to the "desktopenv" logger hierarchy below.
 stdout_handler = logging.StreamHandler(sys.stdout)
 # sdebug-*: DEBUG and above, restricted to the "desktopenv" logger hierarchy below.
 sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))
 file_handler.setLevel(logging.INFO)
 debug_handler.setLevel(logging.DEBUG)
 stdout_handler.setLevel(logging.INFO)
 sdebug_handler.setLevel(logging.DEBUG)
 # One shared format for all handlers; the escape sequences are ANSI colour codes.
 formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
 file_handler.setFormatter(formatter)
 debug_handler.setFormatter(formatter)
 stdout_handler.setFormatter(formatter)
 sdebug_handler.setFormatter(formatter)
 # logging.Filter(name) passes only records from that logger and its children.
 stdout_handler.addFilter(logging.Filter("desktopenv"))
 sdebug_handler.addFilter(logging.Filter("desktopenv"))
 logger.addHandler(file_handler)
 logger.addHandler(debug_handler)
 logger.addHandler(stdout_handler)
 logger.addHandler(sdebug_handler)
 #  }}} Logger Configs # 
 # From here on this module logs through its own child of the "desktopenv" hierarchy.
 logger = logging.getLogger("desktopenv.main")
 def human_agent():
    """
    Runs the Gym environment with human input.
    """
-    with open("evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json", "r") as f:
+    with open("evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json", "r") as f:
        example = json.load(f)
    example["snapshot"] = "base_setup3"
@@ -39,24 +76,26 @@ def human_agent():
        #     "action_type": 0,
        #     "click_type": 3,
        # }
-        print(trajectory[i])
+        logger.info(trajectory[i])
        observation, reward, done, info = env.step(trajectory[i], pause=5)
-        print("Observation:", observation)
+        logger.info("Observation: %s", observation)
-        print("Reward:", reward)
+        logger.info("Reward: %.2f", reward)
-        print("Info:", info)
+        logger.info("Info: %s", info)
-        print("================================\n")
+        logger.info("================================\n")
        if done:
-            print("The episode is done.")
+            logger.info("The episode is done.")
            break
    result = env.evaluate()
-    print("Result:", result)
+    logger.info("Result: %.2f", result)
    #input("PAUSING")
    env.close()
-    print("Environment closed.")
+    logger.info("Environment closed.")
 if __name__ == "__main__":