From eeb8a120d6d30f400fe7b3cd0d7c2ee0b30fc7a4 Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Fri, 5 Jan 2024 15:20:47 +0800
Subject: [PATCH] ver Jan5th

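Debugged. Summary of changes:

* Replace print() calls with the logging module, using loggers under the
  "desktopenv" namespace, and configure file/stdout handlers in main.py.
* Add an optional "shell" flag to the server's /execute endpoint and send
  it from PythonController.execute_python_command.
* Make SetupController._execute_setup post to /execute instead of /launch.
* Fix the "except"/"expect" key typo in check_csv and tolerate missing
  "expect"/"unexpect" rule lists.
* Use absolute /home/david paths instead of $HOME in the thunderbird
  examples and add "type": "rule" to their expected sections.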
---
 .gitignore                                    |  1 +
 branch-config/filelist                        |  1 +
 branch-config/ignorelist                      |  1 +
 desktop_env/controllers/python.py             | 16 ++---
 desktop_env/controllers/setup.py              | 49 ++++++++-------
 desktop_env/envs/desktop_env.py               | 36 +++++------
 desktop_env/evaluators/getters/misc.py        |  7 ++-
 desktop_env/evaluators/metrics/chrome.py      | 11 ++--
 desktop_env/evaluators/metrics/docs.py        |  6 +-
 desktop_env/evaluators/metrics/general.py     |  6 +-
 desktop_env/evaluators/metrics/table.py       | 14 +++--
 desktop_env/evaluators/metrics/utils.py       |  3 +
 desktop_env/evaluators/metrics/vlc.py         | 13 ++--
 desktop_env/server/main.py                    | 10 ++--
 .../7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json |  5 +-
 .../bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json |  5 +-
 main.py                                       | 60 +++++++++++++++----
 17 files changed, 158 insertions(+), 86 deletions(-)

diff --git a/.gitignore b/.gitignore
index 490eebb..775592d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,3 +165,4 @@ frontend/.idea
 tags
 tags-opts
 snapshots
+*.syncthing.*.tmp
diff --git a/branch-config/filelist b/branch-config/filelist
index eb3b3a1..03e1bc0 100644
--- a/branch-config/filelist
+++ b/branch-config/filelist
@@ -2,3 +2,4 @@ desktop_env
 main.py
 requirements.txt
 evaluation_examples
+logs
diff --git a/branch-config/ignorelist b/branch-config/ignorelist
index e69de29..c75dc7c 100644
--- a/branch-config/ignorelist
+++ b/branch-config/ignorelist
@@ -0,0 +1 @@
+logs/*
diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py
index 2b0dfb7..55cdb2f 100644
--- a/desktop_env/controllers/python.py
+++ b/desktop_env/controllers/python.py
@@ -3,6 +3,8 @@ from typing import Any, Dict
 import requests
 from desktop_env.envs.actions import KEYBOARD_KEYS
 
+import logging
+logger = logging.getLogger("desktopenv.pycontroller")
 
 class PythonController:
     def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
@@ -17,7 +19,7 @@ class PythonController:
         if response.status_code == 200:
             return response.content
         else:
-            print("Failed to get screenshot. Status code:", response.status_code)
+            logger.error("Failed to get screenshot. Status code: %d", response.status_code)
             return None
 
     def get_file(self, file_path: str):
@@ -26,10 +28,10 @@ class PythonController:
         """
         response = requests.post(self.http_server + "/file", data={"file_path": file_path})
         if response.status_code == 200:
-            print("File downloaded successfully")
+            logger.info("File downloaded successfully")
             return response.content
         else:
-            print("Failed to get file. Status code:", response.status_code)
+            logger.error("Failed to get file. Status code: %d", response.status_code)
             return None
 
     def execute_python_command(self, command: str) -> None:
@@ -38,7 +40,7 @@ class PythonController:
         It can be used to execute the pyautogui commands, or... any other python command. who knows?
         """
         command = self.pkgs_prefix.format(command=command)
-        payload = json.dumps({"command": command})
+        payload = json.dumps({"command": command, "shell": True})
         headers = {
             'Content-Type': 'application/json'
         }
@@ -46,12 +48,12 @@ class PythonController:
         try:
             response = requests.post(self.http_server + "/execute", headers=headers, data=payload)
             if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
             else:
-                print("Failed to execute command. Status code:", response.status_code)
+                logger.error("Failed to execute command. Status code: %d", response.status_code)
             return response.json()
         except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to execute the command:", e)
+            logger.error("An error occurred while trying to execute the command: %s", e)
 
     def execute_action(self, action: Dict[str, Any]):
         """
diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py
index 3c28d3f..aa12c43 100644
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -8,6 +8,10 @@ import os.path
 from typing import Dict, List
 from typing import Any, Union
 
+import logging
+logger = logging.getLogger("desktopenv.setup")
+
+import traceback
 
 class SetupController:
     def __init__( self
@@ -88,12 +92,12 @@ class SetupController:
                             for chunk in response.iter_content(chunk_size=8192):
                                 if chunk:
                                     f.write(chunk)
-                        print("File downloaded successfully")
+                        logger.info("File downloaded successfully")
                         downloaded = True
                         break
 
                     except requests.RequestException as e:
-                        print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
+                        logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
                 if not downloaded:
                     raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")
 
@@ -107,17 +111,18 @@ class SetupController:
                                      }
                                    )
             headers = {"Content-Type": form.content_type}
-            print(form.content_type)
+            logger.debug(form.content_type)
 
             # send request to server to upload file
             try:
+                logger.debug("REQUEST ADDRESS: %s", self.http_server + "/upload")
                 response = requests.post(self.http_server + "/upload", headers=headers, data=form)
                 if response.status_code == 200:
-                    print("Command executed successfully:", response.text)
+                    logger.info("Command executed successfully: %s", response.text)
                 else:
-                    print("Failed to upload file. Status code:", response.text)
+                    logger.error("Failed to upload file. Status code: %s", response.text)
             except requests.exceptions.RequestException as e:
-                print("An error occurred while trying to send the request:", e)
+                logger.error("An error occurred while trying to send the request: %s", e)
 
     def _change_wallpaper_setup(self, path: str):
         # if not config:
@@ -138,11 +143,11 @@ class SetupController:
         try:
             response = requests.post(self.http_server + "/change_wallpaper", headers=headers, data=payload)
             if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
             else:
-                print("Failed to change wallpaper. Status code:", response.text)
+                logger.error("Failed to change wallpaper. Status code: %s", response.text)
         except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to send the request:", e)
+            logger.error("An error occurred while trying to send the request: %s", e)
 
     def _tidy_desktop_setup(self, **config):
         raise NotImplementedError()
@@ -165,11 +170,11 @@ class SetupController:
         try:
             response = requests.post(self.http_server + "/open_file", headers=headers, data=payload)
             if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
             else:
-                print("Failed to open file. Status code:", response.text)
+                logger.error("Failed to open file. Status code: %s", response.text)
         except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to send the request:", e)
+            logger.error("An error occurred while trying to send the request: %s", e)
 
     def _launch_setup(self, command: List[str]):
         if not command:
@@ -181,11 +186,11 @@ class SetupController:
         try:
             response = requests.post(self.http_server + "/launch", headers=headers, data=payload)
             if response.status_code == 200:
-                print("Command executed successfully:", response.text)
+                logger.info("Command executed successfully: %s", response.text)
             else:
-                print("Failed to launch application. Status code:", response.text)
+                logger.error("Failed to launch application. Status code: %s", response.text)
         except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to send the request:", e)
+            logger.error("An error occurred while trying to send the request: %s", e)
 
     def _execute_setup(self, command: List[str], stdout: str = "", stderr: str = ""):
         if not command:
@@ -195,7 +200,7 @@ class SetupController:
         headers = {"Content-Type": "application/json"}
 
         try:
-            response = requests.post(self.http_server + "/launch", headers=headers, data=payload)
+            response = requests.post(self.http_server + "/execute", headers=headers, data=payload)
             if response.status_code == 200:
                 results: Dict[str, str] = response.json()
                 if stdout:
@@ -204,13 +209,15 @@ class SetupController:
                 if stderr:
                     with open(os.path.join(self.cache_dir, stderr), "w") as f:
                         f.write(results["error"])
-                print( "Command executed successfully: {:} ->".format(" ".join(command))
-                     , response.text
-                     )
+                logger.info( "Command executed successfully: %s -> %s"
+                           , " ".join(command)
+                           , response.text
+                           )
             else:
-                print("Failed to launch application. Status code:", response.text)
+                logger.error("Failed to launch application. Status code: %s", response.text)
         except requests.exceptions.RequestException as e:
-            print("An error occurred while trying to send the request:", e)
+            logger.error("An error occurred while trying to send the request: %s", e,
+                         exc_info=True)  # keep the traceback in the log handlers instead of bare stderr
 
     def _act_setup(self, action_seq: List[Union[Dict[str, Any], str]]):
         # TODO
diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py
index 1845f94..1a70b76 100644
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -17,10 +17,12 @@ from desktop_env.controllers.setup import SetupController
 # from desktop_env.evaluators import eval_funcs
 from desktop_env.evaluators import metrics, getters
 
+import logging
+logger = logging.getLogger("desktopenv.env")
+
 Metric = Callable[[Any, Any], float]
 Getter = Callable[[gym.Env, Dict[str, Any]], Any]
 
-
 def _execute_command(command: List[str]) -> None:
     if command[:4] == ["vmrun", "-T", "ws", "start"]:
         p = subprocess.Popen(command)
@@ -72,7 +74,7 @@ class DesktopEnv(gym.Env):
         self._set_task_info(task_config)
 
         # Initialize emulator and controller
-        print("Initializing...")
+        logger.info("Initializing...")
         self._start_emulator()
         self.host = f"http://{self._get_vm_ip()}:5000"
         self.controller = PythonController(http_server=self.host)
@@ -98,26 +100,26 @@ class DesktopEnv(gym.Env):
                 output: List[str] = output.splitlines()
                 # if self.path_to_vm.lstrip("~/") in output:
                 if self.path_to_vm in output:
-                    print("VM is running.")
+                    logger.info("VM is running.")
                     break
                 else:
-                    print("Starting VM...")
+                    logger.info("Starting VM...")
                     _execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm])
                     time.sleep(3)
             except subprocess.CalledProcessError as e:
-                print(f"Error executing command: {e.output.decode().strip()}")
+                logger.error(f"Error executing command: {e.output.decode().strip()}")
 
     def _get_vm_ip(self):
         max_retries = 10
-        print("Getting IP Address...")
+        logger.info("Getting IP Address...")
         for _ in range(max_retries):
             try:
                 output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip()
-                print(f"IP address: {output}")
+                logger.info(f"IP address: {output}")
                 return output
             except:
                 time.sleep(5)
-                print("Retrying...")
+                logger.info("Retrying...")
         raise Exception("Failed to get VM IP address!")
 
     def _save_state(self):
@@ -156,38 +158,38 @@ class DesktopEnv(gym.Env):
         self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
 
     def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None):
-        print("Resetting environment...")
+        logger.info("Resetting environment...")
 
-        print("Switching task...")
+        logger.info("Switching task...")
         if task_config is not None:
             self._set_task_info(task_config)
             self.setup_controller.reset_cache_dir(self.cache_dir)
 
-        print("Setting counters...")
+        logger.info("Setting counters...")
         self._traj_no += 1
         self._step_no = 0
         self.action_history.clear()
 
-        print("Setup new temp dir...")
+        logger.info("Setup new temp dir...")
         self.tmp_dir = tempfile.mkdtemp(
             prefix="{:d}.{:}.".format(self._traj_no, self.task_id),
             dir=self.tmp_dir_base
         )
         os.makedirs(os.path.join(self.tmp_dir, "screenshots"))
 
-        print("Reverting to snapshot to {}...".format(self.snapshot_path))
+        logger.info("Reverting to snapshot to {}...".format(self.snapshot_path))
         _execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
         time.sleep(5)
 
-        print("Starting emulator...")
+        logger.info("Starting emulator...")
         self._start_emulator()
-        print("Emulator started.")
+        logger.info("Emulator started.")
 
-        print("Setting up environment...")
+        logger.info("Setting up environment...")
         self.setup_controller.setup(self.config)
 
         time.sleep(5)
-        print("Environment setup complete.")
+        logger.info("Environment setup complete.")
 
         observation = self._get_obs()
         return observation
diff --git a/desktop_env/evaluators/getters/misc.py b/desktop_env/evaluators/getters/misc.py
index 4f82666..f66322b 100644
--- a/desktop_env/evaluators/getters/misc.py
+++ b/desktop_env/evaluators/getters/misc.py
@@ -5,6 +5,9 @@ import subprocess
 import ctypes
 import os
 
+import logging
+logger = logging.getLogger("desktopenv.getters.misc")
+
 R = TypeVar("Rule")
 def get_rule(env, config: R) -> R:
     """
@@ -40,7 +43,7 @@ def get_wallpaper():
         process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE)
         output, error = process.communicate()
         if error:
-            print("Error:", error)
+            logger.error("Error: %s", error)
         else:
             return output.strip().decode('utf-8')
 
@@ -49,7 +52,7 @@ def get_wallpaper():
             output = subprocess.check_output(["gsettings", "get", "org.gnome.desktop.background", "picture-uri"])
             return output.decode('utf-8').strip().replace('file://', '').replace("'", "")
         except Exception as e:
-            print("Error:", e)
+            logger.error("Error: %s", e)
             return None
 
     os_name = platform.system()
diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py
index 360b28c..91cd38d 100644
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -5,6 +5,9 @@ import sqlite3
 
 from playwright.sync_api import sync_playwright
 
+import logging
+logger = logging.getLogger("desktopenv.metrics.chrome")
+
 """
 WARNING: 
 1. Functions from this script assume that no account is registered on Chrome, otherwise the default file path needs to be changed.
@@ -36,7 +39,7 @@ def get_default_search_engine():
                                                                                                       'Google')
         return search_engine
     except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
         return "Google"
 
 
@@ -61,7 +64,7 @@ def get_cookie_data():
 
         return cookies
     except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
         return None
 
 
@@ -85,7 +88,7 @@ def get_bookmarks():
         return bookmarks
 
     except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
         return None
 
 
@@ -117,7 +120,7 @@ def get_extensions_installed_from_shop():
                             manifest = json.load(file)
                             manifests.append(manifest)
                         except json.JSONDecodeError:
-                            print(f"Error reading {manifest_path}")
+                            logger.error(f"Error reading {manifest_path}")
     return manifests
 
 
diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py
index 017ba1c..48d2c03 100644
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -4,6 +4,8 @@ from typing import List, Dict, Any
 from docx import Document
 from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
 
+import logging
+logger = logging.getLogger("desktopenv.metrics.docs")
 
 def find_default_font(config_file_path, rules):
     """Find the default font in LibreOffice Writer."""
@@ -23,7 +25,7 @@ def find_default_font(config_file_path, rules):
                 for value in prop.findall('value', namespace):
                     default_font = value.text
     except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
 
     return 1 if default_font == expected_font else 0
 
@@ -192,4 +194,4 @@ def compare_contains_image(docx_file1, docx_file2):
 # print(result)
 
 # config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
-# print(find_default_font("Ani", config_path))
\ No newline at end of file
+# print(find_default_font("Ani", config_path))
diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py
index d0776eb..37a3bc0 100644
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -18,13 +18,13 @@ def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
         float
     """
 
-    expect_metrics = [False] * len(rules["except"])
+    expect_metrics = [False] * len(rules.get("expect", []))
     unexpect_metric = True
     with open(result) as f:
         reader = csv.DictReader(f)
         
         for rcd in reader:
-            for i, r in enumerate(rules["expect"]):
+            for i, r in enumerate(rules.get("expect", [])):
                 expect_metrics[i] = expect_metrics[i] or _match_record(r, rcd)
-            unexpect_metric = unexpect_metric and all(_match_record(r, rcd) for r in rules["unexpect"])
+            unexpect_metric = unexpect_metric and not any(_match_record(r, rcd) for r in rules.get("unexpect", []))  # a record matching any unexpected rule fails the check
     return float(all(expect_metrics) and unexpect_metric)
diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py
index 4cca1d7..34ef0b0 100644
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -10,6 +10,8 @@ from typing import Dict, List
 from typing import Any, Union
 from numbers import Number
 
+import logging
+logger = logging.getLogger("desktopenv.metrics.table")
 
 def compare_table(actual: str, expected: str, **options) -> float:
     """
@@ -32,7 +34,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
     df1 = pd.read_excel(expected)
     df2 = pd.read_excel(actual)
     metric: bool = df1.equals(df2)
-    print("Normal Contents Metric: {:}".format(metric))
+    logger.debug("Normal Contents Metric: {:}".format(metric))
 
     features: List[str] = options.get("features", [])
     for ftr in features:
@@ -43,12 +45,12 @@ def compare_table(actual: str, expected: str, **options) -> float:
             sp1 = load_sparklines(actual)
             sp2 = load_sparklines(expected)
             new_metric: bool = sp1 == sp2
-            print("Sparkline Metric: {:}".format(new_metric))
+            logger.debug("Sparkline Metric: {:}".format(new_metric))
         elif ftr=="chart":
             charts1 = load_charts(workbook1, **options)
             charts2 = load_charts(workbook2, **options)
             new_metric: bool = charts1 == charts2
-            print("Chart Metric: {:}".format(new_metric))
+            logger.debug("Chart Metric: {:}".format(new_metric))
         elif ftr=="number_format":
             number_formats1: List[str] = [ c.number_format.lower()\
                                            for col in workbook1.active.iter_cols()\
@@ -61,7 +63,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
                                             if c.data_type=="n"
                                          ]
             new_metric: bool = number_formats1==number_formats2
-            print("Number Format Metric: {:}".format(new_metric))
+            logger.debug("Number Format Metric: {:}".format(new_metric))
         else:
             raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
         metric = metric and new_metric
@@ -79,7 +81,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
             expected_name: str = worksheet_names[r["sheet_idx"]]
             actual_name: str = r["sheet_name"]
             metric: bool = expected_name == actual_name
-            print("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric))
+            logger.debug("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric))
             passes = passes and metric
         elif r["type"] == "sheet_data":
             if isinstance(r["sheet_idx0"], int):
@@ -99,7 +101,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
                 sheet_idx: int = int(sheet_idx)
                 df2: pd.DataFrame = pd.read_excel(file_name, sheet_idx)
             metric: bool = df1.equals(df2)
-            print("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
+            logger.debug("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
             passes = passes and metric
         else:
             raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))
diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py
index b826d87..93bc778 100644
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -11,6 +11,9 @@ from openpyxl.chart._chart import ChartBase
 from typing import Dict, List, Set
 from typing import Any
 
+import logging
+logger = logging.getLogger("desktopenv.metrics.utils")
+
 _xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
                    , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
                    ]
diff --git a/desktop_env/evaluators/metrics/vlc.py b/desktop_env/evaluators/metrics/vlc.py
index d3083c2..2f5a9ff 100644
--- a/desktop_env/evaluators/metrics/vlc.py
+++ b/desktop_env/evaluators/metrics/vlc.py
@@ -5,6 +5,9 @@ from xml.etree import ElementTree
 import pygetwindow as gw
 import pyautogui
 
+import logging
+logger = logging.getLogger("desktopenv.metrics.vlc")
+
 def read_vlc_config(setting_name):
     """
     Reads the VLC configuration file to check for a specific setting.
@@ -24,7 +27,7 @@ def read_vlc_config(setting_name):
     config_path = paths.get(os_type)
 
     if not config_path or not os.path.exists(config_path):
-        print("VLC config file not found for this operating system.")
+        logger.warning("VLC config file not found for this operating system.")
         return None
 
     try:
@@ -33,7 +36,7 @@ def read_vlc_config(setting_name):
                 if line.startswith(setting_name):
                     return line.strip()
     except IOError as e:
-        print(f"Error reading config file: {e}")
+        logger.error(f"Error reading config file: {e}")
 
     return None
 
@@ -53,7 +56,7 @@ def get_vlc_playing_info(host='localhost', port=8080, password='password'):
                 return status, file_info
             return status, None
     except Exception as e:
-        print(f"Error: {e}")
+        logger.error(f"Error: {e}")
 
     return None, None
 
@@ -78,10 +81,10 @@ def is_vlc_fullscreen():
 
     except IndexError:
         # VLC window not found
-        print("VLC window not found.")
+        logger.error("VLC window not found.")
         return False
     except Exception as e:
-        print(f"An error occurred: {e}")
+        logger.error(f"An error occurred: {e}")
         return False
 
 
diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py
index e565787..26fc2a4 100644
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -16,17 +16,19 @@ app = Flask(__name__)
 pyautogui.PAUSE = 0
 pyautogui.DARWIN_CATCH_UP_TIME = 0
 
+logger = app.logger
 
 @app.route('/setup/execute', methods=['POST'])
 @app.route('/execute', methods=['POST'])
 def execute_command():
     data = request.json
     # The 'command' key in the JSON request should contain the command to be executed.
-    command = data.get('command', [])
+    shell = data.get('shell', False)
+    command = data.get('command', "" if shell else [])
 
     # Execute the command without any safety checks.
     try:
-        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True)
         return jsonify({
             'status': 'success',
             'output': result.stdout,
@@ -94,7 +96,7 @@ def capture_screen_with_cursor():
         # Use the screencapture utility to capture the screen with the cursor
         subprocess.run(["screencapture", "-C", file_path])
     else:
-        print(f"The platform you're using ({user_platform}) is not currently supported")
+        logger.warning(f"The platform you're using ({user_platform}) is not currently supported")
 
     return send_file(file_path, mimetype='image/png')
 
@@ -190,7 +192,7 @@ def download_file():
             return "File downloaded successfully"
 
         except requests.RequestException as e:
-            print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
+            logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
 
     return f"Failed to download {url}. No retries left. Error: {e}", 500
 
diff --git a/evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json b/evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
index 21adb0d..449fa26 100644
--- a/evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
+++ b/evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
@@ -25,7 +25,7 @@
           "-f",
           "/home/david/thunderbird-profile.tar.gz",
           "-C",
-          "$HOME/"
+          "/home/david/"
         ]
       }
     },
@@ -61,7 +61,7 @@
           "command": [
             "python3",
             "/home/david/firefox_decrypt.py",
-            "$HOME/.thunderbird",
+            "/home/david/.thunderbird",
             "-n",
             "-c",
             "2",
@@ -80,6 +80,7 @@
       "path": "thunderbird-accounts.csv"
     },
     "expected": {
+      "type": "rule",
       "rules": {
         "expect": [
           {
diff --git a/evaluation_examples/examples/thunderbird/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json b/evaluation_examples/examples/thunderbird/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
index e659ede..7e977de 100644
--- a/evaluation_examples/examples/thunderbird/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
+++ b/evaluation_examples/examples/thunderbird/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
@@ -25,7 +25,7 @@
           "-f",
           "/home/david/thunderbird-profile.tar.gz",
           "-C",
-          "$HOME/"
+          "/home/david/"
         ]
       }
     },
@@ -61,7 +61,7 @@
           "command": [
             "python3",
             "/home/david/firefox_decrypt.py",
-            "$HOME/.thunderbird",
+            "/home/david/.thunderbird",
             "-n",
             "-c",
             "2",
@@ -80,6 +80,7 @@
       "path": "thunderbird-accounts.csv"
     },
     "expected": {
+      "type": "rule",
       "rules": {
         "unexpect": [
           {
diff --git a/main.py b/main.py
index 806ca28..cebb934 100644
--- a/main.py
+++ b/main.py
@@ -1,21 +1,57 @@
 import json
 from desktop_env.envs.desktop_env import DesktopEnv
 
+import logging
+import os
+import sys
+import datetime
+
+#  Logger Configs {{{ # 
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
+os.makedirs("logs", exist_ok=True)  # FileHandler opens its file immediately and does not create missing directories
+file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
+debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
+stdout_handler = logging.StreamHandler(sys.stdout)
+sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))
+
+file_handler.setLevel(logging.INFO)
+debug_handler.setLevel(logging.DEBUG)
+stdout_handler.setLevel(logging.INFO)
+sdebug_handler.setLevel(logging.DEBUG)
+
+formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
+file_handler.setFormatter(formatter)
+debug_handler.setFormatter(formatter)
+stdout_handler.setFormatter(formatter)
+sdebug_handler.setFormatter(formatter)
+
+stdout_handler.addFilter(logging.Filter("desktopenv"))
+sdebug_handler.addFilter(logging.Filter("desktopenv"))
+
+logger.addHandler(file_handler)
+logger.addHandler(debug_handler)
+logger.addHandler(stdout_handler)
+logger.addHandler(sdebug_handler)
+#  }}} Logger Configs # 
+
+logger = logging.getLogger("desktopenv.main")
 
 def human_agent():
     """
     Runs the Gym environment with human input.
     """
 
-    with open("evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json", "r") as f:
+    with open("evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json", "r") as f:
         example = json.load(f)
-    example["snapshot"] = "Snapshot 10"
+    example["snapshot"] = "Snapshot 13"
 
     env = DesktopEnv( path_to_vm="/home/david/vmware/KUbuntu 64-bit/KUbuntu 64-bit.vmx"
                     , action_space="computer_13"
                     , task_config=example
                     )
-
     # reset the environment to certain snapshot
     observation = env.reset()
     done = False
@@ -38,24 +74,26 @@ def human_agent():
         #     "action_type": 0,
         #     "click_type": 3,
         # }
-        print(trajectory[i])
+        logger.info(trajectory[i])
 
         observation, reward, done, info = env.step(trajectory[i], pause=5)
-        print("Observation:", observation)
-        print("Reward:", reward)
-        print("Info:", info)
+        logger.info("Observation: %s", observation)
+        logger.info("Reward: %.2f", reward)
+        logger.info("Info: %s", info)
 
-        print("================================\n")
+        logger.info("================================\n")
 
         if done:
-            print("The episode is done.")
+            logger.info("The episode is done.")
             break
 
     result = env.evaluate()
-    print("Result:", result)
+    logger.info("Result: %.2f", result)
+
+    #input("PAUSING")
 
     env.close()
-    print("Environment closed.")
+    logger.info("Environment closed.")
 
 
 if __name__ == "__main__":