Merge branch 'zdy'

This commit is contained in:
David Chang
2024-01-05 15:55:41 +08:00
19 changed files with 522 additions and 180 deletions

1
.gitignore vendored
View File

@@ -171,3 +171,4 @@ tags-opts
snapshots snapshots
branch_flag branch_flag
branch-config branch-config
*.syncthing.*.tmp

View File

@@ -3,6 +3,8 @@ from typing import Any, Dict
import requests import requests
from desktop_env.envs.actions import KEYBOARD_KEYS from desktop_env.envs.actions import KEYBOARD_KEYS
import logging
logger = logging.getLogger("desktopenv.pycontroller")
class PythonController: class PythonController:
def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""): def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
@@ -17,7 +19,7 @@ class PythonController:
if response.status_code == 200: if response.status_code == 200:
return response.content return response.content
else: else:
print("Failed to get screenshot. Status code:", response.status_code) logger.error("Failed to get screenshot. Status code: %d", response.status_code)
return None return None
def get_file(self, file_path: str): def get_file(self, file_path: str):
@@ -26,10 +28,10 @@ class PythonController:
""" """
response = requests.post(self.http_server + "/file", data={"file_path": file_path}) response = requests.post(self.http_server + "/file", data={"file_path": file_path})
if response.status_code == 200: if response.status_code == 200:
print("File downloaded successfully") logger.info("File downloaded successfully")
return response.content return response.content
else: else:
print("Failed to get file. Status code:", response.status_code) logger.error("Failed to get file. Status code: %d", response.status_code)
return None return None
def execute_python_command(self, command: str) -> None: def execute_python_command(self, command: str) -> None:
@@ -38,7 +40,7 @@ class PythonController:
It can be used to execute the pyautogui commands, or... any other python command. who knows? It can be used to execute the pyautogui commands, or... any other python command. who knows?
""" """
command = self.pkgs_prefix.format(command=command) command = self.pkgs_prefix.format(command=command)
payload = json.dumps({"command": command}) payload = json.dumps({"command": command, "shell": True})
headers = { headers = {
'Content-Type': 'application/json' 'Content-Type': 'application/json'
} }
@@ -46,12 +48,12 @@ class PythonController:
try: try:
response = requests.post(self.http_server + "/execute", headers=headers, data=payload) response = requests.post(self.http_server + "/execute", headers=headers, data=payload)
if response.status_code == 200: if response.status_code == 200:
print("Command executed successfully:", response.text) logger.info("Command executed successfully: %s", response.text)
else: else:
print("Failed to execute command. Status code:", response.status_code) logger.error("Failed to execute command. Status code: %d", response.status_code)
return response.json() return response.json()
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
print("An error occurred while trying to execute the command:", e) logger.error("An error occurred while trying to execute the command: %s", e)
def execute_action(self, action: Dict[str, Any]): def execute_action(self, action: Dict[str, Any]):
""" """

View File

@@ -6,8 +6,12 @@ import uuid
import os.path import os.path
from typing import Dict, List from typing import Dict, List
from typing import Any from typing import Any, Union
import logging
logger = logging.getLogger("desktopenv.setup")
import traceback
class SetupController: class SetupController:
def __init__(self, http_server: str, cache_dir: str): def __init__(self, http_server: str, cache_dir: str):
@@ -47,31 +51,32 @@ class SetupController:
# self._open_setup(config) # self._open_setup(config)
# can add other setup steps # can add other setup steps
def _command_setup(self, command: str): # ZDY_COMMENT: merged with launch
""" #def _command_setup(self, command: str):
Directly send a command into the virtual machine os for setting up. #"""
""" #Directly send a command into the virtual machine os for setting up.
payload = json.dumps({"command": command}) #"""
headers = { #payload = json.dumps({"command": command})
'Content-Type': 'application/json' #headers = {
} #'Content-Type': 'application/json'
timeout = 5 #}
timout_whitelist = ["vlc"] #timeout = 5
#timout_whitelist = ["vlc"]
try: #
#try:
response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout) #
if response.status_code == 200: #response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
print("Command executed successfully:", response.text) #if response.status_code == 200:
else: #print("Command executed successfully:", response.text)
print("Failed to execute command. Status code:", response.status_code) #else:
except requests.exceptions.Timeout as e: #print("Failed to execute command. Status code:", response.status_code)
if command in timout_whitelist: #except requests.exceptions.Timeout as e:
print("Command executed successfully:", command) #if command in timout_whitelist:
else: #print("Command executed successfully:", command)
print("An error occurred while trying to execute the command:", e) #else:
except requests.exceptions.RequestException as e: #print("An error occurred while trying to execute the command:", e)
print("An error occurred while trying to execute the command:", e) #except requests.exceptions.RequestException as e:
#print("An error occurred while trying to execute the command:", e)
def _download_setup(self, files: List[Dict[str, str]]): def _download_setup(self, files: List[Dict[str, str]]):
""" """
@@ -110,12 +115,12 @@ class SetupController:
for chunk in response.iter_content(chunk_size=8192): for chunk in response.iter_content(chunk_size=8192):
if chunk: if chunk:
f.write(chunk) f.write(chunk)
print("File downloaded successfully") logger.info("File downloaded successfully")
downloaded = True downloaded = True
break break
except requests.RequestException as e: except requests.RequestException as e:
print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)") logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
if not downloaded: if not downloaded:
raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}") raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")
@@ -129,17 +134,18 @@ class SetupController:
"file_data": (os.path.basename(path), open(cache_path, "rb")) "file_data": (os.path.basename(path), open(cache_path, "rb"))
}) })
headers = {"Content-Type": form.content_type} headers = {"Content-Type": form.content_type}
print(form.content_type) logger.debug(form.content_type)
# send request to server to upload file # send request to server to upload file
try: try:
logger.debug("REQUEST ADDRESS: %s", self.http_server_setup_root + "/upload")
response = requests.post(self.http_server_setup_root + "/upload", headers=headers, data=form) response = requests.post(self.http_server_setup_root + "/upload", headers=headers, data=form)
if response.status_code == 200: if response.status_code == 200:
print("Command executed successfully:", response.text) logger.info("Command executed successfully: %s", response.text)
else: else:
print("Failed to upload file. Status code:", response.text) logger.error("Failed to upload file. Status code: %s", response.text)
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
print("An error occurred while trying to send the request:", e) logger.error("An error occurred while trying to send the request: %s", e)
def _change_wallpaper_setup(self, path: str): def _change_wallpaper_setup(self, path: str):
# if not config: # if not config:
@@ -160,14 +166,14 @@ class SetupController:
try: try:
response = requests.post(self.http_server_setup_root + "/change_wallpaper", headers=headers, data=payload) response = requests.post(self.http_server_setup_root + "/change_wallpaper", headers=headers, data=payload)
if response.status_code == 200: if response.status_code == 200:
print("Command executed successfully:", response.text) logger.info("Command executed successfully: %s", response.text)
else: else:
print("Failed to change wallpaper. Status code:", response.text) logger.error("Failed to change wallpaper. Status code: %s", response.text)
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
print("An error occurred while trying to send the request:", e) logger.error("An error occurred while trying to send the request: %s", e)
def _tidy_desktop_setup(self, **config): def _tidy_desktop_setup(self, **config):
raise NotImplementedError raise NotImplementedError()
def _open_setup(self, path: str): def _open_setup(self, path: str):
# if not config: # if not config:
@@ -187,8 +193,63 @@ class SetupController:
try: try:
response = requests.post(self.http_server_setup_root + "/open_file", headers=headers, data=payload) response = requests.post(self.http_server_setup_root + "/open_file", headers=headers, data=payload)
if response.status_code == 200: if response.status_code == 200:
print("Command executed successfully:", response.text) logger.info("Command executed successfully: %s", response.text)
else: else:
print("Failed to open file. Status code:", response.text) logger.error("Failed to open file. Status code: %s", response.text)
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
print("An error occurred while trying to send the request:", e) logger.error("An error occurred while trying to send the request: %s", e)
def _launch_setup(self, command: List[str]):
    """
    Ask the setup server to launch a command.

    The command is posted as JSON to the "/launch" endpoint under
    self.http_server_setup_root. Non-200 responses and request errors are
    logged, not raised.

    Args:
        command (List[str]): command to launch; must not be empty

    Raises:
        Exception: if command is empty
    """
    if not command:
        raise Exception("Empty command to launch.")

    payload = json.dumps({"command": command})
    headers = {"Content-Type": "application/json"}

    try:
        response = requests.post(self.http_server_setup_root + "/launch", headers=headers, data=payload)
        if response.status_code == 200:
            logger.info("Command executed successfully: %s", response.text)
        else:
            # Report the real status code alongside the body; the body alone
            # was previously logged under the "Status code" label.
            logger.error(
                "Failed to launch application. Status code: %d, response: %s",
                response.status_code,
                response.text,
            )
    except requests.exceptions.RequestException as e:
        logger.error("An error occurred while trying to send the request: %s", e)
def _execute_setup(self, command: List[str], stdout: str = "", stderr: str = ""):
    """
    Ask the setup server to execute a command and optionally save its output.

    The command is posted as JSON to the "/execute" endpoint under
    self.http_server_setup_root. On a 200 response the JSON body is read and
    its "output" / "error" fields are written to files inside self.cache_dir
    when the corresponding file name is given. Non-200 responses and request
    errors are logged, not raised.

    Args:
        command (List[str]): command to execute; must not be empty
        stdout (str): file name relative to the cache dir receiving the
          "output" field of the response; nothing is written if empty
        stderr (str): file name relative to the cache dir receiving the
          "error" field of the response; nothing is written if empty

    Raises:
        Exception: if command is empty
    """
    if not command:
        raise Exception("Empty command to execute.")

    payload = json.dumps({"command": command})
    headers = {"Content-Type": "application/json"}

    try:
        response = requests.post(self.http_server_setup_root + "/execute", headers=headers, data=payload)
        if response.status_code == 200:
            results: Dict[str, str] = response.json()
            if stdout:
                with open(os.path.join(self.cache_dir, stdout), "w") as f:
                    f.write(results["output"])
            if stderr:
                with open(os.path.join(self.cache_dir, stderr), "w") as f:
                    f.write(results["error"])
            logger.info(
                "Command executed successfully: %s -> %s",
                " ".join(command),
                response.text,
            )
        else:
            logger.error(
                "Failed to execute command. Status code: %d, response: %s",
                response.status_code,
                response.text,
            )
    except requests.exceptions.RequestException as e:
        # exc_info routes the traceback through the logger instead of printing
        # it straight to stderr, so it follows the configured log handlers.
        logger.error("An error occurred while trying to send the request: %s", e, exc_info=True)
def _act_setup(self, action_seq: List[Union[Dict[str, Any], str]]):
# Placeholder setup step: nothing is implemented yet, so every call raises
# NotImplementedError. How action_seq is meant to be interpreted is not
# defined here.
# TODO
raise NotImplementedError()
def _replay_setup(self, trajectory: str):
"""
Placeholder for the replay setup step; not implemented yet.

Args:
trajectory (str): path to the replay trajectory file

Raises:
NotImplementedError: always
"""
# TODO
raise NotImplementedError()

View File

@@ -17,10 +17,12 @@ from desktop_env.controllers.setup import SetupController
# from desktop_env.evaluators import eval_funcs # from desktop_env.evaluators import eval_funcs
from desktop_env.evaluators import metrics, getters from desktop_env.evaluators import metrics, getters
import logging
logger = logging.getLogger("desktopenv.env")
Metric = Callable[[Any, Any], float] Metric = Callable[[Any, Any], float]
Getter = Callable[[gym.Env, Dict[str, Any]], Any] Getter = Callable[[gym.Env, Dict[str, Any]], Any]
def _execute_command(command: List[str]) -> None: def _execute_command(command: List[str]) -> None:
if command[:4] == ["vmrun", "-T", "ws", "start"]: if command[:4] == ["vmrun", "-T", "ws", "start"]:
p = subprocess.Popen(command) p = subprocess.Popen(command)
@@ -68,22 +70,11 @@ class DesktopEnv(gym.Env):
self.cache_dir_base: str = cache_dir self.cache_dir_base: str = cache_dir
# task-aware stuffs # task-aware stuffs
self.snapshot_path = task_config["snapshot"] # todo: handling the logic of snapshot directory # todo: handling the logic of snapshot directory
self.task_id: str = task_config["id"] self._set_task_info(task_config)
self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id)
os.makedirs(self.cache_dir, exist_ok=True)
self.instruction = task_config["instruction"]
self.config = task_config["config"]
self.evaluator = task_config["evaluator"]
self.metric: Metric = getattr(metrics, self.evaluator["func"])
self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
self.expected_getter: Getter = getattr(getters, "get_{:}".format(
self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None
self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
# Initialize emulator and controller # Initialize emulator and controller
print("Initializing...") logger.info("Initializing...")
self._start_emulator() self._start_emulator()
self.vm_ip = self._get_vm_ip() self.vm_ip = self._get_vm_ip()
self.host = f"http://{self.vm_ip}:5000" self.host = f"http://{self.vm_ip}:5000"
@@ -110,26 +101,26 @@ class DesktopEnv(gym.Env):
output: List[str] = output.splitlines() output: List[str] = output.splitlines()
# if self.path_to_vm.lstrip("~/") in output: # if self.path_to_vm.lstrip("~/") in output:
if self.path_to_vm in output: if self.path_to_vm in output:
print("VM is running.") logger.info("VM is running.")
break break
else: else:
print("Starting VM...") logger.info("Starting VM...")
_execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm]) _execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm])
time.sleep(3) time.sleep(3)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
print(f"Error executing command: {e.output.decode().strip()}") logger.error(f"Error executing command: {e.output.decode().strip()}")
def _get_vm_ip(self): def _get_vm_ip(self):
max_retries = 10 max_retries = 10
print("Getting IP Address...") logger.info("Getting IP Address...")
for _ in range(max_retries): for _ in range(max_retries):
try: try:
output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip() output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip()
print(f"IP address: {output}") logger.info(f"IP address: {output}")
return output return output
except: except:
time.sleep(5) time.sleep(5)
print("Retrying...") logger.info("Retrying...")
raise Exception("Failed to get VM IP address!") raise Exception("Failed to get VM IP address!")
def _save_state(self): def _save_state(self):
@@ -152,52 +143,54 @@ class DesktopEnv(gym.Env):
screenshot_image_path = self._get_screenshot() screenshot_image_path = self._get_screenshot()
return screenshot_image_path return screenshot_image_path
def _set_task_info(self, task_config: Dict[str, Any]):
# Load every task-specific attribute from task_config in one place.
# Required keys read here: "snapshot", "id", "instruction", "config" and
# "evaluator"; a missing one raises KeyError.
self.snapshot_path = task_config["snapshot"]
self.task_id: str = task_config["id"]
# Each task gets its own cache directory under cache_dir_base, created on demand.
self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id)
os.makedirs(self.cache_dir, exist_ok=True)
self.instruction = task_config["instruction"]
self.config = task_config["config"]
self.evaluator = task_config["evaluator"]
# The metric is looked up by name in the metrics module; getters are looked
# up in the getters module as "get_<type>".
self.metric: Metric = getattr(metrics, self.evaluator["func"])
self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
# The "expected" section is optional; without it there is no expected getter.
self.expected_getter: Getter = getattr(getters, "get_{:}".format(
self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None
# Optional evaluator "options"; defaults to an empty dict.
self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None): def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None):
print("Resetting environment...") logger.info("Resetting environment...")
print("Switching task...") logger.info("Switching task...")
if task_config is not None: if task_config is not None:
self.snapshot_path = task_config["snapshot"] self._set_task_info(task_config)
self.task_id = task_config["id"]
self.cache_dir = os.path.join(self.cache_dir_base, self.task_id)
os.makedirs(self.cache_dir, exist_ok=True)
self.instruction = task_config["instruction"]
self.config = task_config["config"]
self.evaluator = task_config["evaluator"]
self.metric: Metric = getattr(metrics, self.evaluator["func"])
self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
self.expected_getter: Getter = getattr(getters, "get_{:}".format(
self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None
self.metric_options = self.evaluator.get("options", {})
self.setup_controller.reset_cache_dir(self.cache_dir) self.setup_controller.reset_cache_dir(self.cache_dir)
print("Setting counters...") logger.info("Setting counters...")
self._traj_no += 1 self._traj_no += 1
self._step_no = 0 self._step_no = 0
self.action_history.clear() self.action_history.clear()
print("Setup new temp dir...") logger.info("Setup new temp dir...")
self.tmp_dir = tempfile.mkdtemp( self.tmp_dir = tempfile.mkdtemp(
prefix="{:d}.{:}.".format(self._traj_no, self.task_id), prefix="{:d}.{:}.".format(self._traj_no, self.task_id),
dir=self.tmp_dir_base dir=self.tmp_dir_base
) )
os.makedirs(os.path.join(self.tmp_dir, "screenshots")) os.makedirs(os.path.join(self.tmp_dir, "screenshots"))
print("Reverting to snapshot to {}...".format(self.snapshot_path)) logger.info("Reverting to snapshot to {}...".format(self.snapshot_path))
_execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path]) _execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
time.sleep(5) time.sleep(5)
print("Starting emulator...") logger.info("Starting emulator...")
self._start_emulator() self._start_emulator()
print("Emulator started.") logger.info("Emulator started.")
print("Setting up environment...") logger.info("Setting up environment...")
self.setup_controller.setup(self.config) self.setup_controller.setup(self.config)
time.sleep(5) time.sleep(5)
print("Environment setup complete.") logger.info("Environment setup complete.")
observation = self._get_obs() observation = self._get_obs()
return observation return observation
@@ -229,6 +222,9 @@ class DesktopEnv(gym.Env):
""" """
Evaluate whether the task is successfully completed. Evaluate whether the task is successfully completed.
""" """
self.setup_controller.setup(self.evaluator["postconfig"])
result_state = self.result_getter(self, self.evaluator["result"]) result_state = self.result_getter(self, self.evaluator["result"])
expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \ expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \
else None else None

View File

@@ -1,3 +1,3 @@
from .file import get_cloud_file, get_vm_file from .file import get_cloud_file, get_vm_file, get_cache_file
from .misc import get_rule from .misc import get_rule
from .vlc import get_vlc_playing_info from .vlc import get_vlc_playing_info

View File

@@ -41,3 +41,13 @@ def get_vm_file(env, config: Dict[str, str]) -> str:
f.write(file) f.write(file)
return _path return _path
def get_cache_file(env, config: Dict[str, str]) -> str:
    """
    Resolve a file that is expected to already exist in the env's cache dir.

    Config:
        path (str): relative path in cache dir

    Returns:
        str: the path joined onto env.cache_dir

    Raises:
        FileNotFoundError: if the resolved path does not exist
    """

    _path = os.path.join(env.cache_dir, config["path"])
    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, which would let a missing file slip through silently.
    if not os.path.exists(_path):
        raise FileNotFoundError("Cache file not found: {:}".format(_path))
    return _path

View File

@@ -1,8 +1,66 @@
from typing import TypeVar from typing import TypeVar
import platform
import subprocess
import ctypes
import os
import logging
logger = logging.getLogger("desktopenv.getters.misc")
R = TypeVar("Rule") R = TypeVar("Rule")
def get_rule(env, config: R) -> R: def get_rule(env, config: R) -> R:
""" """
Returns the rule as-is. Returns the rule as-is.
""" """
return config["rules"] return config["rules"]
def get_desktop_path():
    """
    Build the conventional Desktop directory path of the current user.

    Returns:
        str: "C:\\Users\\<user>\\Desktop" on Windows, "/Users/<user>/Desktop"
          on macOS, "/home/<user>/Desktop" on Linux

    Raises:
        Exception: if the operating system is none of the three above
    """
    try:
        username = os.getlogin()  # Get the current username
    except OSError:
        # os.getlogin() needs a controlling terminal and fails in services,
        # containers and CI; fall back to the environment / passwd lookup.
        import getpass
        username = getpass.getuser()

    system = platform.system()
    if system == "Windows":
        # "C:\\" rather than "C:": joining onto a bare drive letter gives the
        # drive-relative path "C:Users\\..." instead of an absolute one.
        return os.path.join("C:\\", "Users", username, "Desktop")
    elif system == "Darwin":  # macOS is identified as 'Darwin'
        return os.path.join("/Users", username, "Desktop")
    elif system == "Linux":
        return os.path.join("/home", username, "Desktop")
    else:
        raise Exception("Unsupported operating system")
def get_wallpaper():
    """
    Query the operating system for the current desktop wallpaper.

    Returns:
        str or None: the wallpaper location reported by the OS; None if the
          query failed on macOS or Linux; the literal string "Unsupported OS"
          on any other platform
    """

    def get_wallpaper_windows():
        # SystemParametersInfoW(SPI_GETDESKWALLPAPER) fills the buffer with the wallpaper path.
        SPI_GETDESKWALLPAPER = 0x73
        MAX_PATH = 260
        buffer = ctypes.create_unicode_buffer(MAX_PATH)
        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
        return buffer.value

    def get_wallpaper_macos():
        script = 'tell application "System Events" to tell every desktop to get picture'
        # stderr must be piped as well, otherwise communicate() always returns
        # None for it and a failure can never be detected.
        process = subprocess.Popen(
            ['osascript', '-e', script],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        output, error = process.communicate()
        if process.returncode != 0:
            logger.error("Error: %s", error.decode('utf-8', errors='replace').strip())
            return None
        return output.strip().decode('utf-8')

    def get_wallpaper_linux():
        # Reads the GNOME background setting via gsettings.
        try:
            output = subprocess.check_output(["gsettings", "get", "org.gnome.desktop.background", "picture-uri"])
            # gsettings prints a quoted file:// URI; reduce it to a bare path.
            return output.decode('utf-8').strip().replace('file://', '').replace("'", "")
        except Exception as e:
            logger.error("Error: %s", e)
            return None

    os_name = platform.system()
    if os_name == 'Windows':
        return get_wallpaper_windows()
    elif os_name == 'Darwin':
        return get_wallpaper_macos()
    elif os_name == 'Linux':
        return get_wallpaper_linux()
    else:
        # Kept as a sentinel string (not None / an exception) for existing callers.
        return "Unsupported OS"

View File

@@ -6,3 +6,4 @@ from .docs import is_first_line_centered, check_file_exists, compare_contains_im
from .pdf import check_pdf_pages from .pdf import check_pdf_pages
from .libreoffice import check_libre_locale from .libreoffice import check_libre_locale
from .vlc import is_vlc_playing from .vlc import is_vlc_playing
from .general import check_csv

View File

@@ -5,6 +5,9 @@ import sqlite3
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
import logging
logger = logging.getLogger("desktopenv.metrics.chrome")
""" """
WARNING: WARNING:
1. Functions from this script assume that no account is registered on Chrome, otherwise the default file path needs to be changed. 1. Functions from this script assume that no account is registered on Chrome, otherwise the default file path needs to be changed.
@@ -36,7 +39,7 @@ def get_default_search_engine():
'Google') 'Google')
return search_engine return search_engine
except Exception as e: except Exception as e:
print(f"Error: {e}") logger.error(f"Error: {e}")
return "Google" return "Google"
@@ -61,7 +64,7 @@ def get_cookie_data():
return cookies return cookies
except Exception as e: except Exception as e:
print(f"Error: {e}") logger.error(f"Error: {e}")
return None return None
@@ -85,7 +88,7 @@ def get_bookmarks():
return bookmarks return bookmarks
except Exception as e: except Exception as e:
print(f"Error: {e}") logger.error(f"Error: {e}")
return None return None
@@ -117,7 +120,7 @@ def get_extensions_installed_from_shop():
manifest = json.load(file) manifest = json.load(file)
manifests.append(manifest) manifests.append(manifest)
except json.JSONDecodeError: except json.JSONDecodeError:
print(f"Error reading {manifest_path}") logger.error(f"Error reading {manifest_path}")
return manifests return manifests

View File

@@ -4,6 +4,8 @@ from typing import List, Dict, Any
from docx import Document from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
import logging
logger = logging.getLogger("desktopenv.metric.docs")
def find_default_font(config_file_path, rules): def find_default_font(config_file_path, rules):
"""Find the default font in LibreOffice Writer.""" """Find the default font in LibreOffice Writer."""
@@ -23,7 +25,7 @@ def find_default_font(config_file_path, rules):
for value in prop.findall('value', namespace): for value in prop.findall('value', namespace):
default_font = value.text default_font = value.text
except Exception as e: except Exception as e:
print(f"Error: {e}") logger.error(f"Error: {e}")
return 1 if default_font == expected_font else 0 return 1 if default_font == expected_font else 0
@@ -192,4 +194,4 @@ def compare_contains_image(docx_file1, docx_file2):
# print(result) # print(result)
# config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu" # config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
# print(find_default_font("Ani", config_path)) # print(find_default_font("Ani", config_path))

View File

@@ -1,57 +1,30 @@
import platform import csv
import subprocess from typing import Dict, List
import ctypes
import os
def _match_record(pattern: Dict[str, str], item: Dict[str, str]) -> bool:
    """Tell whether item contains every key of pattern with an equal value."""
    return all(k in item and item[k] == val for k, val in pattern.items())


def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
    """
    Check the rows of a csv file against expected and unexpected patterns.

    A row matches a pattern when it holds every key of the pattern with an
    equal value. The check passes when each "expect" pattern is matched by at
    least one row and no row matches any "unexpect" pattern.

    Args:
        result (str): path to csv file
        rules (Dict[str, List[Dict[str, str]]]): dict like
          {
            "expect": [{key: value}]
            "unexpect": [{key: value}]
          }
          Both keys are optional.

    Returns:
        float: 1.0 if the check passes, 0.0 otherwise
    """

    expect_rules = rules.get("expect", [])
    unexpect_rules = rules.get("unexpect", [])

    expect_metrics = [False] * len(expect_rules)
    unexpect_metric = True
    # newline="" lets the csv module handle line endings and quoted newlines itself.
    with open(result, newline="") as f:
        reader = csv.DictReader(f)

        for rcd in reader:
            for i, r in enumerate(expect_rules):
                expect_metrics[i] = expect_metrics[i] or _match_record(r, rcd)
            # A single row matching any unexpected pattern fails the check.
            # (`all(...)` here would instead demand that every row matches.)
            unexpect_metric = unexpect_metric and not any(_match_record(r, rcd) for r in unexpect_rules)
    return float(all(expect_metrics) and unexpect_metric)

View File

@@ -10,6 +10,8 @@ from typing import Dict, List
from typing import Any, Union from typing import Any, Union
from numbers import Number from numbers import Number
import logging
logger = logging.getLogger("desktopenv.metric.table")
def compare_table(actual: str, expected: str, **options) -> float: def compare_table(actual: str, expected: str, **options) -> float:
""" """
@@ -32,7 +34,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
df1 = pd.read_excel(expected) df1 = pd.read_excel(expected)
df2 = pd.read_excel(actual) df2 = pd.read_excel(actual)
metric: bool = df1.equals(df2) metric: bool = df1.equals(df2)
print("Normal Contents Metric: {:}".format(metric)) logger.debug("Normal Contents Metric: {:}".format(metric))
features: List[str] = options.get("features", []) features: List[str] = options.get("features", [])
for ftr in features: for ftr in features:
@@ -43,12 +45,12 @@ def compare_table(actual: str, expected: str, **options) -> float:
sp1 = load_sparklines(actual) sp1 = load_sparklines(actual)
sp2 = load_sparklines(expected) sp2 = load_sparklines(expected)
new_metric: bool = sp1 == sp2 new_metric: bool = sp1 == sp2
print("Sparkline Metric: {:}".format(new_metric)) logger.debug("Sparkline Metric: {:}".format(new_metric))
elif ftr=="chart": elif ftr=="chart":
charts1 = load_charts(workbook1, **options) charts1 = load_charts(workbook1, **options)
charts2 = load_charts(workbook2, **options) charts2 = load_charts(workbook2, **options)
new_metric: bool = charts1 == charts2 new_metric: bool = charts1 == charts2
print("Chart Metric: {:}".format(new_metric)) logger.debug("Chart Metric: {:}".format(new_metric))
elif ftr=="number_format": elif ftr=="number_format":
number_formats1: List[str] = [ c.number_format.lower()\ number_formats1: List[str] = [ c.number_format.lower()\
for col in workbook1.active.iter_cols()\ for col in workbook1.active.iter_cols()\
@@ -61,7 +63,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
if c.data_type=="n" if c.data_type=="n"
] ]
new_metric: bool = number_formats1==number_formats2 new_metric: bool = number_formats1==number_formats2
print("Number Format Metric: {:}".format(new_metric)) logger.debug("Number Format Metric: {:}".format(new_metric))
else: else:
raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr)) raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
metric = metric and new_metric metric = metric and new_metric
@@ -79,7 +81,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
expected_name: str = worksheet_names[r["sheet_idx"]] expected_name: str = worksheet_names[r["sheet_idx"]]
actual_name: str = r["sheet_name"] actual_name: str = r["sheet_name"]
metric: bool = expected_name == actual_name metric: bool = expected_name == actual_name
print("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric)) logger.debug("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric))
passes = passes and metric passes = passes and metric
elif r["type"] == "sheet_data": elif r["type"] == "sheet_data":
if isinstance(r["sheet_idx0"], int): if isinstance(r["sheet_idx0"], int):
@@ -99,7 +101,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
sheet_idx: int = int(sheet_idx) sheet_idx: int = int(sheet_idx)
df2: pd.DataFrame = pd.read_excel(file_name, sheet_idx) df2: pd.DataFrame = pd.read_excel(file_name, sheet_idx)
metric: bool = df1.equals(df2) metric: bool = df1.equals(df2)
print("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric)) logger.debug("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
passes = passes and metric passes = passes and metric
else: else:
raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"])) raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))

View File

@@ -11,6 +11,9 @@ from openpyxl.chart._chart import ChartBase
from typing import Dict, List, Set from typing import Dict, List, Set
from typing import Any from typing import Any
import logging
logger = logging.getLogger("desktopenv.metrics.utils")
_xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main") _xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main") , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
] ]

View File

@@ -5,6 +5,8 @@ import pygetwindow as gw
import pyautogui import pyautogui
from typing import Dict from typing import Dict
import logging
logger = logging.getLogger("desktopenv.metrics.vlc")
def get_vlc_config(setting_name): def get_vlc_config(setting_name):
""" """
@@ -25,7 +27,7 @@ def get_vlc_config(setting_name):
config_path = paths.get(os_type) config_path = paths.get(os_type)
if not config_path or not os.path.exists(config_path): if not config_path or not os.path.exists(config_path):
print("VLC config file not found for this operating system.") logger.warning("VLC config file not found for this operating system.")
return None return None
try: try:
@@ -34,7 +36,7 @@ def get_vlc_config(setting_name):
if line.startswith(setting_name): if line.startswith(setting_name):
return line.strip() return line.strip()
except IOError as e: except IOError as e:
print(f"Error reading config file: {e}") logger.error(f"Error reading config file: {e}")
return None return None
@@ -77,8 +79,8 @@ def is_vlc_fullscreen():
except IndexError: except IndexError:
# VLC window not found # VLC window not found
print("VLC window not found.") logger.error("VLC window not found.")
return False return False
except Exception as e: except Exception as e:
print(f"An error occurred: {e}") logger.error(f"An error occurred: {e}")
return False return False

View File

@@ -9,22 +9,26 @@ import pyautogui
# from PIL import ImageGrab, Image # from PIL import ImageGrab, Image
from PIL import Image from PIL import Image
from flask import Flask, request, jsonify, send_file from flask import Flask, request, jsonify, send_file
from typing import List
app = Flask(__name__) app = Flask(__name__)
pyautogui.PAUSE = 0 pyautogui.PAUSE = 0
pyautogui.DARWIN_CATCH_UP_TIME = 0 pyautogui.DARWIN_CATCH_UP_TIME = 0
logger = app.logger
@app.route('/setup/execute', methods=['POST'])
@app.route('/execute', methods=['POST']) @app.route('/execute', methods=['POST'])
def execute_command(): def execute_command():
data = request.json data = request.json
# The 'command' key in the JSON request should contain the command to be executed. # The 'command' key in the JSON request should contain the command to be executed.
command = data.get('command', '') shell = data.get('shell', False)
command = data.get('command', "" if shell else [])
# Execute the command without any safety checks. # Execute the command without any safety checks.
try: try:
result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True)
return jsonify({ return jsonify({
'status': 'success', 'status': 'success',
'output': result.stdout, 'output': result.stdout,
@@ -36,6 +40,21 @@ def execute_command():
'message': str(e) 'message': str(e)
}), 500 }), 500
@app.route('/setup/launch', methods=["POST"])
def launch_app():
    """Start a program in the background without waiting for it to finish.

    Reads the "command" entry of the JSON request body and hands it to
    ``subprocess.Popen`` without a shell. The entry may be either an argument
    list (e.g. ``["/usr/bin/thunderbird"]``) or a single string naming the
    program (e.g. ``"vlc"``).

    Returns:
        A plain-text "<command> launched successfully" message when the process
        could be spawned; otherwise a JSON object with "status" and "message"
        keys together with HTTP status 500.
    """
    data = request.json
    # No type annotation on purpose: task configs send both lists and bare strings here.
    command = data.get("command", [])

    try:
        subprocess.Popen(command)
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500

    # " ".join() on a str would split it into characters ("vlc" -> "v l c"),
    # so only join when an argument sequence was given.
    if isinstance(command, str):
        command_str = command
    else:
        command_str = " ".join(str(arg) for arg in command)
    return "{:} launched successfully".format(command_str)
@app.route('/screenshot', methods=['GET']) @app.route('/screenshot', methods=['GET'])
def capture_screen_with_cursor(): def capture_screen_with_cursor():
@@ -77,7 +96,7 @@ def capture_screen_with_cursor():
# Use the screencapture utility to capture the screen with the cursor # Use the screencapture utility to capture the screen with the cursor
subprocess.run(["screencapture", "-C", file_path]) subprocess.run(["screencapture", "-C", file_path])
else: else:
print(f"The platform you're using ({user_platform}) is not currently supported") logger.warning(f"The platform you're using ({user_platform}) is not currently supported")
return send_file(file_path, mimetype='image/png') return send_file(file_path, mimetype='image/png')
@@ -173,7 +192,7 @@ def download_file():
return "File downloaded successfully" return "File downloaded successfully"
except requests.RequestException as e: except requests.RequestException as e:
print(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)") logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
return f"Failed to download {url}. No retries left. Error: {e}", 500 return f"Failed to download {url}. No retries left. Error: {e}", 500

View File

@@ -3,10 +3,98 @@
"snapshot": "thunderbird", "snapshot": "thunderbird",
"instruction": "Help me access my gmail account with address \"xx@gmail.com\" and password \"xxx\"", "instruction": "Help me access my gmail account with address \"xx@gmail.com\" and password \"xxx\"",
"source": "https://www.wikihow.com/Access-Gmail-With-Mozilla-Thunderbird", "source": "https://www.wikihow.com/Access-Gmail-With-Mozilla-Thunderbird",
"config": [], "config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712",
"path": "/home/david/thunderbird-profile.tar.gz"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"tar",
"-xzv",
"--recursive-unlink",
"-f",
"/home/david/thunderbird-profile.tar.gz",
"-C",
"/home/david/"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"/usr/bin/thunderbird"
]
}
}
],
"trajectory": "trajectories/", "trajectory": "trajectories/",
"related_apps": [ "related_apps": [
"thunderbird" "thunderbird"
], ],
"evaluator": "evaluation_dir" "evaluator": {
"postconfig": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://raw.githubusercontent.com/unode/firefox_decrypt/main/firefox_decrypt.py",
"path": "/home/david/firefox_decrypt.py"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"python3",
"/home/david/firefox_decrypt.py",
"/home/david/.thunderbird",
"-n",
"-c",
"2",
"-f",
"csv",
"-d",
","
],
"stdout": "thunderbird-accounts.csv"
}
}
],
"func": "check_csv",
"result": {
"type": "cache_file",
"path": "thunderbird-accounts.csv"
},
"expected": {
"type": "rule",
"rules": {
"expect": [
{
"url": "imap://imap.gmail.com",
"user": "xx@gmail.com",
"password": "xxx"
},
{
"url": "smtp://smtp.gmail.com",
"user": "xx@gmail.com",
"password": "xxx"
}
]
}
}
}
} }

View File

@@ -1,12 +1,94 @@
{ {
"id": "bb5e4c0d-f964-439c-97b6-bdb9747de3f4", "id": "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"snapshot": "thunderbird", "snapshot": "thunderbird",
"instruction": "Help remove the account \"xx@xx\"", "instruction": "Help me to remove the account \"anonym-x2024@outlook.com\"",
"source": "https://www.wikihow.com/Remove-an-Email-Account-from-Thunderbird", "source": "https://www.wikihow.com/Remove-an-Email-Account-from-Thunderbird",
"config": [], "config": [
"trajectory": "trajectories/", {
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712",
"path": "/home/david/thunderbird-profile.tar.gz"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"tar",
"-xzv",
"--recursive-unlink",
"-f",
"/home/david/thunderbird-profile.tar.gz",
"-C",
"/home/david/"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"/usr/bin/thunderbird"
]
}
}
],
"trajectory": "trajectories/bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"related_apps": [ "related_apps": [
"thunderbird" "thunderbird"
], ],
"evaluator": "evaluation_dir" "evaluator": {
"postconfig": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://raw.githubusercontent.com/unode/firefox_decrypt/main/firefox_decrypt.py",
"path": "/home/david/firefox_decrypt.py"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"python3",
"/home/david/firefox_decrypt.py",
"/home/david/.thunderbird",
"-n",
"-c",
"2",
"-f",
"csv",
"-d",
","
],
"stdout": "thunderbird-accounts.csv"
}
}
],
"func": "check_csv",
"result": {
"type": "cache_file",
"path": "thunderbird-accounts.csv"
},
"expected": {
"type": "rule",
"rules": {
"unexpect": [
{
"url": "imap://outlook.office365.com",
"user": "anonym-x2024@outlook.com"
}
]
}
}
}
} }

View File

@@ -16,7 +16,7 @@
} }
}, },
{ {
"type": "command", "type": "launch",
"parameters": { "parameters": {
"command": "vlc" "command": "vlc"
} }

57
main.py
View File

@@ -1,13 +1,50 @@
import json import json
from desktop_env.envs.desktop_env import DesktopEnv from desktop_env.envs.desktop_env import DesktopEnv
import logging
import os
import sys
import datetime
#  Logger Configs {{{ #
# Configure the root logger: every record of level DEBUG or above is passed to
# the four handlers below, each of which applies its own level and filter.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

# FileHandler opens its file as soon as it is constructed, so the target
# directory has to exist beforehand; otherwise start-up fails with
# FileNotFoundError on a fresh checkout.
os.makedirs("logs", exist_ok=True)

file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))

file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)

# One shared, ANSI-coloured layout: [time LEVEL module/line-process] message
formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)

# The console and the "sdebug" file only receive records from the
# "desktopenv" logger hierarchy; the other two files receive everything.
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))

logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
#  }}} Logger Configs #

# From here on `logger` is this module's own logger, not the root logger.
logger = logging.getLogger("desktopenv.main")
def human_agent(): def human_agent():
""" """
Runs the Gym environment with human input. Runs the Gym environment with human input.
""" """
with open("evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json", "r") as f: with open("evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json", "r") as f:
example = json.load(f) example = json.load(f)
example["snapshot"] = "base_setup3" example["snapshot"] = "base_setup3"
@@ -39,24 +76,26 @@ def human_agent():
# "action_type": 0, # "action_type": 0,
# "click_type": 3, # "click_type": 3,
# } # }
print(trajectory[i]) logger.info(trajectory[i])
observation, reward, done, info = env.step(trajectory[i], pause=5) observation, reward, done, info = env.step(trajectory[i], pause=5)
print("Observation:", observation) logger.info("Observation: %s", observation)
print("Reward:", reward) logger.info("Reward: %.2f", reward)
print("Info:", info) logger.info("Info: %s", info)
print("================================\n") logger.info("================================\n")
if done: if done:
print("The episode is done.") logger.info("The episode is done.")
break break
result = env.evaluate() result = env.evaluate()
print("Result:", result) logger.info("Result: %.2f", result)
#input("PAUSING")
env.close() env.close()
print("Environment closed.") logger.info("Environment closed.")
if __name__ == "__main__": if __name__ == "__main__":