From d52b692ee5d27e2407f22b628f55f0970525eddf Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Sun, 14 Jan 2024 18:30:49 +0800 Subject: [PATCH] Finish loading the vscode examples v1; Improve on the infra: Add accessibility tree into the observation; Add activate window function, etc --- desktop_env/controllers/setup.py | 106 +++++++----- desktop_env/envs/desktop_env.py | 1 + desktop_env/evaluators/getters/__init__.py | 6 +- desktop_env/evaluators/getters/general.py | 31 ++-- desktop_env/evaluators/getters/replay.py | 1 + desktop_env/evaluators/getters/vscode.py | 26 ++- desktop_env/evaluators/metrics/__init__.py | 2 +- desktop_env/evaluators/metrics/vscode.py | 20 +++ desktop_env/server/README.md | 7 + desktop_env/server/main.py | 158 +++++++++++------- .../0ed39f63-6049-43d4-ba4d-5fa2fe04a951.json | 8 +- .../53ad5833-3455-407b-bbc6-45b4c79ab8fb.json | 46 ++++- .../59ed65c7-e9a6-43db-833f-76d6730c0004.json | 11 +- .../982d12a5-beab-424f-8d38-d2a48429e511.json | 24 ++- .../eabc805a-bfcf-4460-b250-ac92135819f6.json | 40 ++++- main.py | 38 +++-- 16 files changed, 368 insertions(+), 157 deletions(-) diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 659db62..bc0f391 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -1,18 +1,18 @@ import json -import time +import logging import os.path +import time import traceback import uuid - -from typing import Dict, List from typing import Any, Union, Optional +from typing import Dict, List import requests from playwright.sync_api import sync_playwright from requests_toolbelt.multipart.encoder import MultipartEncoder + from desktop_env.evaluators.metrics.utils import compare_urls -import logging logger = logging.getLogger("desktopenv.setup") @@ -58,31 +58,31 @@ class SetupController: # can add other setup steps # ZDY_COMMENT: merged with launch - #def _command_setup(self, command: str): - #""" - #Directly send a command into the virtual machine os for setting up. - #""" - #payload = json.dumps({"command": command}) - #headers = { - #'Content-Type': 'application/json' - #} - #timeout = 5 - #timout_whitelist = ["vlc"] -# - #try: -# - #response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout) - #if response.status_code == 200: - #print("Command executed successfully:", response.text) - #else: - #print("Failed to execute command. Status code:", response.status_code) - #except requests.exceptions.Timeout as e: - #if command in timout_whitelist: - #print("Command executed successfully:", command) - #else: - #print("An error occurred while trying to execute the command:", e) - #except requests.exceptions.RequestException as e: - #print("An error occurred while trying to execute the command:", e) + # def _command_setup(self, command: str): + # """ + # Directly send a command into the virtual machine os for setting up. + # """ + # payload = json.dumps({"command": command}) + # headers = { + # 'Content-Type': 'application/json' + # } + # timeout = 5 + # timout_whitelist = ["vlc"] + # + # try: + # + # response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout) + # if response.status_code == 200: + # print("Command executed successfully:", response.text) + # else: + # print("Failed to execute command. Status code:", response.status_code) + # except requests.exceptions.Timeout as e: + # if command in timout_whitelist: + # print("Command executed successfully:", command) + # else: + # print("An error occurred while trying to execute the command:", e) + # except requests.exceptions.RequestException as e: + # print("An error occurred while trying to execute the command:", e) def _download_setup(self, files: List[Dict[str, str]]): """ @@ -225,9 +225,14 @@ class SetupController: except requests.exceptions.RequestException as e: logger.error("An error occurred while trying to send the request: %s", e) - def _execute_setup( self, command: List[str] - , stdout: str = "", stderr: str = "" - , shell: bool = False, until: Optional[Dict[str, Any]] = None): + def _execute_setup( + self, + command: List[str], + stdout: str = "", + stderr: str = "", + shell: bool = False, + until: Optional[Dict[str, Any]] = None + ): if not command: raise Exception("Empty comman to launch.") @@ -249,10 +254,10 @@ class SetupController: if stderr: with open(os.path.join(self.cache_dir, stderr), "w") as f: f.write(results["error"]) - logger.info( "Command executed successfully: %s -> %s" - , " ".join(command) - , response.text - ) + logger.info("Command executed successfully: %s -> %s" + , " ".join(command) + , response.text + ) else: logger.error("Failed to launch application. Status code: %s", response.text) results = None @@ -264,13 +269,13 @@ class SetupController: results = None nb_failings += 1 - if len(until)==0: + if len(until) == 0: terminates = True elif results is not None: - terminates = "returncode" in until and results["returncode"]==until["returncode"]\ - or "stdout" in until and until["stdout"] in results["output"]\ - or "stderr" in until and until["stderr"] in results["error"] - terminates = terminates or nb_failings>=5 + terminates = "returncode" in until and results["returncode"] == until["returncode"] \ + or "stdout" in until and until["stdout"] in results["output"] \ + or "stderr" in until and until["stderr"] in results["error"] + terminates = terminates or nb_failings >= 5 if not terminates: time.sleep(0.3) @@ -293,6 +298,25 @@ class SetupController: # TODO raise NotImplementedError() + def _activate_window_setup(self, window_name: str): + if not window_name: + raise Exception(f"Setup Open - Invalid path ({window_name}).") + + payload = json.dumps({"window_name": window_name}) + headers = { + 'Content-Type': 'application/json' + } + + # send request to server to open file + try: + response = requests.post(self.http_server + "/setup" + "/activate_window", headers=headers, data=payload) + if response.status_code == 200: + logger.info("Command executed successfully: %s", response.text) + else: + logger.error(f"Failed to activate window {window_name}. Status code: %s", response.text) + except requests.exceptions.RequestException as e: + logger.error("An error occurred while trying to send the request: %s", e) + # Chrome setup def _chrome_open_tabs_setup(self, urls_to_open: List[str]): host = self.vm_ip diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index c7a2fee..f9627be 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -221,6 +221,7 @@ class DesktopEnv(gym.Env): time.sleep(pause) observation = { "screenshot": self._get_obs(), + "accessibility_tree": self.controller.get_accessibility_tree(), "terminal": self.controller.get_terminal_output(), "instruction": self.instruction } diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py index a978b42..7e472f4 100644 --- a/desktop_env/evaluators/getters/__init__.py +++ b/desktop_env/evaluators/getters/__init__.py @@ -1,7 +1,9 @@ +from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, \ + get_shortcuts_on_desktop from .file import get_cloud_file, get_vm_file, get_cache_file +from .general import get_vm_command_line from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper from .misc import get_rule, get_accessibility_tree -from .vlc import get_vlc_playing_info, get_vlc_config -from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, get_shortcuts_on_desktop from .replay import get_replay +from .vlc import get_vlc_playing_info, get_vlc_config from .vscode import get_vscode_config diff --git a/desktop_env/evaluators/getters/general.py b/desktop_env/evaluators/getters/general.py index 1c1ce4c..e4e4c99 100644 --- a/desktop_env/evaluators/getters/general.py +++ b/desktop_env/evaluators/getters/general.py @@ -1,22 +1,19 @@ -import os +import logging from typing import Dict +import requests + +logger = logging.getLogger("desktopenv.getters.general") -def get_string(env, config: Dict[str, str]) -> str: - """ - Config: - string (str) - """ +def get_vm_command_line(env, config: Dict[str, str]): + vm_ip = env.vm_ip + port = 5000 + command = config["command"] - return config["string"] + response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command}) - -def get_command_line(env, config: Dict[str, str]) -> str: - """ - Config: - string (str) - """ - - f = os.popen(config["command"]) - - return f.read() + if response.status_code == 200: + return response.json()["output"] + else: + logger.error("Failed to get vm command line. Status code: %d", response.status_code) + return None diff --git a/desktop_env/evaluators/getters/replay.py b/desktop_env/evaluators/getters/replay.py index 67ad0d3..c850986 100644 --- a/desktop_env/evaluators/getters/replay.py +++ b/desktop_env/evaluators/getters/replay.py @@ -2,6 +2,7 @@ from typing import List, Dict, Any def get_replay(env, trajectory: List[Dict[str, Any]]) -> None: + # fixme: need to be combined with the accessibility tree to activate the selection of the target window def parse(action): if action["type"] == "hotkey": keys = "', '".join(action["param"]) diff --git a/desktop_env/evaluators/getters/vscode.py b/desktop_env/evaluators/getters/vscode.py index 5072ec6..8a725ef 100644 --- a/desktop_env/evaluators/getters/vscode.py +++ b/desktop_env/evaluators/getters/vscode.py @@ -1,14 +1,30 @@ -from typing import Any -from typing import Dict +import logging +from typing import Any, Dict from .file import get_vm_file from .replay import get_replay +logger = logging.getLogger("desktopenv.getters.vscode") + def get_vscode_config(env, config: Dict[str, Any]) -> str: - trajectory = [{"type": "hotkey", "param": ["command", "shift", "p"]}, - {"type": "typewrite", "param": "OpenProject"}, - {"type": "press", "param": "enter"}] + os_type = env.vm_platform + vscode_extension_command = config["vscode_extension_command"] + + # fixme: depends on how we config and install the vscode in virtual machine, need to be aligned and double-checked + + if os_type == "MacOS": + trajectory = [ + {"type": "hotkey", "param": ["command", "shift", "p"]}, + {"type": "typewrite", "param": vscode_extension_command}, + {"type": "press", "param": "enter"} + ] + else: + trajectory = [ + {"type": "hotkey", "param": ["ctrl", "shift", "p"]}, + {"type": "typewrite", "param": vscode_extension_command}, + {"type": "press", "param": "enter"} + ] get_replay(env, trajectory) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 67b8925..a4f6607 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -13,4 +13,4 @@ from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, co from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3 from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter -from .vscode import compare_text_file, compare_config, compare_answer +from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index 053bc75..ac98d72 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -10,6 +10,8 @@ def compare_text_file(actual: str, expected: str, **options) -> float: Return: float: the score """ + if not actual: + return 0. with open(actual) as f1: actual_text = f1.read() @@ -22,6 +24,9 @@ def compare_text_file(actual: str, expected: str, **options) -> float: def compare_config(actual: str, rules: Dict, **options) -> float: + if not actual: + return 0. + with open(actual) as f1: actual_text = f1.read() @@ -39,9 +44,24 @@ def compare_answer(actual: str, rules: Dict, **options) -> float: Return: float: the score """ + if not actual: + return 0. if actual == rules['expect']: return 1.0 # TODO: can use text embedding to get non-zero return return 0.0 + + +def is_extension_installed(actual: str, rules: Dict, **options): + if rules['type'] == 'contain': + if rules['expected'] in actual: + return 1.0 + return 0.0 + elif rules['type'] == 'not_contain': + if rules['expected'] not in actual: + return 1.0 + return 0.0 + else: + raise NotImplementedError diff --git a/desktop_env/server/README.md b/desktop_env/server/README.md index 571081a..479ab82 100644 --- a/desktop_env/server/README.md +++ b/desktop_env/server/README.md @@ -71,3 +71,10 @@ You can use accerciser to check the accessibility tree on GNOME VM. ```sh sudo apt install accerciser ``` + + +### Additional Installation +Activating the window manager control requires the installation of `wmctrl`: +```bash +sudo apt install wmctrl +``` diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 06fc308..55ebd74 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -3,29 +3,26 @@ import os import platform import subprocess from pathlib import Path +from typing import Any, Optional +from typing import List, Dict +import Xlib import lxml.etree -from lxml.etree import _Element import pyatspi +import pyautogui +import requests +from PIL import Image +from Xlib import display, X +from flask import Flask, request, jsonify, send_file, abort +from lxml.etree import _Element from pyatspi import Accessible, StateType +from pyatspi import Action as ATAction from pyatspi import Component, Document from pyatspi import Text as ATText from pyatspi import Value as ATValue -from pyatspi import Action as ATAction -from typing import List, Dict -from typing import Any, Optional - -import Xlib -import pyautogui -from PIL import Image -from Xlib import display, X from pyxcursor import Xcursor -import requests -from flask import Flask, request, jsonify, send_file, abort -from werkzeug.utils import secure_filename - app = Flask(__name__) pyautogui.PAUSE = 0 @@ -140,22 +137,24 @@ def get_terminal_output(): xpath = '//application[@name="gnome-terminal-server"]/frame[@st:active="true"]//terminal[@st:focused="true"]' terminals: List[_Element] = desktop_xml.xpath(xpath, namespaces=_accessibility_ns_map) output = terminals[0].text.rstrip() if len(terminals) == 1 else None - else: # windows and macos platform is not implemented currently + else: # windows and macos platform is not implemented currently raise NotImplementedError return jsonify({"output": output, "status": "success"}) except: return jsonify({"output": None, "status": "error"}) -_accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org" - , "attr": "uri:deskat:attributes.at-spi.gnome.org" - , "cp": "uri:deskat:component.at-spi.gnome.org" - , "doc": "uri:deskat:document.at-spi.gnome.org" - , "docattr": "uri:deskat:attributes.document.at-spi.gnome.org" - , "txt": "uri:deskat:text.at-spi.gnome.org" - , "val": "uri:deskat:value.at-spi.gnome.org" - , "act": "uri:deskat:action.at-spi.gnome.org" - } +_accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org" + , "attr": "uri:deskat:attributes.at-spi.gnome.org" + , "cp": "uri:deskat:component.at-spi.gnome.org" + , "doc": "uri:deskat:document.at-spi.gnome.org" + , "docattr": "uri:deskat:attributes.document.at-spi.gnome.org" + , "txt": "uri:deskat:text.at-spi.gnome.org" + , "val": "uri:deskat:value.at-spi.gnome.org" + , "act": "uri:deskat:action.at-spi.gnome.org" + } + + def _create_node(node: Accessible) -> _Element: attribute_dict: Dict[str, Any] = {"name": node.name} @@ -163,11 +162,11 @@ def _create_node(node: Accessible) -> _Element: states: List[StateType] = node.getState().get_states() for st in states: state_name: str = StateType._enum_lookup[st] - attribute_dict[ "{{{:}}}{:}"\ - .format( _accessibility_ns_map["st"] - , state_name.split("_", maxsplit=1)[1].lower() - ) - ] = "true" + attribute_dict["{{{:}}}{:}" \ + .format(_accessibility_ns_map["st"] + , state_name.split("_", maxsplit=1)[1].lower() + ) + ] = "true" # }}} States # # Attributes {{{ # @@ -176,11 +175,11 @@ def _create_node(node: Accessible) -> _Element: attribute_name: str attribute_value: str attribute_name, attribute_value = attrbt.split(":", maxsplit=1) - attribute_dict[ "{{{:}}}{:}"\ - .format( _accessibility_ns_map["attr"] - , attribute_name - ) - ] = attribute_value + attribute_dict["{{{:}}}{:}" \ + .format(_accessibility_ns_map["attr"] + , attribute_name + ) + ] = attribute_value # }}} Attributes # # Component {{{ # @@ -189,9 +188,12 @@ def _create_node(node: Accessible) -> _Element: except NotImplementedError: pass else: - attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_SCREEN)) - attribute_dict["{{{:}}}windowcoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_WINDOW)) - attribute_dict["{{{:}}}parentcoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_PARENT)) + attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map["cp"])] = str( + component.getPosition(pyatspi.XY_SCREEN)) + attribute_dict["{{{:}}}windowcoord".format(_accessibility_ns_map["cp"])] = str( + component.getPosition(pyatspi.XY_WINDOW)) + attribute_dict["{{{:}}}parentcoord".format(_accessibility_ns_map["cp"])] = str( + component.getPosition(pyatspi.XY_PARENT)) attribute_dict["{{{:}}}size".format(_accessibility_ns_map["cp"])] = str(component.getSize()) # }}} Component # @@ -208,11 +210,11 @@ def _create_node(node: Accessible) -> _Element: attribute_name: str attribute_value: str attribute_name, attribute_value = attrbt.split(":", maxsplit=1) - attribute_dict[ "{{{:}}}{:}"\ - .format( _accessibility_ns_map["docattr"] - , attribute_name - ) - ] = attribute_value + attribute_dict["{{{:}}}{:}" \ + .format(_accessibility_ns_map["docattr"] + , attribute_name + ) + ] = attribute_value # }}} Document # # Text {{{ # @@ -222,13 +224,13 @@ def _create_node(node: Accessible) -> _Element: pass else: # only text shown on current screen is available - #attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount) + # attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount) text: str = text_obj.getText(0, text_obj.characterCount) # }}} Text # # Selection {{{ # try: - node.querySelection() + node.querySelection() except NotImplementedError: pass else: @@ -255,34 +257,36 @@ def _create_node(node: Accessible) -> _Element: else: for i in range(action.nActions): action_name: str = action.getName(i).replace(" ", "-") - attribute_dict[ "{{{:}}}{:}_desc"\ - .format( _accessibility_ns_map["act"] - , action_name - ) - ] = action.getDescription(i) - attribute_dict[ "{{{:}}}{:}_kb"\ - .format( _accessibility_ns_map["act"] - , action_name - ) - ] = action.getKeyBinding(i) + attribute_dict["{{{:}}}{:}_desc" \ + .format(_accessibility_ns_map["act"] + , action_name + ) + ] = action.getDescription(i) + attribute_dict["{{{:}}}{:}_kb" \ + .format(_accessibility_ns_map["act"] + , action_name + ) + ] = action.getKeyBinding(i) # }}} Action # - xml_node = lxml.etree.Element( node.getRoleName().replace(" ", "-") - , attrib=attribute_dict - , nsmap=_accessibility_ns_map - ) - if "text" in locals() and len(text)>0: + xml_node = lxml.etree.Element(node.getRoleName().replace(" ", "-") + , attrib=attribute_dict + , nsmap=_accessibility_ns_map + ) + if "text" in locals() and len(text) > 0: xml_node.text = text for ch in node: xml_node.append(_create_node(ch)) return xml_node + @app.route("/accessibility", methods=["GET"]) def get_accessibility_tree(): desktop: Accessible = pyatspi.Registry.getDesktop(0) desktop_xml: _Element = _create_node(desktop) return jsonify({"AT": lxml.etree.tostring(desktop_xml, encoding="unicode")}) + @app.route('/screen_size', methods=['POST']) def get_screen_size(): d = display.Display() @@ -562,5 +566,43 @@ def open_file(): return f"Failed to open {path}. Error: {e}", 500 +@app.route("/setup/activate_window", methods=['POST']) +def activate_window(): + data = request.json + window_name = data.get('window_name', None) + + os_name = platform.system() + + if os_name == 'Windows': + import pygetwindow as gw + try: + # Find the VS Code window + vscode_window = gw.getWindowsWithTitle(window_name)[0] + # Activate the window, bringing it to the front + vscode_window.activate() + except IndexError: + return "VS Code window not found.", 404 + + elif os_name == 'Darwin': + import pygetwindow as gw + try: + # Find the VS Code window + vscode_window = gw.getWindowsWithTitle(window_name)[0] + # Un-minimize the window and then bring it to the front + vscode_window.unminimize() + vscode_window.activate() + except IndexError: + return "VS Code window not found.", 404 + + elif os_name == 'Linux': + # Attempt to activate VS Code window using wmctrl + subprocess.Popen(["wmctrl", "-a", window_name]) + + else: + return f"Operating system {os_name} not supported.", 400 + + return "File opened successfully", 200 + + if __name__ == '__main__': app.run(debug=True, host="0.0.0.0") diff --git a/evaluation_examples/examples/vs_code/0ed39f63-6049-43d4-ba4d-5fa2fe04a951.json b/evaluation_examples/examples/vs_code/0ed39f63-6049-43d4-ba4d-5fa2fe04a951.json index cbbfd16..2c7a72f 100644 --- a/evaluation_examples/examples/vs_code/0ed39f63-6049-43d4-ba4d-5fa2fe04a951.json +++ b/evaluation_examples/examples/vs_code/0ed39f63-6049-43d4-ba4d-5fa2fe04a951.json @@ -1,7 +1,7 @@ { "id": "0ed39f63-6049-43d4-ba4d-5fa2fe04a951", "snapshot": "vscode", - "instruction": "Could you help me find and replace \"text\" with \"test\" in this file?", + "instruction": "Please change all the places that say \"text\" to \"test\" in this document for me.", "source": "https://www.quora.com/How-do-you-find-and-replace-text-in-Visual-Studio-Code", "config": [ { @@ -20,6 +20,12 @@ "parameters": { "command": ["code", "Desktop/vscode_replace_text.txt"] } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } } ], "trajectory": "trajectories/0ed39f63-6049-43d4-ba4d-5fa2fe04a951", diff --git a/evaluation_examples/examples/vs_code/53ad5833-3455-407b-bbc6-45b4c79ab8fb.json b/evaluation_examples/examples/vs_code/53ad5833-3455-407b-bbc6-45b4c79ab8fb.json index fb0d892..11fb7e5 100644 --- a/evaluation_examples/examples/vs_code/53ad5833-3455-407b-bbc6-45b4c79ab8fb.json +++ b/evaluation_examples/examples/vs_code/53ad5833-3455-407b-bbc6-45b4c79ab8fb.json @@ -1,19 +1,50 @@ { "id": "53ad5833-3455-407b-bbc6-45b4c79ab8fb", "snapshot": "vscode", - "instruction": "Could you help me open the project at /home/user/project?", + "instruction": "I'd like the \"project\" in the \"user\" folder under \"home\" to be opened with VS Code, please.", "source": "https://www.youtube.com/watch?v=VqCgcpAypFQ", "config": [ { "type": "launch", "parameters": { - "command": ["code"] + "command": [ + "code" + ] } }, { "type": "command", "parameters": { - "command": ["mkdir", "-p", "/home/user/project"] + "command": [ + "mkdir", + "-p", + "/home/user/project/.vscode" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1akdsiRVdq6CUtT-FX8Dpf8ruPTq6DcFn&export=download&authuser=0&confirm=t&uuid=ce2fa96a-454e-43d9-bbe3-98553b7eed0d&at=APZUnTVw_YQ1URTvP34vrmKcw0b4:1705222451052", + "path": "/home/user/project/main.py" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1BkwtqtAzv_K2CrTbJZ0HbMHBffzdD9vc&export=download&authuser=0&confirm=t&uuid=28f77090-deef-49a1-b156-91317881e75e&at=APZUnTXuaR6i_3t3Prslk535GaO5:1705222457290", + "path": "/home/user/project/README.md" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1ea_zF2tbcXOB8w9neBV-U5xI2nnPzIw_&export=download&authuser=0&confirm=t&uuid=9cf8c5bb-a880-475c-b80b-967a0c4fbea4&at=APZUnTUdjIj80F3Mbgi72eZDTZLO:1705222462443", + "path": "/home/user/project/.vscode/settings.json" + } + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" } } ], @@ -22,6 +53,14 @@ "vscode" ], "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], "func": "compare_config", "expected": { "type": "rule", @@ -31,6 +70,7 @@ }, "result": { "type": "vscode_config", + "vscode_extension_command": "OpenProject", "path": "OpenProject.txt", "dest": "OpenProject.txt" } diff --git a/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json b/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json index b3201d8..956138d 100644 --- a/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json +++ b/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json @@ -16,9 +16,15 @@ } }, { - "type": "open", + "type": "launch", "parameters": { - "path": "Desktop/main.py" + "command": ["code", "Desktop/main.py"] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" } } ], @@ -36,6 +42,7 @@ }, "result": { "type": "vscode_config", + "vscode_extension_command": "GetBreakPoint", "path": "GetBreakPoint.txt", "dest": "GetBreakPoint.txt" } diff --git a/evaluation_examples/examples/vs_code/982d12a5-beab-424f-8d38-d2a48429e511.json b/evaluation_examples/examples/vs_code/982d12a5-beab-424f-8d38-d2a48429e511.json index 309024f..8c0748d 100644 --- a/evaluation_examples/examples/vs_code/982d12a5-beab-424f-8d38-d2a48429e511.json +++ b/evaluation_examples/examples/vs_code/982d12a5-beab-424f-8d38-d2a48429e511.json @@ -3,7 +3,22 @@ "snapshot": "vscode", "instruction": "Could you help me change the color theme to Dark?", "source": "https://www.youtube.com/watch?v=ORrELERGIHs", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], "trajectory": "trajectories/982d12a5-beab-424f-8d38-d2a48429e511", "related_apps": [ "vscode" @@ -11,11 +26,14 @@ "evaluator": { "func": "compare_config", "expected": { - "type": "string", - "string": "2" + "type": "rule", + "rules": { + "expect": "2" + } }, "result": { "type": "vscode_config", + "vscode_extension_command": "GetColorTheme", "path": "GetColorTheme.txt", "dest": "GetColorTheme.txt" } diff --git a/evaluation_examples/examples/vs_code/eabc805a-bfcf-4460-b250-ac92135819f6.json b/evaluation_examples/examples/vs_code/eabc805a-bfcf-4460-b250-ac92135819f6.json index bf63054..7fb741d 100644 --- a/evaluation_examples/examples/vs_code/eabc805a-bfcf-4460-b250-ac92135819f6.json +++ b/evaluation_examples/examples/vs_code/eabc805a-bfcf-4460-b250-ac92135819f6.json @@ -3,20 +3,44 @@ "snapshot": "vscode", "instruction": "Help me install the extension Python.", "source": "https://www.youtube.com/watch?v=VqCgcpAypFQ", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], "trajectory": "trajectories/eabc805a-bfcf-4460-b250-ac92135819f6", "related_apps": [ "vscode" ], "evaluator": { - "func": "compare_answer", - "expected": { - "type": "string", - "string": "ms-python.python\n" - }, + "func": "is_extension_installed", "result": { - "type": "command_line", - "command": "code --list-extensions | grep ms-python.python" + "type": "vm_command_line", + "command": [ + "code", + "--list-extensions", + "|", + "grep", + "ms-python.python" + ] + }, + "expected": { + "type": "rule", + "rules": { + "type": "contain", + "expected": "ms-python.python" + } } } } diff --git a/main.py b/main.py index 5da8091..d4c3dc1 100644 --- a/main.py +++ b/main.py @@ -1,10 +1,10 @@ +import datetime import json -from desktop_env.envs.desktop_env import DesktopEnv - import logging import os import sys -import datetime + +from desktop_env.envs.desktop_env import DesktopEnv # Logger Configs {{{ # logger = logging.getLogger() @@ -12,17 +12,18 @@ logger.setLevel(logging.DEBUG) datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") -file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str))) -debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str))) +file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8") +debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8") stdout_handler = logging.StreamHandler(sys.stdout) -sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str))) +sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8") file_handler.setLevel(logging.INFO) debug_handler.setLevel(logging.DEBUG) stdout_handler.setLevel(logging.INFO) sdebug_handler.setLevel(logging.DEBUG) -formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") +formatter = logging.Formatter( + fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") file_handler.setFormatter(formatter) debug_handler.setFormatter(formatter) stdout_handler.setFormatter(formatter) @@ -39,19 +40,21 @@ logger.addHandler(sdebug_handler) logger = logging.getLogger("desktopenv.main") + def human_agent(): """ Runs the Gym environment with human input. """ - with open("evaluation_examples/examples/thunderbird/e1e75309-3ddb-4d09-92ec-de869c928143.json.nosetup", "r") as f: + with open("evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json", "r") as f: example = json.load(f) - example["snapshot"] = "Snapshot 18" + example["snapshot"] = "vscode_setup" - env = DesktopEnv( path_to_vm="../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx" - , action_space="computer_13" - , task_config=example - ) + env = DesktopEnv( + path_to_vm=r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx", + action_space="computer_13", + task_config=example + ) # reset the environment to certain snapshot observation = env.reset() done = False @@ -76,7 +79,8 @@ def human_agent(): # } logger.info(trajectory[i]) - observation, reward, done, info = env.step(trajectory[i], pause=5) + observation, reward, done, info = env.step(trajectory[i]) + observation.pop("accessibility_tree") logger.info("Observation: %s", observation) logger.info("Reward: %.2f", reward) logger.info("Info: %s", info) @@ -87,12 +91,14 @@ def human_agent(): logger.info("The episode is done.") break + input("PAUSING") + result = env.evaluate() logger.info("Result: %.2f", result) - #input("PAUSING") + input("PAUSING") - #env.close() + # env.close() logger.info("Environment closed.")