Finish loading the VS Code examples (v1); improve the infrastructure: add the accessibility tree to the observation, add an activate-window function, etc.

This commit is contained in:
Timothyxxx
2024-01-14 18:30:49 +08:00
parent 2228f346a9
commit d52b692ee5
16 changed files with 368 additions and 157 deletions

View File

@@ -1,18 +1,18 @@
import json import json
import time import logging
import os.path import os.path
import time
import traceback import traceback
import uuid import uuid
from typing import Dict, List
from typing import Any, Union, Optional from typing import Any, Union, Optional
from typing import Dict, List
import requests import requests
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
from requests_toolbelt.multipart.encoder import MultipartEncoder from requests_toolbelt.multipart.encoder import MultipartEncoder
from desktop_env.evaluators.metrics.utils import compare_urls from desktop_env.evaluators.metrics.utils import compare_urls
import logging
logger = logging.getLogger("desktopenv.setup") logger = logging.getLogger("desktopenv.setup")
@@ -58,31 +58,31 @@ class SetupController:
# can add other setup steps # can add other setup steps
# ZDY_COMMENT: merged with launch # ZDY_COMMENT: merged with launch
#def _command_setup(self, command: str): # def _command_setup(self, command: str):
#""" # """
#Directly send a command into the virtual machine os for setting up. # Directly send a command into the virtual machine os for setting up.
#""" # """
#payload = json.dumps({"command": command}) # payload = json.dumps({"command": command})
#headers = { # headers = {
#'Content-Type': 'application/json' # 'Content-Type': 'application/json'
#} # }
#timeout = 5 # timeout = 5
#timout_whitelist = ["vlc"] # timout_whitelist = ["vlc"]
# #
#try: # try:
# #
#response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout) # response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
#if response.status_code == 200: # if response.status_code == 200:
#print("Command executed successfully:", response.text) # print("Command executed successfully:", response.text)
#else: # else:
#print("Failed to execute command. Status code:", response.status_code) # print("Failed to execute command. Status code:", response.status_code)
#except requests.exceptions.Timeout as e: # except requests.exceptions.Timeout as e:
#if command in timout_whitelist: # if command in timout_whitelist:
#print("Command executed successfully:", command) # print("Command executed successfully:", command)
#else: # else:
#print("An error occurred while trying to execute the command:", e) # print("An error occurred while trying to execute the command:", e)
#except requests.exceptions.RequestException as e: # except requests.exceptions.RequestException as e:
#print("An error occurred while trying to execute the command:", e) # print("An error occurred while trying to execute the command:", e)
def _download_setup(self, files: List[Dict[str, str]]): def _download_setup(self, files: List[Dict[str, str]]):
""" """
@@ -225,9 +225,14 @@ class SetupController:
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error("An error occurred while trying to send the request: %s", e) logger.error("An error occurred while trying to send the request: %s", e)
def _execute_setup( self, command: List[str] def _execute_setup(
, stdout: str = "", stderr: str = "" self,
, shell: bool = False, until: Optional[Dict[str, Any]] = None): command: List[str],
stdout: str = "",
stderr: str = "",
shell: bool = False,
until: Optional[Dict[str, Any]] = None
):
if not command: if not command:
raise Exception("Empty comman to launch.") raise Exception("Empty comman to launch.")
@@ -249,7 +254,7 @@ class SetupController:
if stderr: if stderr:
with open(os.path.join(self.cache_dir, stderr), "w") as f: with open(os.path.join(self.cache_dir, stderr), "w") as f:
f.write(results["error"]) f.write(results["error"])
logger.info( "Command executed successfully: %s -> %s" logger.info("Command executed successfully: %s -> %s"
, " ".join(command) , " ".join(command)
, response.text , response.text
) )
@@ -264,13 +269,13 @@ class SetupController:
results = None results = None
nb_failings += 1 nb_failings += 1
if len(until)==0: if len(until) == 0:
terminates = True terminates = True
elif results is not None: elif results is not None:
terminates = "returncode" in until and results["returncode"]==until["returncode"]\ terminates = "returncode" in until and results["returncode"] == until["returncode"] \
or "stdout" in until and until["stdout"] in results["output"]\ or "stdout" in until and until["stdout"] in results["output"] \
or "stderr" in until and until["stderr"] in results["error"] or "stderr" in until and until["stderr"] in results["error"]
terminates = terminates or nb_failings>=5 terminates = terminates or nb_failings >= 5
if not terminates: if not terminates:
time.sleep(0.3) time.sleep(0.3)
@@ -293,6 +298,25 @@ class SetupController:
# TODO # TODO
raise NotImplementedError() raise NotImplementedError()
def _activate_window_setup(self, window_name: str):
    """
    Ask the server inside the virtual machine to activate (bring to the front)
    the window called `window_name`.

    A POST request carrying the JSON payload {"window_name": <window_name>} is
    sent to `self.http_server + "/setup/activate_window"`. A non-200 answer or
    a request error is logged, not raised.

    Args:
        window_name (str): name of the window to activate

    Raises:
        Exception: if `window_name` is empty
    """
    if not window_name:
        raise Exception(f"Setup Activate Window - Invalid window name ({window_name}).")

    payload = json.dumps({"window_name": window_name})
    headers = {
        'Content-Type': 'application/json'
    }
    # Bound the wait so an unresponsive server cannot hang the whole setup;
    # a timeout is a RequestException and is therefore logged below.
    timeout = 30

    # send request to server to activate the window
    try:
        response = requests.post(
            self.http_server + "/setup" + "/activate_window",
            headers=headers,
            data=payload,
            timeout=timeout
        )
        if response.status_code == 200:
            logger.info("Command executed successfully: %s", response.text)
        else:
            logger.error(
                "Failed to activate window %s. Status code: %s. Response: %s",
                window_name, response.status_code, response.text
            )
    except requests.exceptions.RequestException as e:
        logger.error("An error occurred while trying to send the request: %s", e)
# Chrome setup # Chrome setup
def _chrome_open_tabs_setup(self, urls_to_open: List[str]): def _chrome_open_tabs_setup(self, urls_to_open: List[str]):
host = self.vm_ip host = self.vm_ip

View File

@@ -221,6 +221,7 @@ class DesktopEnv(gym.Env):
time.sleep(pause) time.sleep(pause)
observation = { observation = {
"screenshot": self._get_obs(), "screenshot": self._get_obs(),
"accessibility_tree": self.controller.get_accessibility_tree(),
"terminal": self.controller.get_terminal_output(), "terminal": self.controller.get_terminal_output(),
"instruction": self.instruction "instruction": self.instruction
} }

View File

@@ -1,7 +1,9 @@
from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, \
get_shortcuts_on_desktop
from .file import get_cloud_file, get_vm_file, get_cache_file from .file import get_cloud_file, get_vm_file, get_cache_file
from .general import get_vm_command_line
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper
from .misc import get_rule, get_accessibility_tree from .misc import get_rule, get_accessibility_tree
from .vlc import get_vlc_playing_info, get_vlc_config
from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, get_shortcuts_on_desktop
from .replay import get_replay from .replay import get_replay
from .vlc import get_vlc_playing_info, get_vlc_config
from .vscode import get_vscode_config from .vscode import get_vscode_config

View File

@@ -1,22 +1,19 @@
import os import logging
from typing import Dict from typing import Dict
import requests
logger = logging.getLogger("desktopenv.getters.general")
def get_string(env, config: Dict[str, str]) -> str: def get_vm_command_line(env, config: Dict[str, str]):
""" vm_ip = env.vm_ip
Config: port = 5000
string (str) command = config["command"]
"""
return config["string"] response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command})
if response.status_code == 200:
def get_command_line(env, config: Dict[str, str]) -> str: return response.json()["output"]
""" else:
Config: logger.error("Failed to get vm command line. Status code: %d", response.status_code)
string (str) return None
"""
f = os.popen(config["command"])
return f.read()

View File

@@ -2,6 +2,7 @@ from typing import List, Dict, Any
def get_replay(env, trajectory: List[Dict[str, Any]]) -> None: def get_replay(env, trajectory: List[Dict[str, Any]]) -> None:
# fixme: need to be combined with the accessibility tree to activate the selection of the target window
def parse(action): def parse(action):
if action["type"] == "hotkey": if action["type"] == "hotkey":
keys = "', '".join(action["param"]) keys = "', '".join(action["param"])

View File

@@ -1,14 +1,30 @@
from typing import Any import logging
from typing import Dict from typing import Any, Dict
from .file import get_vm_file from .file import get_vm_file
from .replay import get_replay from .replay import get_replay
logger = logging.getLogger("desktopenv.getters.vscode")
def get_vscode_config(env, config: Dict[str, Any]) -> str: def get_vscode_config(env, config: Dict[str, Any]) -> str:
trajectory = [{"type": "hotkey", "param": ["command", "shift", "p"]}, os_type = env.vm_platform
{"type": "typewrite", "param": "OpenProject"}, vscode_extension_command = config["vscode_extension_command"]
{"type": "press", "param": "enter"}]
# fixme: depends on how we config and install the vscode in virtual machine, need to be aligned and double-checked
if os_type == "MacOS":
trajectory = [
{"type": "hotkey", "param": ["command", "shift", "p"]},
{"type": "typewrite", "param": vscode_extension_command},
{"type": "press", "param": "enter"}
]
else:
trajectory = [
{"type": "hotkey", "param": ["ctrl", "shift", "p"]},
{"type": "typewrite", "param": vscode_extension_command},
{"type": "press", "param": "enter"}
]
get_replay(env, trajectory) get_replay(env, trajectory)

View File

@@ -13,4 +13,4 @@ from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, co
from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions
from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3 from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3
from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
from .vscode import compare_text_file, compare_config, compare_answer from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed

View File

@@ -10,6 +10,8 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
Return: Return:
float: the score float: the score
""" """
if not actual:
return 0.
with open(actual) as f1: with open(actual) as f1:
actual_text = f1.read() actual_text = f1.read()
@@ -22,6 +24,9 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
def compare_config(actual: str, rules: Dict, **options) -> float: def compare_config(actual: str, rules: Dict, **options) -> float:
if not actual:
return 0.
with open(actual) as f1: with open(actual) as f1:
actual_text = f1.read() actual_text = f1.read()
@@ -39,9 +44,24 @@ def compare_answer(actual: str, rules: Dict, **options) -> float:
Return: Return:
float: the score float: the score
""" """
if not actual:
return 0.
if actual == rules['expect']: if actual == rules['expect']:
return 1.0 return 1.0
# TODO: can use text embedding to get non-zero return # TODO: can use text embedding to get non-zero return
return 0.0 return 0.0
def is_extension_installed(actual: str, rules: Dict, **options) -> float:
    """
    Check whether an expected substring is (or is not) present in `actual`.

    Args:
        actual (str): text to inspect, e.g. the output of a command listing the
          installed extensions; None means the output could not be obtained
        rules (Dict): dict like
          {
            "type": "contain" | "not_contain",
            "expected": str
          }

    Return:
        float: 1.0 if the rule is satisfied, 0.0 otherwise

    Raises:
        NotImplementedError: if rules["type"] is neither "contain" nor "not_contain"
    """
    rule_type = rules['type']
    if rule_type not in ('contain', 'not_contain'):
        raise NotImplementedError

    # None means the result could not be fetched at all, so nothing can be
    # verified. An empty string is a legitimate result and is NOT rejected
    # here: it must still satisfy a "not_contain" rule.
    if actual is None:
        return 0.0

    found = rules['expected'] in actual
    if rule_type == 'contain':
        return 1.0 if found else 0.0
    return 0.0 if found else 1.0

View File

@@ -71,3 +71,10 @@ You can use accerciser to check the accessibility tree on GNOME VM.
```sh ```sh
sudo apt install accerciser sudo apt install accerciser
``` ```
### Additional Installation
Activating windows through the window manager requires `wmctrl` to be installed:
```bash
sudo apt install wmctrl
```

View File

@@ -3,29 +3,26 @@ import os
import platform import platform
import subprocess import subprocess
from pathlib import Path from pathlib import Path
from typing import Any, Optional
from typing import List, Dict
import Xlib
import lxml.etree import lxml.etree
from lxml.etree import _Element
import pyatspi import pyatspi
import pyautogui
import requests
from PIL import Image
from Xlib import display, X
from flask import Flask, request, jsonify, send_file, abort
from lxml.etree import _Element
from pyatspi import Accessible, StateType from pyatspi import Accessible, StateType
from pyatspi import Action as ATAction
from pyatspi import Component, Document from pyatspi import Component, Document
from pyatspi import Text as ATText from pyatspi import Text as ATText
from pyatspi import Value as ATValue from pyatspi import Value as ATValue
from pyatspi import Action as ATAction
from typing import List, Dict
from typing import Any, Optional
import Xlib
import pyautogui
from PIL import Image
from Xlib import display, X
from pyxcursor import Xcursor from pyxcursor import Xcursor
import requests
from flask import Flask, request, jsonify, send_file, abort
from werkzeug.utils import secure_filename
app = Flask(__name__) app = Flask(__name__)
pyautogui.PAUSE = 0 pyautogui.PAUSE = 0
@@ -147,7 +144,7 @@ def get_terminal_output():
return jsonify({"output": None, "status": "error"}) return jsonify({"output": None, "status": "error"})
_accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org" _accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org"
, "attr": "uri:deskat:attributes.at-spi.gnome.org" , "attr": "uri:deskat:attributes.at-spi.gnome.org"
, "cp": "uri:deskat:component.at-spi.gnome.org" , "cp": "uri:deskat:component.at-spi.gnome.org"
, "doc": "uri:deskat:document.at-spi.gnome.org" , "doc": "uri:deskat:document.at-spi.gnome.org"
@@ -156,6 +153,8 @@ _accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org"
, "val": "uri:deskat:value.at-spi.gnome.org" , "val": "uri:deskat:value.at-spi.gnome.org"
, "act": "uri:deskat:action.at-spi.gnome.org" , "act": "uri:deskat:action.at-spi.gnome.org"
} }
def _create_node(node: Accessible) -> _Element: def _create_node(node: Accessible) -> _Element:
attribute_dict: Dict[str, Any] = {"name": node.name} attribute_dict: Dict[str, Any] = {"name": node.name}
@@ -163,8 +162,8 @@ def _create_node(node: Accessible) -> _Element:
states: List[StateType] = node.getState().get_states() states: List[StateType] = node.getState().get_states()
for st in states: for st in states:
state_name: str = StateType._enum_lookup[st] state_name: str = StateType._enum_lookup[st]
attribute_dict[ "{{{:}}}{:}"\ attribute_dict["{{{:}}}{:}" \
.format( _accessibility_ns_map["st"] .format(_accessibility_ns_map["st"]
, state_name.split("_", maxsplit=1)[1].lower() , state_name.split("_", maxsplit=1)[1].lower()
) )
] = "true" ] = "true"
@@ -176,8 +175,8 @@ def _create_node(node: Accessible) -> _Element:
attribute_name: str attribute_name: str
attribute_value: str attribute_value: str
attribute_name, attribute_value = attrbt.split(":", maxsplit=1) attribute_name, attribute_value = attrbt.split(":", maxsplit=1)
attribute_dict[ "{{{:}}}{:}"\ attribute_dict["{{{:}}}{:}" \
.format( _accessibility_ns_map["attr"] .format(_accessibility_ns_map["attr"]
, attribute_name , attribute_name
) )
] = attribute_value ] = attribute_value
@@ -189,9 +188,12 @@ def _create_node(node: Accessible) -> _Element:
except NotImplementedError: except NotImplementedError:
pass pass
else: else:
attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_SCREEN)) attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map["cp"])] = str(
attribute_dict["{{{:}}}windowcoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_WINDOW)) component.getPosition(pyatspi.XY_SCREEN))
attribute_dict["{{{:}}}parentcoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_PARENT)) attribute_dict["{{{:}}}windowcoord".format(_accessibility_ns_map["cp"])] = str(
component.getPosition(pyatspi.XY_WINDOW))
attribute_dict["{{{:}}}parentcoord".format(_accessibility_ns_map["cp"])] = str(
component.getPosition(pyatspi.XY_PARENT))
attribute_dict["{{{:}}}size".format(_accessibility_ns_map["cp"])] = str(component.getSize()) attribute_dict["{{{:}}}size".format(_accessibility_ns_map["cp"])] = str(component.getSize())
# }}} Component # # }}} Component #
@@ -208,8 +210,8 @@ def _create_node(node: Accessible) -> _Element:
attribute_name: str attribute_name: str
attribute_value: str attribute_value: str
attribute_name, attribute_value = attrbt.split(":", maxsplit=1) attribute_name, attribute_value = attrbt.split(":", maxsplit=1)
attribute_dict[ "{{{:}}}{:}"\ attribute_dict["{{{:}}}{:}" \
.format( _accessibility_ns_map["docattr"] .format(_accessibility_ns_map["docattr"]
, attribute_name , attribute_name
) )
] = attribute_value ] = attribute_value
@@ -222,7 +224,7 @@ def _create_node(node: Accessible) -> _Element:
pass pass
else: else:
# only text shown on current screen is available # only text shown on current screen is available
#attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount) # attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount)
text: str = text_obj.getText(0, text_obj.characterCount) text: str = text_obj.getText(0, text_obj.characterCount)
# }}} Text # # }}} Text #
@@ -255,34 +257,36 @@ def _create_node(node: Accessible) -> _Element:
else: else:
for i in range(action.nActions): for i in range(action.nActions):
action_name: str = action.getName(i).replace(" ", "-") action_name: str = action.getName(i).replace(" ", "-")
attribute_dict[ "{{{:}}}{:}_desc"\ attribute_dict["{{{:}}}{:}_desc" \
.format( _accessibility_ns_map["act"] .format(_accessibility_ns_map["act"]
, action_name , action_name
) )
] = action.getDescription(i) ] = action.getDescription(i)
attribute_dict[ "{{{:}}}{:}_kb"\ attribute_dict["{{{:}}}{:}_kb" \
.format( _accessibility_ns_map["act"] .format(_accessibility_ns_map["act"]
, action_name , action_name
) )
] = action.getKeyBinding(i) ] = action.getKeyBinding(i)
# }}} Action # # }}} Action #
xml_node = lxml.etree.Element( node.getRoleName().replace(" ", "-") xml_node = lxml.etree.Element(node.getRoleName().replace(" ", "-")
, attrib=attribute_dict , attrib=attribute_dict
, nsmap=_accessibility_ns_map , nsmap=_accessibility_ns_map
) )
if "text" in locals() and len(text)>0: if "text" in locals() and len(text) > 0:
xml_node.text = text xml_node.text = text
for ch in node: for ch in node:
xml_node.append(_create_node(ch)) xml_node.append(_create_node(ch))
return xml_node return xml_node
@app.route("/accessibility", methods=["GET"]) @app.route("/accessibility", methods=["GET"])
def get_accessibility_tree(): def get_accessibility_tree():
desktop: Accessible = pyatspi.Registry.getDesktop(0) desktop: Accessible = pyatspi.Registry.getDesktop(0)
desktop_xml: _Element = _create_node(desktop) desktop_xml: _Element = _create_node(desktop)
return jsonify({"AT": lxml.etree.tostring(desktop_xml, encoding="unicode")}) return jsonify({"AT": lxml.etree.tostring(desktop_xml, encoding="unicode")})
@app.route('/screen_size', methods=['POST']) @app.route('/screen_size', methods=['POST'])
def get_screen_size(): def get_screen_size():
d = display.Display() d = display.Display()
@@ -562,5 +566,43 @@ def open_file():
return f"Failed to open {path}. Error: {e}", 500 return f"Failed to open {path}. Error: {e}", 500
@app.route("/setup/activate_window", methods=['POST'])
def activate_window():
    """
    Bring the window named in the request to the front.

    Expects a JSON body {"window_name": <str>}. On Windows and macOS the window
    is looked up with pygetwindow; on Linux `wmctrl -a` is used.

    Returns:
        200 if the window was activated,
        400 if `window_name` is missing or the operating system is unsupported,
        404 if no matching window was found,
        500 if `wmctrl` is unavailable or did not finish in time.
    """
    # silent=True yields None instead of an error response on a missing/invalid body
    data = request.get_json(silent=True)
    window_name = data.get('window_name', None) if isinstance(data, dict) else None
    if not window_name:
        return "window_name is required.", 400

    os_name = platform.system()

    if os_name in ('Windows', 'Darwin'):
        # imported lazily: only needed (and installed) on these platforms
        import pygetwindow as gw

        matching_windows = gw.getWindowsWithTitle(window_name)
        if not matching_windows:
            return f"Window {window_name} not found.", 404

        target_window = matching_windows[0]
        if os_name == 'Darwin':
            # Un-minimize the window before bringing it to the front
            target_window.unminimize()
        target_window.activate()

    elif os_name == 'Linux':
        # Wait for wmctrl and inspect its exit status so that a missing window
        # is reported to the caller instead of being silently ignored.
        try:
            completed = subprocess.run(
                ["wmctrl", "-a", window_name],
                capture_output=True,
                text=True,
                timeout=5
            )
        except FileNotFoundError:
            return "wmctrl is not installed.", 500
        except subprocess.TimeoutExpired:
            return f"Timed out while activating window {window_name}.", 500

        if completed.returncode != 0:
            return f"Window {window_name} not found.", 404

    else:
        return f"Operating system {os_name} not supported.", 400

    return "Window activated successfully", 200
if __name__ == '__main__': if __name__ == '__main__':
app.run(debug=True, host="0.0.0.0") app.run(debug=True, host="0.0.0.0")

View File

@@ -1,7 +1,7 @@
{ {
"id": "0ed39f63-6049-43d4-ba4d-5fa2fe04a951", "id": "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
"snapshot": "vscode", "snapshot": "vscode",
"instruction": "Could you help me find and replace \"text\" with \"test\" in this file?", "instruction": "Please change all the places that say \"text\" to \"test\" in this document for me.",
"source": "https://www.quora.com/How-do-you-find-and-replace-text-in-Visual-Studio-Code", "source": "https://www.quora.com/How-do-you-find-and-replace-text-in-Visual-Studio-Code",
"config": [ "config": [
{ {
@@ -20,6 +20,12 @@
"parameters": { "parameters": {
"command": ["code", "Desktop/vscode_replace_text.txt"] "command": ["code", "Desktop/vscode_replace_text.txt"]
} }
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
} }
], ],
"trajectory": "trajectories/0ed39f63-6049-43d4-ba4d-5fa2fe04a951", "trajectory": "trajectories/0ed39f63-6049-43d4-ba4d-5fa2fe04a951",

View File

@@ -1,19 +1,50 @@
{ {
"id": "53ad5833-3455-407b-bbc6-45b4c79ab8fb", "id": "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
"snapshot": "vscode", "snapshot": "vscode",
"instruction": "Could you help me open the project at /home/user/project?", "instruction": "I'd like the \"project\" in the \"user\" folder under \"home\" to be opened with VS Code, please.",
"source": "https://www.youtube.com/watch?v=VqCgcpAypFQ", "source": "https://www.youtube.com/watch?v=VqCgcpAypFQ",
"config": [ "config": [
{ {
"type": "launch", "type": "launch",
"parameters": { "parameters": {
"command": ["code"] "command": [
"code"
]
} }
}, },
{ {
"type": "command", "type": "command",
"parameters": { "parameters": {
"command": ["mkdir", "-p", "/home/user/project"] "command": [
"mkdir",
"-p",
"/home/user/project/.vscode"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1akdsiRVdq6CUtT-FX8Dpf8ruPTq6DcFn&export=download&authuser=0&confirm=t&uuid=ce2fa96a-454e-43d9-bbe3-98553b7eed0d&at=APZUnTVw_YQ1URTvP34vrmKcw0b4:1705222451052",
"path": "/home/user/project/main.py"
},
{
"url": "https://drive.usercontent.google.com/download?id=1BkwtqtAzv_K2CrTbJZ0HbMHBffzdD9vc&export=download&authuser=0&confirm=t&uuid=28f77090-deef-49a1-b156-91317881e75e&at=APZUnTXuaR6i_3t3Prslk535GaO5:1705222457290",
"path": "/home/user/project/README.md"
},
{
"url": "https://drive.usercontent.google.com/download?id=1ea_zF2tbcXOB8w9neBV-U5xI2nnPzIw_&export=download&authuser=0&confirm=t&uuid=9cf8c5bb-a880-475c-b80b-967a0c4fbea4&at=APZUnTUdjIj80F3Mbgi72eZDTZLO:1705222462443",
"path": "/home/user/project/.vscode/settings.json"
}
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
} }
} }
], ],
@@ -22,6 +53,14 @@
"vscode" "vscode"
], ],
"evaluator": { "evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"func": "compare_config", "func": "compare_config",
"expected": { "expected": {
"type": "rule", "type": "rule",
@@ -31,6 +70,7 @@
}, },
"result": { "result": {
"type": "vscode_config", "type": "vscode_config",
"vscode_extension_command": "OpenProject",
"path": "OpenProject.txt", "path": "OpenProject.txt",
"dest": "OpenProject.txt" "dest": "OpenProject.txt"
} }

View File

@@ -16,9 +16,15 @@
} }
}, },
{ {
"type": "open", "type": "launch",
"parameters": { "parameters": {
"path": "Desktop/main.py" "command": ["code", "Desktop/main.py"]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
} }
} }
], ],
@@ -36,6 +42,7 @@
}, },
"result": { "result": {
"type": "vscode_config", "type": "vscode_config",
"vscode_extension_command": "GetBreakPoint",
"path": "GetBreakPoint.txt", "path": "GetBreakPoint.txt",
"dest": "GetBreakPoint.txt" "dest": "GetBreakPoint.txt"
} }

View File

@@ -3,7 +3,22 @@
"snapshot": "vscode", "snapshot": "vscode",
"instruction": "Could you help me change the color theme to Dark?", "instruction": "Could you help me change the color theme to Dark?",
"source": "https://www.youtube.com/watch?v=ORrELERGIHs", "source": "https://www.youtube.com/watch?v=ORrELERGIHs",
"config": [], "config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/982d12a5-beab-424f-8d38-d2a48429e511", "trajectory": "trajectories/982d12a5-beab-424f-8d38-d2a48429e511",
"related_apps": [ "related_apps": [
"vscode" "vscode"
@@ -11,11 +26,14 @@
"evaluator": { "evaluator": {
"func": "compare_config", "func": "compare_config",
"expected": { "expected": {
"type": "string", "type": "rule",
"string": "2" "rules": {
"expect": "2"
}
}, },
"result": { "result": {
"type": "vscode_config", "type": "vscode_config",
"vscode_extension_command": "GetColorTheme",
"path": "GetColorTheme.txt", "path": "GetColorTheme.txt",
"dest": "GetColorTheme.txt" "dest": "GetColorTheme.txt"
} }

View File

@@ -3,20 +3,44 @@
"snapshot": "vscode", "snapshot": "vscode",
"instruction": "Help me install the extension Python.", "instruction": "Help me install the extension Python.",
"source": "https://www.youtube.com/watch?v=VqCgcpAypFQ", "source": "https://www.youtube.com/watch?v=VqCgcpAypFQ",
"config": [], "config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/eabc805a-bfcf-4460-b250-ac92135819f6", "trajectory": "trajectories/eabc805a-bfcf-4460-b250-ac92135819f6",
"related_apps": [ "related_apps": [
"vscode" "vscode"
], ],
"evaluator": { "evaluator": {
"func": "compare_answer", "func": "is_extension_installed",
"expected": {
"type": "string",
"string": "ms-python.python\n"
},
"result": { "result": {
"type": "command_line", "type": "vm_command_line",
"command": "code --list-extensions | grep ms-python.python" "command": [
"code",
"--list-extensions",
"|",
"grep",
"ms-python.python"
]
},
"expected": {
"type": "rule",
"rules": {
"type": "contain",
"expected": "ms-python.python"
}
} }
} }
} }

36
main.py
View File

@@ -1,10 +1,10 @@
import datetime
import json import json
from desktop_env.envs.desktop_env import DesktopEnv
import logging import logging
import os import os
import sys import sys
import datetime
from desktop_env.envs.desktop_env import DesktopEnv
# Logger Configs {{{ # # Logger Configs {{{ #
logger = logging.getLogger() logger = logging.getLogger()
@@ -12,17 +12,18 @@ logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str))) file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str))) debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout) stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str))) sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
file_handler.setLevel(logging.INFO) file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG) debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO) stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG) sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") formatter = logging.Formatter(
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter) file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter) debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter) stdout_handler.setFormatter(formatter)
@@ -39,18 +40,20 @@ logger.addHandler(sdebug_handler)
logger = logging.getLogger("desktopenv.main") logger = logging.getLogger("desktopenv.main")
def human_agent(): def human_agent():
""" """
Runs the Gym environment with human input. Runs the Gym environment with human input.
""" """
with open("evaluation_examples/examples/thunderbird/e1e75309-3ddb-4d09-92ec-de869c928143.json.nosetup", "r") as f: with open("evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json", "r") as f:
example = json.load(f) example = json.load(f)
example["snapshot"] = "Snapshot 18" example["snapshot"] = "vscode_setup"
env = DesktopEnv( path_to_vm="../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx" env = DesktopEnv(
, action_space="computer_13" path_to_vm=r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx",
, task_config=example action_space="computer_13",
task_config=example
) )
# reset the environment to certain snapshot # reset the environment to certain snapshot
observation = env.reset() observation = env.reset()
@@ -76,7 +79,8 @@ def human_agent():
# } # }
logger.info(trajectory[i]) logger.info(trajectory[i])
observation, reward, done, info = env.step(trajectory[i], pause=5) observation, reward, done, info = env.step(trajectory[i])
observation.pop("accessibility_tree")
logger.info("Observation: %s", observation) logger.info("Observation: %s", observation)
logger.info("Reward: %.2f", reward) logger.info("Reward: %.2f", reward)
logger.info("Info: %s", info) logger.info("Info: %s", info)
@@ -87,12 +91,14 @@ def human_agent():
logger.info("The episode is done.") logger.info("The episode is done.")
break break
input("PAUSING")
result = env.evaluate() result = env.evaluate()
logger.info("Result: %.2f", result) logger.info("Result: %.2f", result)
#input("PAUSING") input("PAUSING")
#env.close() # env.close()
logger.info("Environment closed.") logger.info("Environment closed.")