ver Jan10th

new Thunderbird task config
This commit is contained in:
David Chang
2024-01-10 17:36:59 +08:00
parent ec3bc3079f
commit cf5d480f44
10 changed files with 325 additions and 182 deletions

View File

@@ -1,5 +1,5 @@
import json
from typing import Any, Dict
from typing import Any, Dict, Optional
import requests
from desktop_env.envs.actions import KEYBOARD_KEYS
@@ -22,6 +22,15 @@ class PythonController:
logger.error("Failed to get screenshot. Status code: %d", response.status_code)
return None
def get_accessibility_tree(self) -> Optional[str]:
    """
    Fetch the accessibility tree from the server's "/accessibility" endpoint.

    Returns:
        Optional[str]: the value stored under the "AT" key of the JSON
          response when the server answers with status code 200; None
          otherwise (the failing status code is logged as an error).
    """

    response: requests.Response = requests.get(self.http_server + "/accessibility")

    # Guard clause: anything but 200 is reported and yields no tree.
    if response.status_code != 200:
        logger.error("Failed to get accessibility tree. Status code: %d", response.status_code)
        return None

    return response.json()["AT"]
def get_file(self, file_path: str):
"""
Gets a file from the server.
@@ -65,7 +74,7 @@ class PythonController:
if action_type == "MOVE_TO":
if parameters == {} or None:
self.execute_python_command(f"pyautogui.moveTo()")
self.execute_python_command("pyautogui.moveTo()")
elif "x" in parameters and "y" in parameters:
x = parameters["x"]
y = parameters["y"]
@@ -75,7 +84,7 @@ class PythonController:
elif action_type == "CLICK":
if parameters == {} or None:
self.execute_python_command(f"pyautogui.click()")
self.execute_python_command("pyautogui.click()")
elif "button" in parameters and "x" in parameters and "y" in parameters:
button = parameters["button"]
x = parameters["x"]
@@ -106,7 +115,7 @@ class PythonController:
elif action_type == "MOUSE_DOWN":
if parameters == {} or None:
self.execute_python_command(f"pyautogui.mouseDown()")
self.execute_python_command("pyautogui.mouseDown()")
elif "button" in parameters:
button = parameters["button"]
self.execute_python_command(f"pyautogui.mouseDown(button='{button}')")
@@ -115,7 +124,7 @@ class PythonController:
elif action_type == "MOUSE_UP":
if parameters == {} or None:
self.execute_python_command(f"pyautogui.mouseUp()")
self.execute_python_command("pyautogui.mouseUp()")
elif "button" in parameters:
button = parameters["button"]
self.execute_python_command(f"pyautogui.mouseUp(button='{button}')")
@@ -124,7 +133,7 @@ class PythonController:
elif action_type == "RIGHT_CLICK":
if parameters == {} or None:
self.execute_python_command(f"pyautogui.rightClick()")
self.execute_python_command("pyautogui.rightClick()")
elif "x" in parameters and "y" in parameters:
x = parameters["x"]
y = parameters["y"]
@@ -134,7 +143,7 @@ class PythonController:
elif action_type == "DOUBLE_CLICK":
if parameters == {} or None:
self.execute_python_command(f"pyautogui.doubleClick()")
self.execute_python_command("pyautogui.doubleClick()")
elif "x" in parameters and "y" in parameters:
x = parameters["x"]
y = parameters["y"]
@@ -200,7 +209,7 @@ class PythonController:
raise Exception(f"Unknown parameters: {parameters}")
keys = parameters["keys"]
if not isinstance(keys, list):
raise Exception(f"Keys must be a list of keys")
raise Exception("Keys must be a list of keys")
for key in keys:
if key.lower() not in KEYBOARD_KEYS:
raise Exception(f"Key must be one of {KEYBOARD_KEYS}")

View File

@@ -223,7 +223,7 @@ class DesktopEnv(gym.Env):
Evaluate whether the task is successfully completed.
"""
self.setup_controller.setup(self.evaluator["postconfig"])
self.setup_controller.setup(self.evaluator.get("postconfig", []))
result_state = self.result_getter(self, self.evaluator["result"])
expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \

View File

@@ -1,19 +1,19 @@
from typing import TypeVar, Any
from typing import Dict, List
from typing import TypeVar
#from typing import Dict, List
import platform
import subprocess
import ctypes
import os
import pyatspi
from pyatspi import Accessible, StateType
from pyatspi import Component, Document
from pyatspi import Text as ATText
from pyatspi import Value as ATValue
from pyatspi import Action as ATAction
import lxml.etree
from lxml.etree import _Element
#import pyatspi
#from pyatspi import Accessible, StateType
#from pyatspi import Component, Document
#from pyatspi import Text as ATText
#from pyatspi import Value as ATValue
#from pyatspi import Action as ATAction
#import lxml.etree
#from lxml.etree import _Element
import logging
logger = logging.getLogger("desktopenv.getters.misc")
@@ -75,146 +75,21 @@ def get_wallpaper(*args):
else:
return "Unsupported OS"
_accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org"
, "attr": "uri:deskat:attributes.at-spi.gnome.org"
, "cp": "uri:deskat:component.at-spi.gnome.org"
, "doc": "uri:deskat:document.at-spi.gnome.org"
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
, "txt": "uri:deskat:text.at-spi.gnome.org"
, "val": "uri:deskat:value.at-spi.gnome.org"
, "act": "uri:deskat:action.at-spi.gnome.org"
}
def _create_node(node: Accessible) -> _Element:
attribute_dict: Dict[str, Any] = {"name": node.name}
#def get_accessibility_tree(*args) -> _Element:
#desktop: Accessible = pyatspi.Registry.getDesktop(0)
#desktop_xml: _Element = _create_node(desktop)
#return desktop_xml
# States {{{ #
states: List[StateType] = node.getState().get_states()
for st in states:
state_name: str = StateType._enum_lookup[st]
attribute_dict[ "{{{:}}}{:}"\
.format( _accessibility_ns_map["st"]
, state_name.split("_", maxsplit=1)[1].lower()
)
] = "true"
# }}} States #
def get_accessibility_tree(env, *args) -> str:
    """
    Getter that obtains the accessibility tree through the environment's
    controller and logs it at debug level.

    Args:
        env: object exposing `controller.get_accessibility_tree()`
        *args: accepted and ignored

    Returns:
        str: whatever `env.controller.get_accessibility_tree()` returned
    """

    tree: str = env.controller.get_accessibility_tree()
    logger.debug("AT@eval: %s", tree)
    return tree
# Attributes {{{ #
attributes: List[str] = node.getAttributes()
for attrbt in attributes:
attribute_name: str
attribute_value: str
attribute_name, attribute_value = attrbt.split(":", maxsplit=1)
attribute_dict[ "{{{:}}}{:}"\
.format( _accessibility_ns_map["attr"]
, attribute_name
)
] = attribute_value
# }}} Attributes #
# Component {{{ #
try:
component: Component = node.queryComponent()
except NotImplementedError:
pass
else:
attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_SCREEN))
attribute_dict["{{{:}}}windowcoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_WINDOW))
attribute_dict["{{{:}}}parentcoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_PARENT))
attribute_dict["{{{:}}}size".format(_accessibility_ns_map["cp"])] = str(component.getSize())
# }}} Component #
# Document {{{ #
try:
document: Document = node.queryDocument()
except NotImplementedError:
pass
else:
attribute_dict["{{{:}}}locale".format(_accessibility_ns_map["doc"])] = document.getLocale()
attribute_dict["{{{:}}}pagecount".format(_accessibility_ns_map["doc"])] = str(document.getPageCount())
attribute_dict["{{{:}}}currentpage".format(_accessibility_ns_map["doc"])] = str(document.getCurrentPageNumber())
for attrbt in document.getAttributes():
attribute_name: str
attribute_value: str
attribute_name, attribute_value = attrbt.split(":", maxsplit=1)
attribute_dict[ "{{{:}}}{:}"\
.format( _accessibility_ns_map["docattr"]
, attribute_name
)
] = attribute_value
# }}} Document #
# Text {{{ #
try:
text_obj: ATText = node.queryText()
except NotImplementedError:
pass
else:
# only text shown on current screen is available
#attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount)
text: str = text_obj.getText(0, text_obj.characterCount)
# }}} Text #
# Selection {{{ #
try:
node.querySelection()
except NotImplementedError:
pass
else:
attribute_dict["selection"] = "true"
# }}} Selection #
# Value {{{ #
try:
value: ATValue = node.queryValue()
except NotImplementedError:
pass
else:
attribute_dict["{{{:}}}value".format(_accessibility_ns_map["val"])] = str(value.currentValue)
attribute_dict["{{{:}}}min".format(_accessibility_ns_map["val"])] = str(value.minimumValue)
attribute_dict["{{{:}}}max".format(_accessibility_ns_map["val"])] = str(value.maximumValue)
attribute_dict["{{{:}}}step".format(_accessibility_ns_map["val"])] = str(value.minimumIncrement)
# }}} Value #
# Action {{{ #
try:
action: ATAction = node.queryAction()
except NotImplementedError:
pass
else:
for i in range(action.nActions):
action_name: str = action.getName(i).replace(" ", "-")
attribute_dict[ "{{{:}}}{:}_desc"\
.format( _accessibility_ns_map["act"]
, action_name
)
] = action.getDescription(i)
attribute_dict[ "{{{:}}}{:}_kb"\
.format( _accessibility_ns_map["act"]
, action_name
)
] = action.getKeyBinding(i)
# }}} Action #
xml_node = lxml.etree.Element( node.getRoleName().replace(" ", "-")
, attrib=attribute_dict
, nsmap=_accessibility_ns_map
)
if "text" in locals() and len(text)>0:
xml_node.text = text
for ch in node:
xml_node.append(_create_node(ch))
return xml_node
def get_accessibility_tree(*args) -> _Element:
desktop: Accessible = pyatspi.Registry.getDesktop(0)
desktop_xml: _Element = _create_node(desktop)
return desktop_xml
if __name__ == "__main__":
import sys
with open(sys.argv[1], "w") as f:
f.write( lxml.etree.tostring( get_accessibility_tree()
, encoding="unicode"
, pretty_print=True
)
)
#if __name__ == "__main__":
#import sys
#with open(sys.argv[1], "w") as f:
#f.write( lxml.etree.tostring( get_accessibility_tree()
#, encoding="unicode"
#, pretty_print=True
#)
#)

View File

@@ -6,4 +6,4 @@ from .docs import is_first_line_centered, check_file_exists, compare_contains_im
from .pdf import check_pdf_pages
from .libreoffice import check_libre_locale
#from .vlc import is_vlc_playing
from .general import check_csv
from .general import check_csv, check_accessibility_tree

View File

@@ -1,5 +1,16 @@
import csv
import lxml.etree
from lxml.etree import _Element
from lxml.cssselect import CSSSelector
from typing import Dict, List
from typing import Callable, Any
from numbers import Number
import operator
from rapidfuzz import fuzz
import functools
def _match_record(pattern: Dict[str, str], item: Dict[str, str]) -> float:
return all(k in item and item[k]==val for k, val in pattern.items())
@@ -28,3 +39,57 @@ def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
expect_metrics[i] = expect_metrics[i] or _match_record(r, rcd)
unexpect_metric = unexpect_metric and all(_match_record(r, rcd) for r in rules.get("unexpect", []))
return float(all(expect_metrics) and unexpect_metric)
_accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org"
, "attr": "uri:deskat:attributes.at-spi.gnome.org"
, "cp": "uri:deskat:component.at-spi.gnome.org"
, "doc": "uri:deskat:document.at-spi.gnome.org"
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
, "txt": "uri:deskat:text.at-spi.gnome.org"
, "val": "uri:deskat:value.at-spi.gnome.org"
, "act": "uri:deskat:action.at-spi.gnome.org"
}
def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
    """
    Check whether an accessibility tree contains an element selected by
    `rules`, optionally with a given text content.

    Args:
        result (str): XML of GNOME Accessibility Tree
        rules (Dict[str, Any]): dict like
          {
            "selectors": list of str as CSS selectors, will be connected by ", "
              to form a composite selector. Only one from `selectors` and
              `xpath` is needed. If both are present, `xpath` takes the
              priority.
            "xpath": str as xpath. Only one from `selectors` and `xpath` is
              needed. If both are present, `xpath` takes the priority.
            "text": str as the expected text content of the selected element.
            "exact": bool specifying whether exact match or fuzzy match should
              be performed. defaults to True
          }

    Returns:
        float: 0 if `result` is empty or no element is selected; 1 if
          elements are selected and no "text" rule is given; otherwise the
          best text match over the selected elements, in [0, 1] (0 or 1 for
          exact matching, the fuzzy ratio scaled to [0, 1] otherwise).

    Raises:
        ValueError: if neither "xpath" nor "selectors" is present in `rules`.
    """

    if not result:
        # No tree to inspect (None or empty string): nothing can match.
        return 0.

    at: _Element = lxml.etree.fromstring(result)
    if "xpath" in rules:
        elements: List[_Element] = at.xpath(rules["xpath"], namespaces=_accessibility_ns_map)
    elif "selectors" in rules:
        selector = CSSSelector(", ".join(rules["selectors"]), namespaces=_accessibility_ns_map)
        elements = selector(at)
    else:
        raise ValueError("At least one of xpath and selectors is required")

    if len(elements) == 0:
        return 0.

    if "text" not in rules:
        return 1.

    expected_text: str = rules["text"]
    # "exact" is documented as optional with default True.
    if rules.get("exact", True):
        match_func: Callable[[str], Number] = functools.partial(operator.eq, expected_text)
    else:
        # fuzz.ratio scores on a 0-100 scale; rescale so that fuzzy and exact
        # matching share the same [0, 1] range.
        def match_func(text: str) -> Number:
            return fuzz.ratio(expected_text, text) / 100.

    # An element without text content has `.text` of None; compare it as an
    # empty string rather than handing None to the comparator.
    return float(max(match_func(elm.text or "") for elm in elements))

View File

@@ -2,14 +2,26 @@ import os
from pathlib import Path
import platform
import subprocess
import requests
from .pyxcursor import Xcursor
from pyxcursor import Xcursor
# import Xlib.display
import pyautogui
# from PIL import ImageGrab, Image
from PIL import Image
import lxml.etree
from lxml.etree import _Element
import pyatspi
from pyatspi import Accessible, StateType
from pyatspi import Component, Document
from pyatspi import Text as ATText
from pyatspi import Value as ATValue
from pyatspi import Action as ATAction
import requests
from flask import Flask, request, jsonify, send_file
from typing import List
from typing import List, Dict
from typing import Any
app = Flask(__name__)
@@ -100,6 +112,141 @@ def capture_screen_with_cursor():
return send_file(file_path, mimetype='image/png')
_accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org"
, "attr": "uri:deskat:attributes.at-spi.gnome.org"
, "cp": "uri:deskat:component.at-spi.gnome.org"
, "doc": "uri:deskat:document.at-spi.gnome.org"
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
, "txt": "uri:deskat:text.at-spi.gnome.org"
, "val": "uri:deskat:value.at-spi.gnome.org"
, "act": "uri:deskat:action.at-spi.gnome.org"
}
def _create_node(node: Accessible) -> _Element:
    """
    Recursively convert an accessible object into an lxml element.

    The element tag is the node's role name with spaces replaced by "-".
    The node's name, states, attributes and the data of whichever of the
    Component, Document, Selection, Value and Action interfaces it offers
    are stored as XML attributes, namespaced through `_accessibility_ns_map`
    (except "name" and "selection", which are un-namespaced). Text obtained
    from the Text interface becomes the element's text, and every child of
    `node` is converted and appended in iteration order.

    Args:
        node (Accessible): accessible object to convert

    Returns:
        _Element: XML element for `node` including all of its descendants
    """

    def _qualified(prefix: str, local_name: str) -> str:
        # "{namespace-uri}local-name" form of a namespaced attribute name
        return "{{{:}}}{:}".format(_accessibility_ns_map[prefix], local_name)

    attribute_dict: Dict[str, Any] = {"name": node.name}

    # States {{{ #
    states: List[StateType] = node.getState().get_states()
    for st in states:
        state_name: str = StateType._enum_lookup[st]
        # drop everything up to the first "_" of the enum name and lowercase
        attribute_dict[_qualified("st", state_name.split("_", maxsplit=1)[1].lower())] = "true"
    # }}} States #

    # Attributes {{{ #
    attributes: List[str] = node.getAttributes()
    for attrbt in attributes:
        attribute_name: str
        attribute_value: str
        # partition() instead of split()+unpack: an entry without ":" yields
        # an empty value instead of raising and aborting the whole dump
        attribute_name, _, attribute_value = attrbt.partition(":")
        attribute_dict[_qualified("attr", attribute_name)] = attribute_value
    # }}} Attributes #

    # Component {{{ #
    try:
        component: Component = node.queryComponent()
    except NotImplementedError:
        pass
    else:
        attribute_dict[_qualified("cp", "screencoord")] = str(component.getPosition(pyatspi.XY_SCREEN))
        attribute_dict[_qualified("cp", "windowcoord")] = str(component.getPosition(pyatspi.XY_WINDOW))
        attribute_dict[_qualified("cp", "parentcoord")] = str(component.getPosition(pyatspi.XY_PARENT))
        attribute_dict[_qualified("cp", "size")] = str(component.getSize())
    # }}} Component #

    # Document {{{ #
    try:
        document: Document = node.queryDocument()
    except NotImplementedError:
        pass
    else:
        attribute_dict[_qualified("doc", "locale")] = document.getLocale()
        attribute_dict[_qualified("doc", "pagecount")] = str(document.getPageCount())
        attribute_dict[_qualified("doc", "currentpage")] = str(document.getCurrentPageNumber())
        for attrbt in document.getAttributes():
            attribute_name, _, attribute_value = attrbt.partition(":")
            attribute_dict[_qualified("docattr", attribute_name)] = attribute_value
    # }}} Document #

    # Text {{{ #
    # Initialised up front so the check below does not have to inspect
    # locals() to find out whether the Text interface was available.
    text: str = ""
    try:
        text_obj: ATText = node.queryText()
    except NotImplementedError:
        pass
    else:
        # only text shown on current screen is available
        #attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount)
        text = text_obj.getText(0, text_obj.characterCount)
    # }}} Text #

    # Selection {{{ #
    try:
        node.querySelection()
    except NotImplementedError:
        pass
    else:
        attribute_dict["selection"] = "true"
    # }}} Selection #

    # Value {{{ #
    try:
        value: ATValue = node.queryValue()
    except NotImplementedError:
        pass
    else:
        attribute_dict[_qualified("val", "value")] = str(value.currentValue)
        attribute_dict[_qualified("val", "min")] = str(value.minimumValue)
        attribute_dict[_qualified("val", "max")] = str(value.maximumValue)
        attribute_dict[_qualified("val", "step")] = str(value.minimumIncrement)
    # }}} Value #

    # Action {{{ #
    try:
        action: ATAction = node.queryAction()
    except NotImplementedError:
        pass
    else:
        for i in range(action.nActions):
            action_name: str = action.getName(i).replace(" ", "-")
            attribute_dict[_qualified("act", "{:}_desc".format(action_name))] = action.getDescription(i)
            attribute_dict[_qualified("act", "{:}_kb".format(action_name))] = action.getKeyBinding(i)
    # }}} Action #

    xml_node = lxml.etree.Element( node.getRoleName().replace(" ", "-")
                                 , attrib=attribute_dict
                                 , nsmap=_accessibility_ns_map
                                 )
    if text:
        xml_node.text = text
    for ch in node:
        xml_node.append(_create_node(ch))
    return xml_node
@app.route("/accessibility", methods=["GET"])
def get_accessibility_tree():
    """
    GET /accessibility: dump the accessibility tree of desktop 0.

    The desktop object from the pyatspi registry is converted to XML with
    `_create_node`, serialized to a unicode string and returned as a JSON
    object under the key "AT".
    """

    root: Accessible = pyatspi.Registry.getDesktop(0)
    serialized: str = lxml.etree.tostring(_create_node(root), encoding="unicode")
    return jsonify({"AT": serialized})
@app.route('/file', methods=['POST'])
def get_file():

View File

@@ -3,10 +3,57 @@
"snapshot": "thunderbird",
"instruction": "Could you help me open up the Thunderbird profile manager utility?",
"source": "https://www.quora.com/How-do-I-open-a-Thunderbird-profile-manager-utility",
"config": [],
"trajectory": "trajectories/",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712",
"path": "/home/user/thunderbird-profile.tar.gz"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"tar",
"-xzv",
"--recursive-unlink",
"-f",
"/home/user/thunderbird-profile.tar.gz",
"-C",
"/home/user/"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"/usr/bin/thunderbird"
]
}
}
],
"trajectory": "trajectories/12086550-11c0-466b-b367-1d9e75b3910e",
"related_apps": [
"thunderbird"
],
"evaluator": "evaluation_dir"
"evaluator": {
"result": {
"type": "accessibility_tree"
},
"expected": {
"type": "rule",
"rules": {
"selectors": [
"application[name=Thunderbird] page-tab-list[attr|id=\"tabmail-tabs\"]>page-tab[name=\"About Profiles\"]"
]
}
},
"func": "check_accessibility_tree"
}
}

View File

@@ -10,7 +10,7 @@
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712",
"path": "/home/david/thunderbird-profile.tar.gz"
"path": "/home/user/thunderbird-profile.tar.gz"
}
]
}
@@ -23,9 +23,9 @@
"-xzv",
"--recursive-unlink",
"-f",
"/home/david/thunderbird-profile.tar.gz",
"/home/user/thunderbird-profile.tar.gz",
"-C",
"/home/david/"
"/home/user/"
]
}
},
@@ -50,7 +50,7 @@
"files": [
{
"url": "https://raw.githubusercontent.com/unode/firefox_decrypt/main/firefox_decrypt.py",
"path": "/home/david/firefox_decrypt.py"
"path": "/home/user/firefox_decrypt.py"
}
]
}
@@ -60,8 +60,8 @@
"parameters": {
"command": [
"python3",
"/home/david/firefox_decrypt.py",
"/home/david/.thunderbird",
"/home/user/firefox_decrypt.py",
"/home/user/.thunderbird",
"-n",
"-c",
"2",

View File

@@ -10,7 +10,7 @@
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1EHLRWzBCOsyERkSMUnTF2pnsR0n6ZvtR&export=download&authuser=0&confirm=t&uuid=de09bd5e-bef8-499a-b599-c642af190e10&at=APZUnTXqOsQkxl0zMSX6R1Sgp_v3:1704362491712",
"path": "/home/david/thunderbird-profile.tar.gz"
"path": "/home/user/thunderbird-profile.tar.gz"
}
]
}
@@ -23,9 +23,9 @@
"-xzv",
"--recursive-unlink",
"-f",
"/home/david/thunderbird-profile.tar.gz",
"/home/user/thunderbird-profile.tar.gz",
"-C",
"/home/david/"
"/home/user/"
]
}
},
@@ -50,7 +50,7 @@
"files": [
{
"url": "https://raw.githubusercontent.com/unode/firefox_decrypt/main/firefox_decrypt.py",
"path": "/home/david/firefox_decrypt.py"
"path": "/home/user/firefox_decrypt.py"
}
]
}
@@ -60,8 +60,8 @@
"parameters": {
"command": [
"python3",
"/home/david/firefox_decrypt.py",
"/home/david/.thunderbird",
"/home/user/firefox_decrypt.py",
"/home/user/.thunderbird",
"-n",
"-c",
"2",

View File

@@ -44,11 +44,11 @@ def human_agent():
Runs the Gym environment with human input.
"""
with open("evaluation_examples/examples/thunderbird/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json", "r") as f:
with open("evaluation_examples/examples/thunderbird/12086550-11c0-466b-b367-1d9e75b3910e.json", "r") as f:
example = json.load(f)
example["snapshot"] = "Snapshot 13"
example["snapshot"] = "Snapshot 9"
env = DesktopEnv( path_to_vm="/home/david/vmware/KUbuntu 64-bit/KUbuntu 64-bit.vmx"
env = DesktopEnv( path_to_vm="../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx"
, action_space="computer_13"
, task_config=example
)