Merge branch 'zdy'

This commit is contained in:
David Chang
2024-01-30 21:14:57 +08:00
3 changed files with 287 additions and 16 deletions

View File

@@ -9,18 +9,29 @@ from typing import List, Dict, Tuple
import Xlib import Xlib
import lxml.etree import lxml.etree
import pyatspi
import pyautogui import pyautogui
import requests import requests
from PIL import Image from PIL import Image
from Xlib import display, X from Xlib import display, X
from flask import Flask, request, jsonify, send_file, abort # , send_from_directory from flask import Flask, request, jsonify, send_file, abort # , send_from_directory
from lxml.etree import _Element from lxml.etree import _Element
from pyatspi import Accessible, StateType, STATE_SHOWING
from pyatspi import Action as ATAction platform_name: str = platform.system()
from pyatspi import Component, Document
from pyatspi import Text as ATText if platform_name=="Linux":
from pyatspi import Value as ATValue import pyatspi
from pyatspi import Accessible, StateType, STATE_SHOWING
from pyatspi import Action as ATAction
from pyatspi import Component, Document
from pyatspi import Text as ATText
from pyatspi import Value as ATValue
BaseWrapper = Any
elif platform_name=="Windows":
from pywinauto import Desktop
from pywinauto.base_wrapper import BaseWrapper
Accessible = Any
from pyxcursor import Xcursor from pyxcursor import Xcursor
@@ -181,18 +192,20 @@ def get_terminal_output():
return jsonify({"status": "error", "message": str(e)}), 500 return jsonify({"status": "error", "message": str(e)}), 500
_accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org" _accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org"
, "attr": "uri:deskat:attributes.at-spi.gnome.org" , "attr": "uri:deskat:attributes.at-spi.gnome.org"
, "cp": "uri:deskat:component.at-spi.gnome.org" , "cp": "uri:deskat:component.at-spi.gnome.org"
, "doc": "uri:deskat:document.at-spi.gnome.org" , "doc": "uri:deskat:document.at-spi.gnome.org"
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org" , "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
, "txt": "uri:deskat:text.at-spi.gnome.org" , "txt": "uri:deskat:text.at-spi.gnome.org"
, "val": "uri:deskat:value.at-spi.gnome.org" , "val": "uri:deskat:value.at-spi.gnome.org"
, "act": "uri:deskat:action.at-spi.gnome.org" , "act": "uri:deskat:action.at-spi.gnome.org"
} , "win": "uri:deskat:uia.windows.microsoft.org"
}
def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = None) -> _Element: def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = None) -> _Element:
# function _create_atspi_node {{{ #
if node.getRoleName() == "document spreadsheet": if node.getRoleName() == "document spreadsheet":
flag = "calc" flag = "calc"
if node.getRoleName() == "application" and node.name=="Thunderbird": if node.getRoleName() == "application" and node.name=="Thunderbird":
@@ -381,6 +394,175 @@ def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = N
break break
xml_node.append(_create_atspi_node(ch, depth+1, flag)) xml_node.append(_create_atspi_node(ch, depth+1, flag))
return xml_node return xml_node
# }}} function _create_atspi_node #
# HYPERPARAMETERS: recursion depth and per-node fan-out limits of the UIA dump.
_PYWINAUTO_MAX_DEPTH = 50
_PYWINAUTO_MAX_CHILDREN = 2048

# Optional boolean states. Each entry ``x`` is probed through ``node.is_x()``
# when the wrapper class provides that method. The order is significant: it is
# the order in which the attributes are inserted into the XML element.
_PYWINAUTO_OPTIONAL_STATES = (
    "minimized", "maximized", "normal", "unicode", "collapsed",
    "checkable", "checked", "focused", "keyboard_focused", "selected",
    "selection_required", "pressable", "pressed", "expanded", "editable",
)

# Value-like attributes: (XML local name, getter names by decreasing priority).
_PYWINAUTO_VALUE_GETTERS = (
    ("step", ("get_step",)),
    ("value", ("get_value", "get_position", "value")),
    ("min", ("min_value", "get_range_min")),
    ("max", ("max_value", "get_range_max")),
)


def _pywinauto_attr_key(prefix: str, local_name: str) -> str:
    """Return the ``{namespace-uri}local_name`` attribute key for *prefix*."""
    return "{{{:}}}{:}".format(_accessibility_ns_map[prefix], local_name)


def _pywinauto_probe(node: Any, method_names) -> Optional[str]:
    """Return ``str()`` of the first listed getter that exists and succeeds.

    Getters missing from *node* are skipped. A getter that raises is logged at
    debug level and the next candidate is tried. ``None`` is returned when no
    candidate produced a value.
    """
    for method_name in method_names:
        getter = getattr(node, method_name, None)
        if getter is None:
            continue
        try:
            return str(getter())
        except Exception as exc:  # best-effort: one bad control must not abort the dump
            logger.debug("pywinauto probe %s() failed: %s", method_name, exc)
    return None


def _create_pywinauto_node(node: BaseWrapper, depth: int = 0, flag: Optional[str] = None) -> _Element:
    #  function _create_pywinauto_node {{{ #
    """Convert a pywinauto wrapper and its descendants into an lxml element.

    Args:
        node: the pywinauto wrapper to serialise.
        depth: current recursion depth; the subtree is cut once it reaches
            ``_PYWINAUTO_MAX_DEPTH``.
        flag: not used by this function itself; passed unchanged to the
            recursive calls.

    Returns:
        An element whose tag is derived from ``node.class_name()`` and whose
        attributes carry the name, states, geometry and value of the control.
    """
    attribute_dict: Dict[str, Any] = {"name": node.element_info.name}

    #  States {{{ #
    # These three are called unconditionally, exactly as before.
    attribute_dict[_pywinauto_attr_key("st", "enabled")] = str(node.is_enabled()).lower()
    attribute_dict[_pywinauto_attr_key("st", "visible")] = str(node.is_visible()).lower()
    attribute_dict[_pywinauto_attr_key("st", "active")] = str(node.is_active()).lower()

    for state in _PYWINAUTO_OPTIONAL_STATES:
        state_value = _pywinauto_probe(node, ("is_" + state,))
        if state_value is not None:
            attribute_dict[_pywinauto_attr_key("st", state)] = state_value.lower()
    #  }}} States #

    #  Component {{{ #
    rectangle = node.rectangle()
    attribute_dict[_pywinauto_attr_key("cp", "screencoord")] = \
        "({:d}, {:d})".format(rectangle.left, rectangle.top)
    attribute_dict[_pywinauto_attr_key("cp", "size")] = \
        "({:d}, {:d})".format(rectangle.width(), rectangle.height())
    #  }}} Component #

    #  Text {{{ #
    # The window text is only kept when it adds something to the name.
    text: str = node.window_text()
    if text == attribute_dict["name"]:
        text = ""
    #  }}} Text #

    #  Selection {{{ #
    if hasattr(node, "select"):
        attribute_dict["selection"] = "true"
    #  }}} Selection #

    #  Value {{{ #
    for local_name, getter_names in _PYWINAUTO_VALUE_GETTERS:
        value = _pywinauto_probe(node, getter_names)
        if value is not None:
            attribute_dict[_pywinauto_attr_key("val", local_name)] = value
    #  }}} Value #

    attribute_dict[_pywinauto_attr_key("win", "class")] = str(type(node))

    #  Tag name {{{ #
    node_role_name: str = (node.class_name() or "").lower().replace(" ", "-")
    node_role_name = "".join(
        ch if ch.isidentifier() or ch == "-" or ch.isalnum() else "-"
        for ch in node_role_name
    )
    if node_role_name.strip() == "":
        node_role_name = "unknown"
    # An XML name cannot start with a digit or "-". Class names such as
    # "#32770" sanitise to "-32770", which lxml rejects as a tag name.
    if not node_role_name[0].isidentifier():
        node_role_name = "tag" + node_role_name
    #  }}} Tag name #

    xml_node = lxml.etree.Element(
        node_role_name,
        attrib=attribute_dict,
        nsmap=_accessibility_ns_map
    )
    if text:
        xml_node.text = text

    if depth >= _PYWINAUTO_MAX_DEPTH:
        logger.warning("Max depth reached")
        return xml_node

    for i, ch in enumerate(node.children()):
        if i >= _PYWINAUTO_MAX_CHILDREN:
            logger.warning("Max width reached")
            break
        xml_node.append(_create_pywinauto_node(ch, depth + 1, flag))
    return xml_node
    #  }}} function _create_pywinauto_node #
@app.route("/accessibility", methods=["GET"]) @app.route("/accessibility", methods=["GET"])
def get_accessibility_tree(): def get_accessibility_tree():
@@ -392,7 +574,15 @@ def get_accessibility_tree():
desktop_xml: _Element = _create_atspi_node(desktop, 0) desktop_xml: _Element = _create_atspi_node(desktop, 0)
return jsonify({"AT": lxml.etree.tostring(desktop_xml, encoding="unicode")}) return jsonify({"AT": lxml.etree.tostring(desktop_xml, encoding="unicode")})
# TODO: Windows AT may be read through `pywinauto` module, however, two different backends `win32` and `uia` are supported and different results may be returned elif os_name == "Windows":
# Windows AT may be read through `pywinauto` module, however, two different backends `win32` and `uia` are supported and different results may be returned
desktop: Desktop = Desktop(backend="uia")
xml_node = lxml.etree.Element("desktop", nsmap=_accessibility_ns_map)
for wnd in desktop.windows():
logger.debug("Win UIA AT parsing: %s(%d)", wnd.element_info.name, len(wnd.children()))
node: _Element = _create_pywinauto_node(wnd, 1)
xml_node.append(node)
return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})
else: else:
return "Currently not implemented for platform {:}.".format(platform.platform()), 500 return "Currently not implemented for platform {:}.".format(platform.platform()), 500

View File

@@ -0,0 +1,76 @@
{
"id": "01b269ae-2111-4a07-81fd-3fcd711993b0",
"snapshot": "libreoffice_calc",
"instruction": "Fill all the blank cells with the value in the cell above it",
"source": "https://www.youtube.com/shorts/VrUzPTIwQ04",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1FuOZ-5YoKgLLwl_oZd4R3D8pZACf_ukS&export=download&authuser=0&confirm=t&uuid=2051e7a6-5930-4cef-8d77-20ebf66ec6e6&at=APZUnTX1fXqlxy6rluq-Kw-LUhS5:1705919461032",
"path": "C:\\Users\\user\\Student_Level_Fill_Blank.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\user\\Student_Level_Fill_Blank.xlsx"
}
}
],
"trajectory": "trajectories/01b269ae-2111-4a07-81fd-3fcd711993b0",
"related_apps": [
"msoffice_excel"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Student_Level_Fill_Blank.xlsx - Excel",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");"
]
}
}
],
"func": "compare_table",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1HTle3vgdZSjJIK_wjXyjtWwbiYJeguwv&export=download&authuser=0&confirm=t&uuid=c5d0868b-bed2-48fb-949b-8a9f3f61e8cf&at=APZUnTVqS9CTZFJ1rPqCGQPDCv3p:1705919542916",
"dest": "Student_Level_Fill_Blank_gold.xlsx"
},
"result": {
"type": "vm_file",
"path": "C:\\Users\\user\\Student_Level_Fill_Blank.xlsx",
"dest": "Student_Level_Fill_Blank.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -45,6 +45,10 @@ def linearize_accessibility_tree(accessibility_tree):
linearized_accessibility_tree += node.attrib.get('name') + "\t" linearized_accessibility_tree += node.attrib.get('name') + "\t"
if node.text: if node.text:
linearized_accessibility_tree += (node.text if '"' not in node.text else '"{:}"'.format(node.text.replace('"', '""'))) + "\t" linearized_accessibility_tree += (node.text if '"' not in node.text else '"{:}"'.format(node.text.replace('"', '""'))) + "\t"
elif node.get("{uri:deskat:uia.windows.microsoft.org}class").endswith("EditWrapper")\
and node.get("{uri:deskat:value.at-spi.gnome.org}value"):
text: str = node.get("{uri:deskat:value.at-spi.gnome.org}value")
linearized_accessibility_tree += (text if '"' not in text else '"{:}"'.format(text.replace('"', '""'))) + "\t"
else: else:
linearized_accessibility_tree += '""\t' linearized_accessibility_tree += '""\t'
linearized_accessibility_tree += node.attrib.get( linearized_accessibility_tree += node.attrib.get(
@@ -278,6 +282,7 @@ class GPT4v_Agent:
elif self.exp in ["som", "seeact"]: elif self.exp in ["som", "seeact"]:
_screenshot = previous_obs["screenshot"] _screenshot = previous_obs["screenshot"]
_linearized_accessibility_tree = previous_obs["accessibility_tree"] _linearized_accessibility_tree = previous_obs["accessibility_tree"]
logger.debug("LINEAR AT: %s", _linearized_accessibility_tree)
messages.append({ messages.append({
"role": "user", "role": "user",