diff --git a/.gitignore b/.gitignore index 22367f9..df58997 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,8 @@ branch_flag branch-config *.syncthing.*.tmp cache +version.folder +at_processing test.xlsx test2.xlsx diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 4d9763b..f691724 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -276,12 +276,12 @@ def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = N # only text shown on current screen is available # attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount) text: str = text_obj.getText(0, text_obj.characterCount) - if flag=="thunderbird": - # appeard in thunderbird (uFFFC), "Object Replacement Character" in - # Unicode, "used as placeholder in text for an otherwise - # unspecified object; uFFFD is another "Replacement Character", - # just in case - text = text.replace("\ufffc", "").replace("\ufffd", "") + #if flag=="thunderbird": + # appeard in thunderbird (uFFFC) (not only in thunderbird), "Object + # Replacement Character" in Unicode, "used as placeholder in text for + # an otherwise unspecified object; uFFFD is another "Replacement + # Character", just in case + text = text.replace("\ufffc", "").replace("\ufffd", "") # }}} Text # # Selection {{{ # diff --git a/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json index be80232..34d0353 100644 --- a/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json +++ b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json @@ -57,7 +57,7 @@ { "type": "launch", "parameters": { - "command": ["nautilus"] + "command": ["nautilus", "/home/user/Documents/Finance"] } } ], diff --git a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py index 7ab439f..7c45ec3 100644 --- a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py +++ b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py @@ -25,35 +25,42 @@ def find_leaf_nodes(xlm_file_str): return leaf_nodes -def filter_nodes(nodes): +def filter_nodes(nodes, platform="ubuntu"): filtered_nodes = [] for node in nodes: - if not node.get('{uri:deskat:state.at-spi.gnome.org}visible', None) == 'true': - # Not visible - continue - # Check if the node is a 'panel' - if node.tag == 'panel': - # Check if the 'panel' represents an interactive element - # or if it has certain attributes that are of interest. - # Add your conditions here... - if node.get('{uri:deskat:state.at-spi.gnome.org}focusable', 'false') == 'true': - filtered_nodes.append(node) - elif node.tag == 'text': - continue - elif node.get("name") == "" and node.text is None: - continue - else: - coords = tuple( - map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', '))) - if coords[0] < 0 or coords[1] < 0: - continue - size = tuple( - map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', '))) - if size[0] <= 0 or size[1] <= 0: - continue - # Node is not a 'panel', add to the list. - filtered_nodes.append(node) + if node.tag.startswith("document")\ + or node.tag.endswith("item")\ + or node.tag.endswith("button")\ + or node.tag.endswith("heading")\ + or node.tag.endswith("label")\ + or node.tag.endswith("bar")\ + or node.tag.endswith("searchbox")\ + or node.tag.endswith("textbox")\ + or node.tag.endswith("link")\ + or node.tag.endswith("tabelement")\ + or node.tag.endswith("textfield")\ + or node.tag.endswith("textarea")\ + or node.tag in [ "alert", "canvas", "check-box" + , "combo-box", "entry", "icon" + , "image", "paragraph" + , "section", "slider", "static" + , "table-cell", "terminal", "text" + , "netuiribbontab", "start", "trayclockwclass" + , "traydummysearchcontrol", "uiimage", "uiproperty" + ]: + if ( platform=="ubuntu"\ + and node.get("{{{:}}}showing".format("uri:deskat:state.at-spi.gnome.org"), "false")=="true"\ + and node.get("{{{:}}}visible".format("uri:deskat:state.at-spi.gnome.org"), "false")=="true"\ + or platform=="windows"\ + and node.get("{{{:}}}visible".format("uri:deskat:state.at-spi.gnome.org"), "false")=="true"\ + )\ + and node.get("{{{:}}}enabled".format("uri:deskat:state.at-spi.gnome.org"), "false")=="true"\ + and (node.get("name", "") != "" or node.text is not None and len(node.text)>0): + coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format("uri:deskat:component.at-spi.gnome.org"))) + sizes: Tuple[int, int] = eval(node.get("{{{:}}}size".format("uri:deskat:component.at-spi.gnome.org"))) + if coordinates[0]>0 and coordinates[1]>0 and sizes[0]>0 and sizes[1]>0: + filtered_nodes.append(node) return filtered_nodes @@ -134,12 +141,14 @@ def print_nodes_with_indent(nodes, indent=0): if __name__ == '__main__': - with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as f: - xml_file_str = f.read() + import json + with open('2.json', 'r', encoding='utf-8') as f: + xml_file_str = json.load(f)["AT"] filtered_nodes = filter_nodes(find_leaf_nodes(xml_file_str)) print(len(filtered_nodes)) - masks = draw_bounding_boxes(filtered_nodes, 'screenshot.png', - 'chrome_desktop_example_1_tagged_remove.png', ) + masks = draw_bounding_boxes( filtered_nodes, '2.png' + , '2.a.png' + ) # print(masks) print(len(masks))