diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 357664c..83d1c67 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -182,6 +182,11 @@ _accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org" def _create_atspi_node(node: Accessible, depth: int, flag: Optional[str] = None) -> _Element: + if node.getRoleName() == "document spreadsheet": + flag = "calc" + if node.getRoleName() == "application" and node.name=="Thunderbird": + flag = "thunderbird" + attribute_dict: Dict[str, Any] = {"name": node.name} # States {{{ # @@ -247,6 +252,12 @@ def _create_atspi_node(node: Accessible, depth: int, flag: Optional[str] = None) # only text shown on current screen is available # attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount) text: str = text_obj.getText(0, text_obj.characterCount) + if flag=="thunderbird": + # appeared in Thunderbird (U+FFFC), "Object Replacement Character" in + # Unicode, "used as placeholder in text for an otherwise + # unspecified object"; U+FFFD is another "Replacement Character", + # just in case + text = text.replace("\ufffc", "").replace("\ufffd", "") # }}} Text # # Selection {{{ # @@ -303,18 +314,11 @@ def _create_atspi_node(node: Accessible, depth: int, flag: Optional[str] = None) if "text" in locals() and len(text) > 0: xml_node.text = text - # HACK: libreoffice has a problem -> billions of children for parent with RoleName "document spreadsheet" - #if node.getRoleName() == "document spreadsheet": - #return xml_node # HYPERPARAMETER if depth==50: logger.warning("Max depth reached") return xml_node - # HACK: libreoffice has a problem -> billions of children for parent with RoleName "document spreadsheet" - if node.getRoleName() == "document spreadsheet": - flag = "calc" - if flag=="calc" and node_role_name=="table": # Maximum column: 1024 if ver<=7.3 else 16384 # Maximum row: 104 8576 diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py index 423e554..323e08f 
100644 --- a/mm_agents/gpt_4v_agent.py +++ b/mm_agents/gpt_4v_agent.py @@ -33,12 +33,13 @@ def linearize_accessibility_tree(accessibility_tree): leaf_nodes = find_leaf_nodes(accessibility_tree) filtered_nodes = filter_nodes(leaf_nodes) - linearized_accessibility_tree = "tag\ttext\tposition\tsize\n" + linearized_accessibility_tree = "tag\tname\ttext\tposition\tsize\n" # Linearize the accessibility tree nodes into a table format for node in filtered_nodes: linearized_accessibility_tree += node.tag + "\t" linearized_accessibility_tree += node.attrib.get('name') + "\t" + linearized_accessibility_tree += (node.text if '"' not in node.text else '"{:}"'.format(node.text.replace('"', '""'))) + "\t" linearized_accessibility_tree += node.attrib.get( '{uri:deskat:component.at-spi.gnome.org}screencoord') + "\t" linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size') + "\n"