Fix corner cases (val connection in Chrome when using Playwright, action parsing for agent, and accessibility tree XML handling)

2024-01-16 22:00:01 +08:00
parent 186bf2e97c
commit 20b1d950a0
5 changed files with 51 additions and 30 deletions
--- a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py
+++ b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py
@@ -3,8 +3,11 @@ import xml.etree.ElementTree as ET
 from PIL import Image, ImageDraw, ImageFont


-def find_leaf_nodes(xml_file_path):
-    root = ET.fromstring(xml_file_path)
+def find_leaf_nodes(xlm_file_str):
+    if not xlm_file_str:
+        return []
+
+    root = ET.fromstring(xlm_file_str)

    # Recursive function to traverse the XML tree and collect leaf nodes
    def collect_leaf_nodes(node, leaf_nodes):
@@ -97,19 +100,3 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):

    # Save the result
    image.save(output_image_file_path)
-
-
-if __name__ == '__main__':
-    with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as f:
-        xml_string = f.read()
-    image_file_path = 'screenshot.png'  # Replace with your actual screenshot image path
-    output_image_file_path = 'annotated_screenshot.png'  # Replace with your desired output image path
-
-    leaf_nodes = find_leaf_nodes(xml_string)
-    filtered_nodes = filter_nodes(leaf_nodes)
-    print(f"Found {len(filtered_nodes)} filtered nodes")
-
-    for node in filtered_nodes:
-        print(node.tag, node.attrib)
-
-    draw_bounding_boxes(filtered_nodes, image_file_path, output_image_file_path)