"""Annotate a screenshot with bounding boxes taken from an accessibility-tree XML dump.

The XML attributes read here live under ``at-spi.gnome.org`` namespaces, so the
input is presumably an AT-SPI tree export -- confirm against the producer.
"""
import logging
import xml.etree.ElementTree as ET
from typing import Iterable, List, Optional, Tuple

from PIL import Image, ImageDraw, ImageFont

logger = logging.getLogger(__name__)

# Fully-qualified attribute keys read from each XML element.
_STATE_NS = '{uri:deskat:state.at-spi.gnome.org}'
_COMPONENT_NS = '{uri:deskat:component.at-spi.gnome.org}'
VISIBLE_ATTR = _STATE_NS + 'visible'
FOCUSABLE_ATTR = _STATE_NS + 'focusable'
SCREENCOORD_ATTR = _COMPONENT_NS + 'screencoord'
SIZE_ATTR = _COMPONENT_NS + 'size'


def _parse_int_pair(raw: Optional[str]) -> Optional[Tuple[int, int]]:
    """Parse a ``"(x, y)"`` attribute value into a pair of ints.

    Args:
        raw: The attribute text, or ``None`` when the attribute is absent.

    Returns:
        ``(x, y)`` on success; ``None`` if *raw* is missing, empty, does not
        contain exactly two comma-separated fields, or a field is not an int.
    """
    if not raw:
        return None
    # Split on the bare comma: int() tolerates surrounding whitespace, so both
    # "(1, 2)" and "(1,2)" are accepted.
    fields = raw.strip('()').split(',')
    if len(fields) != 2:
        return None
    try:
        return int(fields[0]), int(fields[1])
    except ValueError:
        return None


def find_leaf_nodes(xml_file_path: str) -> List[ET.Element]:
    """Return every childless element of an XML document, in document order.

    Args:
        xml_file_path: The XML document *text*. Despite the parameter name
            (kept for backward compatibility) this is handed to
            ``ET.fromstring`` and is therefore not a filesystem path.

    Returns:
        The elements that have no child elements. A root without children is
        itself returned as the only leaf.

    Raises:
        xml.etree.ElementTree.ParseError: If the text is not well-formed XML.
    """
    root = ET.fromstring(xml_file_path)
    # Element.iter() walks the tree depth-first in document order without
    # Python-level recursion, so deeply nested trees cannot hit the
    # interpreter's recursion limit.
    return [element for element in root.iter() if len(element) == 0]


def filter_nodes(nodes: Iterable[ET.Element]) -> List[ET.Element]:
    """Select the nodes worth annotating.

    A node is kept when its ``visible`` state attribute equals ``'true'`` and:

    * its tag is ``'panel'`` and its ``focusable`` state attribute is
      ``'true'``; or
    * its tag is neither ``'panel'`` nor ``'text'``, its screen coordinates
      are both non-negative and its size is strictly positive in both
      dimensions.

    Nodes whose tag is ``'text'`` are always dropped. Non-panel nodes whose
    coordinate or size attribute is missing or malformed are dropped rather
    than raising.

    Args:
        nodes: Elements to examine (typically the result of
            ``find_leaf_nodes``).

    Returns:
        The kept elements, in their original order.
    """
    filtered_nodes = []
    for node in nodes:
        if node.get(VISIBLE_ATTR) != 'true':
            continue  # Not visible.

        if node.tag == 'panel':
            # Panels are kept only when focusable; their geometry is not
            # inspected here.
            if node.get(FOCUSABLE_ATTR, 'false') == 'true':
                filtered_nodes.append(node)
            continue

        if node.tag == 'text':
            continue

        coords = _parse_int_pair(node.get(SCREENCOORD_ATTR))
        if coords is None or coords[0] < 0 or coords[1] < 0:
            continue

        size = _parse_int_pair(node.get(SIZE_ATTR))
        if size is None or size[0] <= 0 or size[1] <= 0:
            continue

        filtered_nodes.append(node)
    return filtered_nodes


def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
    """Draw an indexed red rectangle for each node and save the result.

    Each node's position in *nodes* is drawn as a purple label anchored at the
    bottom-left corner of its rectangle. Nodes whose coordinate or size
    attribute is missing or malformed, or whose size is not strictly positive,
    are skipped (and reported at DEBUG level); they still consume an index so
    labels always match positions in *nodes*.

    Args:
        nodes: Sequence of XML elements carrying ``screencoord`` and ``size``
            component attributes formatted as ``"(x, y)"``.
        image_file_path: Path of the screenshot to annotate.
        output_image_file_path: Path the annotated image is written to.
    """
    try:
        # Adjust the path to a font file you have; otherwise the fallback
        # below is used.
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        # Fall back to Pillow's built-in font if the file can't be loaded.
        font = ImageFont.load_default()

    # The context manager releases the underlying file handle even if drawing
    # or saving fails.
    with Image.open(image_file_path) as image:
        draw = ImageDraw.Draw(image)

        for index, _node in enumerate(nodes):
            coords = _parse_int_pair(_node.attrib.get(SCREENCOORD_ATTR))
            size = _parse_int_pair(_node.attrib.get(SIZE_ATTR))
            if coords is None or size is None:
                logger.debug(
                    "Skipping node %d: missing or malformed geometry", index)
                continue
            if size[0] <= 0 or size[1] <= 0:
                logger.debug(
                    "Skipping node %d: size must be positive, got %s",
                    index, size)
                continue

            # With a strictly positive size the bottom-right corner is always
            # beyond the top-left one, so no further ordering check is needed.
            bottom_right = (coords[0] + size[0], coords[1] + size[1])
            draw.rectangle([coords, bottom_right], outline="red", width=2)

            # Anchor the label at the bottom-left corner of the box.
            text_position = (coords[0], bottom_right[1])
            draw.text(text_position, str(index), font=font, fill="purple")

        image.save(output_image_file_path)


def main():
    """Annotate ``screenshot.png`` using ``chrome_desktop_example_1.xml``."""
    with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as f:
        xml_string = f.read()

    image_file_path = 'screenshot.png'  # Replace with your actual screenshot image path
    output_image_file_path = 'annotated_screenshot.png'  # Replace with your desired output image path

    leaf_nodes = find_leaf_nodes(xml_string)
    filtered_nodes = filter_nodes(leaf_nodes)
    print(f"Found {len(filtered_nodes)} filtered nodes")

    for node in filtered_nodes:
        print(node.tag, node.attrib)

    draw_bounding_boxes(filtered_nodes, image_file_path, output_image_file_path)


if __name__ == '__main__':
    main()