# File-viewer header (not code): 116 lines, 4.5 KiB, Python.
import xml.etree.ElementTree as ET
|
|
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
|
|
def find_leaf_nodes(xml_file_path):
    """Return every leaf element of an XML document, in document order.

    Despite its name, ``xml_file_path`` must hold the XML *text* itself: it is
    passed straight to ``ET.fromstring``, which parses a string and does not
    open a file. The parameter name is kept for backward compatibility.

    Args:
        xml_file_path: The XML document as a string.

    Returns:
        A list of elements that have no child elements. When the root has no
        children, the root itself is the only leaf.

    Raises:
        xml.etree.ElementTree.ParseError: If the text is not well-formed XML.
    """
    root = ET.fromstring(xml_file_path)

    # Element.iter() walks the tree depth-first in document order without
    # Python-level recursion, so very deep trees cannot hit RecursionError.
    return [node for node in root.iter() if len(node) == 0]
|
|
|
|
|
|
def _parse_int_pair(text):
    """Parse a string such as ``"(12, 34)"`` into the tuple ``(12, 34)``.

    Returns ``None`` when *text* is missing, empty, or does not contain at
    least two comma-separated integers.
    """
    if not text:
        return None
    try:
        values = [int(part) for part in text.strip('()').split(',')]
    except ValueError:
        return None
    if len(values) < 2:
        return None
    return values[0], values[1]


def filter_nodes(nodes):
    """Select the nodes worth annotating from a list of accessibility nodes.

    Rules, applied in order to each node:

    * Nodes whose ``visible`` state attribute is not ``'true'`` are dropped.
    * ``panel`` nodes are kept only when their ``focusable`` state attribute
      is ``'true'`` (their geometry is not inspected).
    * ``text`` nodes are always dropped.
    * Any other node is kept only when its ``screencoord`` has no negative
      component and its ``size`` is strictly positive in both dimensions.
      Nodes whose geometry attributes are missing or malformed are skipped
      rather than raising.

    Args:
        nodes: Iterable of ``xml.etree.ElementTree.Element`` objects.

    Returns:
        A new list with the nodes that passed the rules, in input order.
    """
    visible_attr = '{uri:deskat:state.at-spi.gnome.org}visible'
    focusable_attr = '{uri:deskat:state.at-spi.gnome.org}focusable'
    coord_attr = '{uri:deskat:component.at-spi.gnome.org}screencoord'
    size_attr = '{uri:deskat:component.at-spi.gnome.org}size'

    filtered_nodes = []

    for node in nodes:
        if node.get(visible_attr) != 'true':
            continue

        if node.tag == 'panel':
            # Panels are only of interest when they can take focus.
            if node.get(focusable_attr, 'false') == 'true':
                filtered_nodes.append(node)
            continue

        if node.tag == 'text':
            continue

        # Coordinates are validated before the size is even parsed, so an
        # off-screen node is rejected regardless of its size attribute.
        coords = _parse_int_pair(node.get(coord_attr))
        if coords is None or coords[0] < 0 or coords[1] < 0:
            continue

        size = _parse_int_pair(node.get(size_attr))
        if size is None or size[0] <= 0 or size[1] <= 0:
            continue

        filtered_nodes.append(node)

    return filtered_nodes
|
|
|
|
|
|
def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
    """Draw an indexed red rectangle for each node onto a screenshot.

    For every node that carries parseable ``screencoord`` and ``size``
    attributes with a strictly positive size, a red outline is drawn and the
    node's position in ``nodes`` is written in purple at the bottom-left
    corner of the box. Nodes with missing, malformed or non-positive geometry
    are skipped on purpose (best effort); they still consume an index, so the
    labels always match positions in ``nodes``.

    Args:
        nodes: Sequence of ``xml.etree.ElementTree.Element`` objects.
        image_file_path: Path of the screenshot to annotate.
        output_image_file_path: Path where the annotated image is written.
    """
    coord_attr = '{uri:deskat:component.at-spi.gnome.org}screencoord'
    size_attr = '{uri:deskat:component.at-spi.gnome.org}size'

    try:
        # Adjust the path to a font file available on the system.
        font = ImageFont.truetype("arial.ttf", 20)
    except OSError:
        # Fall back to Pillow's built-in font when arial.ttf cannot be loaded.
        font = ImageFont.load_default()

    # The context manager closes the underlying file handle even when
    # drawing or saving fails.
    with Image.open(image_file_path) as image:
        draw = ImageDraw.Draw(image)

        for index, node in enumerate(nodes):
            coords_str = node.attrib.get(coord_attr)
            size_str = node.attrib.get(size_attr)
            if not coords_str or not size_str:
                continue

            try:
                coords = [int(part) for part in coords_str.strip('()').split(',')]
                size = [int(part) for part in size_str.strip('()').split(',')]
                left, top = coords[0], coords[1]
                width, height = size[0], size[1]
            except (ValueError, IndexError):
                # Malformed geometry: skip this node instead of aborting
                # the whole annotation run.
                continue

            if width <= 0 or height <= 0:
                continue

            # With a positive size the bottom-right corner is always beyond
            # the top-left one, so no further ordering check is needed.
            right, bottom = left + width, top + height
            draw.rectangle([(left, top), (right, bottom)], outline="red", width=2)

            # The label is anchored at the bottom-left corner of the box.
            draw.text((left, bottom), str(index), font=font, fill="purple")

        image.save(output_image_file_path)
|
|
|
|
|
|
if __name__ == '__main__':
    # Demo run: annotate a screenshot with the nodes found in a sample
    # accessibility-tree dump.
    image_file_path = 'screenshot.png'  # Replace with your actual screenshot image path
    output_image_file_path = 'annotated_screenshot.png'  # Replace with your desired output image path

    with open('chrome_desktop_example_1.xml', encoding='utf-8') as xml_file:
        xml_string = xml_file.read()

    # Collect the leaves of the tree, then keep only the ones worth drawing.
    filtered_nodes = filter_nodes(find_leaf_nodes(xml_string))
    print(f"Found {len(filtered_nodes)} filtered nodes")

    # Dump each surviving node so the result can be inspected by eye.
    for kept in filtered_nodes:
        print(kept.tag, kept.attrib)

    draw_bounding_boxes(filtered_nodes, image_file_path, output_image_file_path)
|