Initialize all baselines: screenshot, a11y tree, both, SoM, SeeAct
This commit is contained in:
@@ -41,10 +41,12 @@ def filter_nodes(nodes):
|
||||
elif node.tag == 'text':
|
||||
continue
|
||||
else:
|
||||
coords = tuple(map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', ')))
|
||||
coords = tuple(
|
||||
map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', ')))
|
||||
if coords[0] < 0 or coords[1] < 0:
|
||||
continue
|
||||
size = tuple(map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', ')))
|
||||
size = tuple(
|
||||
map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', ')))
|
||||
if size[0] <= 0 or size[1] <= 0:
|
||||
continue
|
||||
# Node is not a 'panel', add to the list.
|
||||
@@ -57,6 +59,9 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
|
||||
# Load the screenshot image
|
||||
image = Image.open(image_file_path)
|
||||
draw = ImageDraw.Draw(image)
|
||||
marks = []
|
||||
|
||||
# TODO: change the image tagger to align with the SoM paper
|
||||
|
||||
# Optional: Load a font. If you don't specify a font, a default one will be used.
|
||||
try:
|
||||
@@ -95,8 +100,26 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
|
||||
text_position = (coords[0], bottom_right[1]) # Adjust Y to be above the bottom right
|
||||
draw.text(text_position, str(index), font=font, fill="purple")
|
||||
|
||||
# each mark is an x, y, w, h tuple
|
||||
marks.append([coords[0], coords[1], size[0], size[1]])
|
||||
|
||||
except ValueError as e:
|
||||
pass
|
||||
|
||||
# Save the result
|
||||
image.save(output_image_file_path)
|
||||
return marks
|
||||
|
||||
|
||||
def print_nodes_with_indent(nodes, indent=0):
    """Print a tree of nodes, one node per line, indented by nesting depth.

    Each item yielded by ``nodes`` is printed as its ``tag`` followed by its
    ``attrib``, preceded by ``indent`` spaces (plus the separator space that
    ``print`` inserts between arguments). The item is then iterated in turn
    to reach its own children, which are printed two columns deeper.

    Args:
        nodes: Iterable of node objects exposing ``tag`` and ``attrib`` and
            being iterable themselves.
        indent: Number of padding spaces for the items of ``nodes``.
    """
    for element in nodes:
        padding = ' ' * indent
        print(padding, element.tag, element.attrib)
        # The element itself is passed on as the next level's iterable.
        print_nodes_with_indent(element, indent + 2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: read the sample XML dump, parse it, and print the
    # parsed tree with indentation reflecting nesting depth.
    with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as xml_file:
        xml_text = xml_file.read()

    root = ET.fromstring(xml_text)
    # Only the descendants of the parsed root are printed; the helper
    # iterates over what it is given rather than printing it.
    print_nodes_with_indent(root)
|
||||
|
||||
Reference in New Issue
Block a user