Initialize all baselines: screenshot, a11y tree, both, SoM, SeeAct

This commit is contained in:
Timothyxxx
2024-01-20 00:13:46 +08:00
parent 46bd3386dd
commit 09f3e776ae
14 changed files with 2588 additions and 1208 deletions

View File

@@ -41,10 +41,12 @@ def filter_nodes(nodes):
elif node.tag == 'text':
continue
else:
coords = tuple(map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', ')))
coords = tuple(
map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', ')))
if coords[0] < 0 or coords[1] < 0:
continue
size = tuple(map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', ')))
size = tuple(
map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', ')))
if size[0] <= 0 or size[1] <= 0:
continue
# Node is not a 'panel', add to the list.
@@ -57,6 +59,9 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
# Load the screenshot image
image = Image.open(image_file_path)
draw = ImageDraw.Draw(image)
marks = []
# todo: change the image tagger to align with SoM paper
# Optional: Load a font. If you don't specify a font, a default one will be used.
try:
@@ -95,8 +100,26 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
text_position = (coords[0], bottom_right[1]) # Adjust Y to be above the bottom right
draw.text(text_position, str(index), font=font, fill="purple")
# each mark is an [x, y, w, h] list
marks.append([coords[0], coords[1], size[0], size[1]])
except ValueError as e:
pass
# Save the result
image.save(output_image_file_path)
return marks
def print_nodes_with_indent(nodes, indent=0):
    """Recursively print the tag and attributes of every node in *nodes*.

    Each node is printed on its own line as ``<padding> <tag> <attrib>``,
    where the padding is ``indent`` spaces. Children are printed directly
    below their parent with the padding increased by two spaces.

    Args:
        nodes: An iterable of element-like objects exposing ``tag`` and
            ``attrib`` and yielding their children when iterated (for
            example an ``xml.etree.ElementTree.Element``). When an element
            is passed, its children are printed, not the element itself.
        indent: Number of spaces of padding for this nesting level.
    """
    for node in nodes:
        # print() inserts a separator space after the padding string, so
        # even top-level entries start with a single space.
        print(' ' * indent, node.tag, node.attrib)
        # Iterating a node yields its direct children; descend one level.
        print_nodes_with_indent(node, indent + 2)
if __name__ == '__main__':
    # Manual smoke test: dump the structure of a sample accessibility tree.
    # The XML file is expected in the current working directory.
    with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as f:
        xml_file_str = f.read()

    # fromstring() returns the root element; print_nodes_with_indent walks
    # its children, so the root itself is not printed.
    nodes = ET.fromstring(xml_file_str)
    print_nodes_with_indent(nodes)