Files
sci-gui-agent-benchmark/mm_agents/autoglm_v/prompt/deduplicate_node.py
Yanxiao Zhao a4f8fe2f00 Add autoglm-os-9b-v (#344)
* update for autoglm-v

* Update run_autoglm.py

---------

Co-authored-by: hanyullai <hanyullai@outlook.com>
2025-09-24 19:43:28 +08:00

101 lines
3.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
def parse_line(line):
    """Parse one row of a linearized accessibility tree.

    The expected row shape is ``<tag> <text> (cx, cy) (w, h)``, for example
    ``label Google Chrome (191, 13) (104, 17)``.

    Returns a dict with the keys ``type``, ``text``, ``bbox``, ``center``,
    ``size`` and ``raw``, or ``None`` when the row does not match.
    """
    matched = re.match(
        r"^(\S+)\s+(.+?)\s+\((\d+), (\d+)\)\s+\((\d+), (\d+)\)", line
    )
    if matched is None:
        return None

    tag = matched.group(1)
    label = matched.group(2)
    center_x, center_y, width, height = (
        int(matched.group(idx)) for idx in range(3, 7)
    )

    # Turn centre + size into corner coordinates (x1, y1, x2, y2).
    left = center_x - width // 2
    top = center_y - height // 2
    return {
        "type": tag,
        "text": label.strip(),
        "bbox": (left, top, left + width, top + height),
        "center": (center_x, center_y),
        "size": (width, height),
        "raw": line,
    }
def iou(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        box1: Box given as ``(x1, y1, x2, y2)``.
        box2: Box given as ``(x1, y1, x2, y2)``.

    Returns:
        ``intersection_area / union_area`` as a float, or ``0.0`` when the
        union area is zero.
    """
    # Clamp at zero so boxes that do not overlap contribute no intersection.
    inter_width = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    inter_height = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    inter_area = inter_width * inter_height

    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter_area
    if union == 0:
        # Avoid dividing by zero; return a float so the result type is the
        # same on every path.
        return 0.0
    return inter_area / union
def norm_text(s):
    """Normalize text for comparison: lowercase it and remove all whitespace."""
    lowered = s.lower()
    whitespace_run = re.compile(r"\s+")
    return whitespace_run.sub("", lowered)
def text_similarity(a, b):
    """Score how similar two strings are after ``norm_text`` normalization.

    This is a plain exact-match check, not a fuzzy metric.

    Args:
        a: First string.
        b: Second string.

    Returns:
        ``1.0`` when the normalized strings are identical, otherwise ``0.0``.
        Both outcomes are floats so the result type is the same on every path.
    """
    return 1.0 if norm_text(a) == norm_text(b) else 0.0
def filter_similar_nodes(linearized_accessibility_tree, *, iou_thresh=0.2, text_thresh=0.9):
    """Drop near-duplicate rows from a linearized accessibility tree.

    Two parseable rows count as duplicates when the IoU of their bounding
    boxes exceeds ``iou_thresh`` and their text similarity exceeds
    ``text_thresh``. The earlier row is kept and the later one is dropped.
    Rows that ``parse_line`` cannot parse are always kept; blank rows are
    discarded.

    Args:
        linearized_accessibility_tree: Tree rows separated by ``"\\n"``.
        iou_thresh: Bounding-box IoU a pair must exceed to be a duplicate.
        text_thresh: Text similarity a pair must exceed to be a duplicate.

    Returns:
        The surviving rows, in their original order, joined with ``"\\n"``.
    """
    rows = [ln for ln in linearized_accessibility_tree.split("\n") if ln.strip()]
    # A ``None`` entry marks a row that could not be parsed.
    nodes = [parse_line(ln) for ln in rows]
    removed = [False] * len(nodes)
    kept = []

    for i, (row, node) in enumerate(zip(rows, nodes)):
        if node is None:
            # Unparseable rows are passed through untouched.
            kept.append(row)
            continue
        if removed[i]:
            continue
        kept.append(row)

        # Mark every later row that duplicates this one.
        for j in range(i + 1, len(nodes)):
            other = nodes[j]
            if other is None or removed[j]:
                continue
            if (
                iou(node["bbox"], other["bbox"]) > iou_thresh
                and text_similarity(node["text"], other["text"]) > text_thresh
            ):
                removed[j] = True

    return "\n".join(kept)
# Example usage: de-duplicate a sample tree and print what is left.
if __name__ == "__main__":
    sample_tree = "tag\ttext\tposition (center x & y)\tsize (w & h)\nicon\t\t(1853, 1001)\t(64, 64)\nlabel\tHome\t(1853, 1045)\t(40, 17)\nlabel\tActivities\t(49, 13)\t(63, 17)\ntext\tActivities\t(49, 13)\t(63, 17)\nlabel\tApr 171704\t(995, 13)\t(117, 27)\ntext\tApr 171704\t(995, 13)\t(87, 18)\nmenu\tSystem\t(1867, 13)\t(106, 27)\npush-button\tGoogle Chrome\t(35, 65)\t(70, 64)\npush-button\tThunderbird Mail\t(35, 133)\t(70, 64)\npush-button\tVisual Studio Code\t(35, 201)\t(70, 64)\npush-button\tVLC media player\t(35, 269)\t(70, 64)\npush-button\tLibreOffice Writer\t(35, 337)\t(70, 64)\npush-button\tLibreOffice Calc\t(35, 405)\t(70, 64)\npush-button\tLibreOffice Impress\t(35, 473)\t(70, 64)\npush-button\tGNU Image Manipulation Program\t(35, 541)\t(70, 64)\npush-button\tFiles\t(35, 609)\t(70, 64)\npush-button\tUbuntu Software\t(35, 677)\t(70, 64)\npush-button\tHelp\t(35, 745)\t(70, 64)\npush-button\tTrash\t(35, 816)\t(70, 64)\ntoggle-button\tShow Applications\t(35, 1045)\t(70, 70)"
    print(filter_similar_nodes(sample_tree))