* autoglm-os initialize * clean code * chore: use proxy for download setup * feat(autoglm-os): add parameter to toggle images * fix: use temporary directory for files pulled from the vm to prevent potential collision when running multiple instances of the same task in parallel * update * add client_password * update multienv * fix * fix prompt * fix prompt * fix prompt * fix sys prompt * feat: use proxy in file evaluator * fix client_password * fix note_prompt * fix autoglm agent cmd type * fix * revert: fix: use temporary directory for files pulled from the vm to prevent potential collision when running multiple instances of the same task in parallel reverts commit bab5473eea1de0e61b0e1d68b23ce324a5b0ee57 * feat(autoglm): setup tools * fix(autoglm): remove second time of get a11y tree * add osworld server restart * Revert "add osworld server restart" This reverts commit 7bd9d84122e246ce2a26de0e49c25494244c2b3d. * fix _launch_setup * fix autoglm agent tools & xml tree * fix desktop_env * fix bug for tool name capitalization * fix: always use proxy for setup download * add fail after exceeding max turns * fix(autoglm): avoid adding image to message when screenshot is empty * fix maximize_window * fix maximize_window * fix maximize_window * fix import browsertools module bug * fix task proxy config bug * restore setup * refactor desktop env * restore image in provider * restore file.py * refactor desktop_env * quick fix * refactor desktop_env.step * fix our env reset * add max truns constraint * clean run script * clean lib_run_single.py --------- Co-authored-by: hanyullai <hanyullai@outlook.com> Co-authored-by: JingBh <jingbohao@yeah.net>
101 lines
3.8 KiB
Python
101 lines
3.8 KiB
Python
import re
|
||
|
||
|
||
# One linearized accessibility-tree row, e.g.
#   label Google Chrome (191, 13) (104, 17)
# i.e. <type> <text> (<center x>, <center y>) (<width>, <height>).
# Compiled once because parse_line is called for every row of the tree.
_NODE_LINE_RE = re.compile(
    r"^(\S+)\s+(.+?)\s+\((\d+), (\d+)\)\s+\((\d+), (\d+)\)"
)


def parse_line(line: str):
    """Parse one row of a linearized accessibility tree.

    The expected layout is ``<type> <text> (cx, cy) (w, h)`` where the
    fields are separated by whitespace and the four numbers are
    non-negative integers.

    Args:
        line: A single row of the tree.

    Returns:
        ``None`` when the row does not match the expected layout (for
        example a row with no text between the type and the coordinates).
        Otherwise a dict with the keys:

        * ``"type"``   -- the first whitespace-delimited token,
        * ``"text"``   -- the stripped text field,
        * ``"bbox"``   -- ``(x1, y1, x2, y2)`` derived from center and size,
        * ``"center"`` -- ``(cx, cy)``,
        * ``"size"``   -- ``(w, h)``,
        * ``"raw"``    -- the unmodified input line.
    """
    match = _NODE_LINE_RE.match(line)
    if match is None:
        return None

    node_type, text, *numbers = match.groups()
    cx, cy, w, h = map(int, numbers)

    # Convert center + size into corner coordinates.  The top-left corner
    # uses floor division, and the bottom-right corner is offset by the
    # full size so the box always has exactly the reported width/height.
    x1 = cx - w // 2
    y1 = cy - h // 2

    return {
        "type": node_type,
        "text": text.strip(),
        "bbox": (x1, y1, x1 + w, y1 + h),
        "center": (cx, cy),
        "size": (w, h),
        "raw": line,
    }
|
||
|
||
|
||
def iou(box1, box2) -> float:
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Box as ``(x1, y1, x2, y2)``.
        box2: Box as ``(x1, y1, x2, y2)``.

    Returns:
        Intersection area divided by union area, or ``0.0`` when the
        union area is zero (both boxes are degenerate).
    """
    # Corners of the overlap rectangle.
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    # Clamp to zero so disjoint boxes yield an empty intersection instead
    # of a negative width/height.
    inter_area = max(0, right - left) * max(0, bottom - top)

    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter_area

    if union == 0:
        # Avoid division by zero; keep the return type a float like the
        # regular path.
        return 0.0
    return inter_area / union
|
||
|
||
|
||
def norm_text(s: str) -> str:
    """Normalize text for comparison: lowercase it and drop all whitespace."""
    return re.sub(r"\s+", "", s.lower())
|
||
|
||
|
||
def text_similarity(a: str, b: str) -> float:
    """Return a similarity score for two texts.

    This is a deliberately simple all-or-nothing check: the score is
    ``1.0`` when the texts are equal after ``norm_text`` normalization
    and ``0.0`` otherwise.
    """
    return 1.0 if norm_text(a) == norm_text(b) else 0.0
|
||
|
||
|
||
def filter_similar_nodes(
    linearized_accessibility_tree: str,
    *,
    iou_thresh: float = 0.2,
    text_thresh: float = 0.9,
) -> str:
    """Drop near-duplicate rows from a linearized accessibility tree.

    The input is split on ``"\\n"``; blank rows are discarded.  Each
    remaining row is parsed with ``parse_line``.  Two parseable rows are
    considered duplicates when the IoU of their bounding boxes is greater
    than ``iou_thresh`` and their ``text_similarity`` is greater than
    ``text_thresh``; in that case the earlier row is kept and the later
    one is removed.  Rows that cannot be parsed are always kept as-is.

    Args:
        linearized_accessibility_tree: Newline-separated tree rows.
        iou_thresh: Bounding-box overlap above which two rows may be
            duplicates.
        text_thresh: Text similarity above which two rows may be
            duplicates.

    Returns:
        The surviving rows, in their original order, joined with ``"\\n"``.
    """
    nodes = []
    for ln in linearized_accessibility_tree.split("\n"):
        if not ln.strip():
            continue
        node = parse_line(ln)
        # Unparseable rows are kept verbatim and never compared.
        nodes.append(node if node else {"raw": ln, "invalid": True})

    removed = [False] * len(nodes)
    filtered = []

    for i, ni in enumerate(nodes):
        if ni.get("invalid"):
            filtered.append(ni["raw"])
            continue
        if removed[i]:
            continue

        # This row survives; mark every later row that duplicates it.
        filtered.append(ni["raw"])
        for j in range(i + 1, len(nodes)):
            nj = nodes[j]
            # Rows already marked need no further comparison.
            if removed[j] or nj.get("invalid"):
                continue
            if (
                iou(ni["bbox"], nj["bbox"]) > iou_thresh
                and text_similarity(ni["text"], nj["text"]) > text_thresh
            ):
                removed[j] = True

    return "\n".join(filtered)
|
||
|
||
|
||
# Example usage: de-duplicate a sample desktop accessibility tree and print it.
if __name__ == "__main__":
    linearized_accessibility_tree = "tag\ttext\tposition (center x & y)\tsize (w & h)\nicon\t\t(1853, 1001)\t(64, 64)\nlabel\tHome\t(1853, 1045)\t(40, 17)\nlabel\tActivities\t(49, 13)\t(63, 17)\ntext\tActivities\t(49, 13)\t(63, 17)\nlabel\tApr 17 17∶04\t(995, 13)\t(117, 27)\ntext\tApr 17 17∶04\t(995, 13)\t(87, 18)\nmenu\tSystem\t(1867, 13)\t(106, 27)\npush-button\tGoogle Chrome\t(35, 65)\t(70, 64)\npush-button\tThunderbird Mail\t(35, 133)\t(70, 64)\npush-button\tVisual Studio Code\t(35, 201)\t(70, 64)\npush-button\tVLC media player\t(35, 269)\t(70, 64)\npush-button\tLibreOffice Writer\t(35, 337)\t(70, 64)\npush-button\tLibreOffice Calc\t(35, 405)\t(70, 64)\npush-button\tLibreOffice Impress\t(35, 473)\t(70, 64)\npush-button\tGNU Image Manipulation Program\t(35, 541)\t(70, 64)\npush-button\tFiles\t(35, 609)\t(70, 64)\npush-button\tUbuntu Software\t(35, 677)\t(70, 64)\npush-button\tHelp\t(35, 745)\t(70, 64)\npush-button\tTrash\t(35, 816)\t(70, 64)\ntoggle-button\tShow Applications\t(35, 1045)\t(70, 70)"
    result = filter_similar_nodes(linearized_accessibility_tree)
    print(result)
|