feat: 增强任务步骤注入与 a11y 状态表达,提升树形交互稳定性
- 打通 metadata.steps 传递链路,将任务步骤注入 agent 预测上下文
- 优化 a11y tree 线性化输出:使用中心坐标并新增 states 列(expanded/collapsed/selected 等)
- 放宽可保留节点条件,保留无文本输入类控件(edit/textfield/searchbox 等)
- 强化输出约束:单轮仅允许动作代码或 WAIT/DONE/FAIL,禁止动作与 DONE 同轮返回
- 补充 avogadro 示例步骤:展开 aromatics 并选择 benzene.cjson
This commit is contained in:
@@ -114,6 +114,12 @@ def judge_node(node: ET, platform="ubuntu", check_image=False) -> bool:
|
||||
and (
|
||||
node.get("name", "") != "" or node.text is not None and len(node.text) > 0 \
|
||||
or check_image and node.get("image", "false") == "true"
|
||||
# Keep empty input fields (edit/textfield) - they are important interactive elements
|
||||
# even without name/text (e.g., search boxes, filter inputs)
|
||||
or node.tag.endswith("edit") or node.tag.endswith("textfield")
|
||||
or node.tag.endswith("textarea") or node.tag.endswith("textbox")
|
||||
or node.tag.endswith("searchbox") or node.tag.endswith("combobox")
|
||||
or node.tag in {"entry", "combo-box", "check-box", "slider"}
|
||||
)
|
||||
|
||||
coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format(_component_ns), "(-1, -1)"))
|
||||
|
||||
Reference in New Issue
Block a user