feat: 增强任务步骤注入与 a11y 状态表达,提升树形交互稳定性

- 打通 metadata.steps 传递链路,将任务步骤注入 agent 预测上下文

- 优化 a11y tree 线性化输出:使用中心坐标并新增 states 列(expanded/collapsed/selected 等)

- 放宽可保留节点条件,保留无文本输入类控件(edit/textfield/searchbox 等)

- 强化输出约束:单轮仅允许动作代码或 WAIT/DONE/FAIL,禁止动作与 DONE 同轮返回

- 补充 avogadro 示例步骤:展开 aromatics 并选择 benzene.cjson
This commit is contained in:
2026-02-26 18:56:53 +08:00
parent 07e66490dd
commit b75f6bf341
6 changed files with 54 additions and 11 deletions

4
run.py
View File

@@ -198,8 +198,11 @@ def test(args: argparse.Namespace, test_all_meta: dict) -> None:
logger.info(f"[Example ID]: {example_id}")
instruction = example["instruction"]
metadata_steps = example.get("metadata", {}).get("steps", "")
logger.info(f"[Instruction]: {instruction}")
if metadata_steps:
logger.info(f"[Metadata Steps]: {metadata_steps}")
# wandb each example config settings
cfg_args["instruction"] = instruction
cfg_args["start_time"] = datetime.datetime.now().strftime(
@@ -227,6 +230,7 @@ def test(args: argparse.Namespace, test_all_meta: dict) -> None:
args,
example_result_dir,
scores,
metadata_steps=metadata_steps,
)
except Exception as e:
logger.error(f"Exception in {domain}/{example_id}: {e}")