feat: 增强任务步骤注入与 a11y 状态表达,提升树形交互稳定性
- 打通 metadata.steps 传递链路,将任务步骤注入 agent 预测上下文
- 优化 a11y tree 线性化输出:使用中心坐标并新增 states 列(expanded/collapsed/selected 等)
- 放宽可保留节点条件,保留无文本输入类控件(edit/textfield/searchbox 等)
- 强化输出约束:单轮仅允许动作代码或 WAIT/DONE/FAIL,禁止动作与 DONE 同轮返回
- 补充 avogadro 示例步骤:展开 aromatics 并选择 benzene.cjson
This commit is contained in:
4
run.py
4
run.py
@@ -198,8 +198,11 @@ def test(args: argparse.Namespace, test_all_meta: dict) -> None:
|
||||
logger.info(f"[Example ID]: {example_id}")
|
||||
|
||||
instruction = example["instruction"]
|
||||
metadata_steps = example.get("metadata", {}).get("steps", "")
|
||||
|
||||
logger.info(f"[Instruction]: {instruction}")
|
||||
if metadata_steps:
|
||||
logger.info(f"[Metadata Steps]: {metadata_steps}")
|
||||
# wandb each example config settings
|
||||
cfg_args["instruction"] = instruction
|
||||
cfg_args["start_time"] = datetime.datetime.now().strftime(
|
||||
@@ -227,6 +230,7 @@ def test(args: argparse.Namespace, test_all_meta: dict) -> None:
|
||||
args,
|
||||
example_result_dir,
|
||||
scores,
|
||||
metadata_steps=metadata_steps,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Exception in {domain}/{example_id}: {e}")
|
||||
|
||||
Reference in New Issue
Block a user