feat: 增强任务步骤注入与 a11y 状态表达,提升树形交互稳定性

- 打通 metadata.steps 传递链路,将任务步骤注入 agent 预测上下文

- 优化 a11y tree 线性化输出:使用中心坐标并新增 states 列(expanded/collapsed/selected 等)

- 放宽可保留节点条件,保留无文本输入类控件(edit/textfield/searchbox 等)

- 强化输出约束:单轮仅允许动作代码或 WAIT/DONE/FAIL,禁止动作与 DONE 同轮返回

- 补充 avogadro 示例步骤:展开 aromatics 并选择 benzene.cjson
This commit is contained in:
2026-02-26 18:56:53 +08:00
parent 07e66490dd
commit b75f6bf341
6 changed files with 54 additions and 11 deletions

View File

@@ -9,7 +9,7 @@ from lib_results_logger import log_task_completion
logger = logging.getLogger("desktopenv.experiment")
-def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
+def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores, metadata_steps=""):
runtime_logger = setup_logger(example, example_result_dir)
# Reset environment first to get fresh VM IP
@@ -36,7 +36,8 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl
logger.info(f"Step {step_idx + 1} prediction...")
response, actions = agent.predict(
instruction,
-obs
+obs,
+metadata_steps=metadata_steps,
)
logger.info(f"Response: {response}")
logger.info(f"Actions: {actions}")