From 1e9281a1ab43a6c0f28944c1906640e3dadcd8c9 Mon Sep 17 00:00:00 2001 From: cui0711 <1729461967@qq.com> Date: Thu, 5 Feb 2026 16:56:39 +0800 Subject: [PATCH] feat(cli): add eval_model argument --- run.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/run.py b/run.py index e8844f6..5a35eef 100644 --- a/run.py +++ b/run.py @@ -85,7 +85,7 @@ def config() -> argparse.Namespace: parser.add_argument("--screen_width", type=int, default=1920) parser.add_argument("--screen_height", type=int, default=1080) parser.add_argument("--sleep_after_execution", type=float, default=0.0) - parser.add_argument("--max_steps", type=int, default=15) + parser.add_argument("--max_steps", type=int, default=8) parser.add_argument("--enable_recording", action="store_true", help="Enable video recording (disabled by default)") # agent config @@ -100,6 +100,7 @@ def config() -> argparse.Namespace: parser.add_argument("--top_p", type=float, default=0.9) parser.add_argument("--max_tokens", type=int, default=16384) parser.add_argument("--stop_token", type=str, default=None) + parser.add_argument("--eval_model", type=str, default="gpt-5.2-chat-latest") # example config parser.add_argument("--domain", type=str, default="all") @@ -161,6 +162,7 @@ def test(args: argparse.Namespace, test_all_meta: dict) -> None: os_type = "Windows", require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"], + eval_model=args.eval_model ) # get actual VM screen size after environment initialization