From 63484c7b7b07a0460d3ae7cd3d1f0bc6e570f272 Mon Sep 17 00:00:00 2001 From: cui0711 <1729461967@qq.com> Date: Thu, 5 Feb 2026 16:55:49 +0800 Subject: [PATCH] fix(runner): pass result_dir to evaluate and re-enable environment reset --- lib_run_single.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/lib_run_single.py b/lib_run_single.py index 2262ea5..591e39e 100644 --- a/lib_run_single.py +++ b/lib_run_single.py @@ -13,17 +13,17 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl runtime_logger = setup_logger(example, example_result_dir) # Reset environment first to get fresh VM IP - # env.reset(task_config=example) - # logger.info("=======Environment reset completed=======") + env.reset(task_config=example) + logger.info("=======Environment reset completed=======") # # Reset agent with fresh VM IP (for snapshot reverts) # try: # agent.reset(runtime_logger, vm_ip=env.vm_ip) # except Exception as e: # agent.reset(vm_ip=env.vm_ip) - + # time.sleep(10) # Wait for the environment to be ready - + # get initial observation logger.info("Getting initial observation...") obs = env._get_obs() # Get the initial observation @@ -74,8 +74,7 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl break step_idx += 1 time.sleep(20) # Wait for the environment to settle - result = env.evaluate() - logger.info("Result: %.2f", result) + result = env.evaluate(result_dir=example_result_dir) scores.append(result) with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f: f.write(f"{result}\n") @@ -112,7 +111,7 @@ def run_single_example_human(env, example, max_steps, instruction, args, example f.write("\n") # Evaluate the result - result = env.evaluate() + result = env.evaluate(result_dir=example_result_dir) logger.info("Result: %.2f", result) scores.append(result) with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f: @@ -549,7 +548,7 @@ def run_single_example_os_symphony(agent, env, example, max_steps, instruction, break step_idx += 1 end_time = time.time() - result = float(env.evaluate()) + result = float(env.evaluate(result_dir=example_result_dir)) logger.info("Result: %.2f", result) scores.append(result) with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f: @@ -647,7 +646,7 @@ def run_single_example_evocua(agent, env, example, max_steps, instruction, args, step_idx += 1 time.sleep(20) # Wait for environment to settle - result = env.evaluate() + result = env.evaluate(result_dir=example_result_dir) logger.info("Result: %.2f", result) scores.append(result)