Add hosted GBOX agent for OSWorld evaluation (#376)

This commit is contained in:
Subash Shibu
2025-11-12 21:13:31 -08:00
committed by GitHub
parent 00b6468eb7
commit 3167339e45
5 changed files with 736 additions and 12 deletions

View File

@@ -10,12 +10,15 @@ logger = logging.getLogger("desktopenv.experiment")
def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
runtime_logger = setup_logger(example, example_result_dir)
try:
agent.reset(runtime_logger)
except Exception as e:
agent.reset()
# Reset environment first to get fresh VM IP
env.reset(task_config=example)
# Reset agent with fresh VM IP (for snapshot reverts)
try:
agent.reset(runtime_logger, vm_ip=env.vm_ip)
except Exception as e:
agent.reset(vm_ip=env.vm_ip)
time.sleep(60) # Wait for the environment to be ready
obs = env._get_obs() # Get the initial observation