OpenCUA-72B (#354)

* use aws pub ip

* os task fix: set the default dim screen time to be 300s

* OpenCUA-72B

* update password

* update

* update

* update opencua72b agent

* change provider ip

---------

Co-authored-by: Jiaqi <dengjiaqi@moonshot.cn>
This commit is contained in:
Xinyuan Wang
2025-10-13 10:39:33 +08:00
committed by GitHub
parent ddb8372a6c
commit f9e9273b3b
7 changed files with 1345 additions and 761 deletions

View File

@@ -55,6 +55,7 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl
logger.info("The episode is done.")
break
step_idx += 1
time.sleep(20) # Wait for the environment to settle
result = env.evaluate()
logger.info("Result: %.2f", result)
scores.append(result)
@@ -186,23 +187,25 @@ def run_single_example_opencua(agent, env, example, max_steps, instruction, args
"wb") as _f:
_f.write(obs['screenshot'])
with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
with open(os.path.join(example_result_dir, "traj.jsonl"), "a", encoding="utf-8") as f:
f.write(json.dumps({
"step_num": step_idx + 1,
"action_timestamp": action_timestamp,
"action": action,
"natural_language_action": info_dict.get("action"),
"action_timestamp": action_timestamp,
"response": response,
"reward": reward,
"done": done,
"info": info,
"screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
}))
}, ensure_ascii=False))
f.write("\n")
if done:
logger.info("The episode is done.")
break
step_idx += 1
time.sleep(20) # Wait for the environment to settle
result = env.evaluate()
logger.info("Result: %.2f", result)
scores.append(result)