OpenCUA-72B (#354)

* use aws pub ip
* os task fix: set the default dim screen time to be 300s
* OpenCUA-72B
* update password
* update
* update
* update opencua72b agent
* change provider ip

---------

Co-authored-by: Jiaqi <dengjiaqi@moonshot.cn>
2025-10-13 10:39:33 +08:00
parent ddb8372a6c
commit f9e9273b3b
7 changed files with 1345 additions and 761 deletions
--- a/lib_run_single.py
+++ b/lib_run_single.py
@@ -55,6 +55,7 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl
                logger.info("The episode is done.")
                break
        step_idx += 1
+    time.sleep(20) # Wait for the environment to settle
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
@@ -186,23 +187,25 @@ def run_single_example_opencua(agent, env, example, max_steps, instruction, args
                      "wb") as _f:
                _f.write(obs['screenshot'])

-            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
+            with open(os.path.join(example_result_dir, "traj.jsonl"), "a", encoding="utf-8") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
-                    "action_timestamp": action_timestamp,
                    "action": action,
+                    "natural_language_action": info_dict.get("action"),
+                    "action_timestamp": action_timestamp,
                    "response": response,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
-                }))
+                }, ensure_ascii=False))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
        step_idx += 1

+    time.sleep(20) # Wait for the environment to settle
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)