Add ui agent (#343)

* add uipath agent * readme update
2025-09-24 14:42:46 +03:00
parent 088e68798c
commit f59cf00cae
14 changed files with 2167 additions and 1 deletions
--- a/lib_run_single.py
+++ b/lib_run_single.py
@@ -326,4 +326,60 @@ def run_single_example_mano(agent, env, example, max_steps, instruction, args, e
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")
-    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
+    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
+    
+def run_single_example_uipath(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
+    """Run one example with the UiPath agent and persist its artifacts.
+
+    Saves a screenshot and a traj.jsonl record per action, writes result.txt,
+    appends the evaluation result to scores, and ends the recording.
+    """
+    runtime_logger = setup_logger(example, example_result_dir)
+    try:
+        agent.reset(runtime_logger)
+    except Exception:
+        # Retry without the logger argument if the first reset call raised.
+        agent.reset()
+    env.reset(task_config=example)
+    time.sleep(60)  # Wait for the environment to be ready
+    obs = env._get_obs()  # Initial observation for the first predict call
+    traj_path = os.path.join(example_result_dir, "traj.jsonl")
+    done = False
+    step_idx = 0
+    env.controller.start_recording()
+    while not done and step_idx < max_steps:
+        step_num = step_idx + 1
+        response, actions = agent.predict(instruction, obs, args, step_idx)
+        for action in actions:
+            # Timestamp is captured before the action executes.
+            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
+            screenshot_file = f"step_{step_num}_{action_timestamp}.png"
+            logger.info("Step %d: %s", step_num, action)
+            obs, reward, done, info = env.step(action, args.sleep_after_execution)
+            logger.info("Reward: %.2f", reward)
+            logger.info("Done: %s", done)
+            # Save the screenshot, then append this action to the trajectory log.
+            with open(os.path.join(example_result_dir, screenshot_file), "wb") as image_out:
+                image_out.write(obs['screenshot'])
+            record = {
+                "step_num": step_num,
+                "action_timestamp": action_timestamp,
+                "action": action,
+                "response": response,
+                "reward": reward,
+                "done": done,
+                "info": info,
+                "screenshot_file": screenshot_file,
+            }
+            with open(traj_path, "a") as traj_out:
+                traj_out.write(json.dumps(record) + "\n")
+            if done:
+                logger.info("The episode is done.")
+                break
+        step_idx += 1
+    result = env.evaluate()
+    logger.info("Result: %.2f", result)
+    scores.append(result)
+    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as result_out:
+        result_out.write(f"{result}\n")
+    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))