feat: add flexible recording control and improve execution logging; disable env/agent reset and startup wait in run_single_example

2026-01-30 16:28:13 +08:00
parent 47bcfc0f0b
commit 716d82f4d1
4 changed files with 67 additions and 20 deletions
--- a/lib_run_single.py
+++ b/lib_run_single.py
@@ -13,30 +13,44 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl
    runtime_logger = setup_logger(example, example_result_dir)

    # Reset environment first to get fresh VM IP
-    env.reset(task_config=example)
+    # env.reset(task_config=example)
+    # logger.info("=======Environment reset completed=======")

-    # Reset agent with fresh VM IP (for snapshot reverts)
-    try:
-        agent.reset(runtime_logger, vm_ip=env.vm_ip)
-    except Exception as e:
-        agent.reset(vm_ip=env.vm_ip)
+    # # Reset agent with fresh VM IP (for snapshot reverts)
+    # try:
+    #     agent.reset(runtime_logger, vm_ip=env.vm_ip)
+    # except Exception as e:
+    #     agent.reset(vm_ip=env.vm_ip)
    
-    time.sleep(60) # Wait for the environment to be ready
+    # time.sleep(10) # Wait for the environment to be ready
+    
+    # get initial observation
+    logger.info("Getting initial observation...")
    obs = env._get_obs() # Get the initial observation
+    logger.info("Initial observation obtained.")
    done = False
    step_idx = 0
-    env.controller.start_recording()
+    if getattr(args, 'enable_recording', False):
+        env.controller.start_recording()
    while not done and step_idx < max_steps:
+        logger.info(f"Step {step_idx + 1} prediction...")
        response, actions = agent.predict(
            instruction,
            obs
        )
+        logger.info(f"Response: {response}")
+        logger.info(f"Actions: {actions}")
+        
+        logger.info(f"Executing actions...")
        for action in actions:
            # Capture the timestamp before executing the action
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S%f")
            logger.info("Step %d: %s", step_idx + 1, action)
+            
+            logger.info("执行动作中...")
            obs, reward, done, info = env.step(action, args.sleep_after_execution)
-
+            logger.info("动作执行完成。")
+            
            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)
            # Save screenshot and trajectory information
@@ -69,7 +83,8 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl
    # Log task completion to results.json
    log_task_completion(example, result, example_result_dir, args)
    
-    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
+    if getattr(args, 'enable_recording', False):
+        env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))


 def setup_logger(example, example_result_dir):