feat&fix: update paths in configuration, enhance error handling, and improve UI elements

2025-06-01 04:48:50 +00:00
parent d1a001b2b7
commit cb62b3c877
13 changed files with 359 additions and 68 deletions
--- a/monitor/main.py
+++ b/monitor/main.py
@@ -15,9 +15,9 @@ load_dotenv()
 app = Flask(__name__)

 # Load configuration from environment variables
-TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "evaluation_examples/test_small.json")
-EXAMPLES_BASE_PATH = os.getenv("EXAMPLES_BASE_PATH", "evaluation_examples/examples")
-RESULTS_BASE_PATH = os.getenv("RESULTS_BASE_PATH", "results_operator_aws/pyautogui/screenshot/computer-use-preview")
+TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "../evaluation_examples/test_small.json")
+EXAMPLES_BASE_PATH = os.getenv("EXAMPLES_BASE_PATH", "../evaluation_examples/examples")
+RESULTS_BASE_PATH = os.getenv("RESULTS_BASE_PATH", "../results_operator_aws/pyautogui/screenshot/computer-use-preview")
 MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))

 def load_task_list():
@@ -71,11 +71,59 @@ def get_task_status(task_type, task_id):
    
    last_step = steps[-1]
    
-    # check if the task is done
+    # Check the log file for agent responses and exit conditions
+    log_data = {
+        "agent_responses": [],
+        "exit_condition": None,
+        "last_message": None
+    }
+    
+    if os.path.exists(log_file):
+        try:
+            with open(log_file, 'r') as f:
+                log_content = f.readlines()
+                last_response = None
+                
+                for i, line in enumerate(log_content):
+                    # Extract agent responses for each step
+                    if "Responses: [" in line:
+                        response_text = line.split("Responses: [")[1].strip()
+                        if response_text.endswith("]"):
+                            response_text = response_text[:-1]  # Remove closing bracket
+                        
+                        # Clean up the response text - remove quotes
+                        if response_text.startswith("'") and response_text.endswith("'"):
+                            response_text = response_text[1:-1]  # Remove surrounding quotes
+                        elif response_text == '"]':  # Empty response
+                            response_text = ""
+                        
+                        # Handle list of responses
+                        if response_text and "', '" in response_text:
+                            responses = [r.strip("'") for r in response_text.split("', '")]
+                            log_data["agent_responses"].append(responses[0])  # Use first response
+                            last_response = responses[0]  # Keep track of the last response
+                        elif response_text:
+                            log_data["agent_responses"].append(response_text)
+                            last_response = response_text  # Keep track of the last response
+                    
+                    # Scan every line for exit conditions; a later match overwrites an earlier one
+                    if "The state of the agent is not correct" in line or "Exit condition met" in line:
+                        log_data["exit_condition"] = line.strip()
+                        # If this is a message exit, save the last response as the last message
+                        if "message_exit: True" in line and last_response:
+                            log_data["last_message"] = last_response
+        except Exception as e:
+            log_data["error"] = f"Error parsing log file: {str(e)}"
+    
+    # Decide status: trajectory flags take precedence, then a logged message exit, then the step limit
    if last_step.get("done", False):
        status = "Done"
    elif last_step.get("Error", False):
        status = "Error"
+    elif log_data.get("exit_condition") and "message_exit: True" in log_data.get("exit_condition", ""):
+        status = "Done (Message Exit)"
+    elif len(steps) >= MAX_STEPS:
+        status = "Done (Max Steps)"
    else:
        status = "Running"
    
@@ -86,7 +134,7 @@ def get_task_status(task_type, task_id):
        last_update = "None"
    
    result_content = "Task not completed"
-    if status == "Done":
+    if status.startswith("Done"):
        if os.path.exists(result_file):
            with open(result_file, 'r') as f:
                result_content = f.read().strip()
@@ -99,6 +147,7 @@ def get_task_status(task_type, task_id):
        "max_steps": MAX_STEPS,
        "last_update": last_update,
        "steps": steps,
+        "log_data": log_data,
        "result": result_content
    }