feat&fix: update paths in configuration, enhance error handling, and improve UI elements

2025-06-01 04:48:50 +00:00
parent d1a001b2b7
commit cb62b3c877
13 changed files with 359 additions and 68 deletions
--- a/evaluation_examples/test_small.json
+++ b/evaluation_examples/test_small.json
@@ -29,33 +29,6 @@
    "46407397-a7d5-4c6b-92c6-dbe038b1457b",
    "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
    "510f64c8-9bcc-4be1-8d30-638705850618",
-    "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
-    "c867c42d-a52d-4a24-8ae3-f75d256b5618",
-    "74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
-    "b5062e3e-641c-4e3a-907b-ac864d2e7652",
-    "48d05431-6cd5-4e76-82eb-12b60d823f7d",
-    "eb303e01-261e-4972-8c07-c9b4e7a4922a",
-    "d1acdb87-bb67-4f30-84aa-990e56a09c92",
-    "deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
-    "8e116af7-7db7-4e35-a68b-b0939c066c78",
-    "716a6079-22da-47f1-ba73-c9d58f986a38",
-    "2373b66a-092d-44cb-bfd7-82e86e7a3b4d"
-  ],
-  "os": [
-    "5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
-    "5812b315-e7bd-4265-b51f-863c02174c28"
-  ],
-  "thunderbird": [
-    "dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
-    "15c3b339-88f7-4a86-ab16-e71c58dcb01e"
-  ],
-  "vlc": [
-    "59f21cfb-0120-4326-b255-a5b827b38967",
-    "8f080098-ddb1-424c-b438-4e96e5e4786e"
-  ],
-  "vs_code": [
-    "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
-    "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
-    "276cc624-87ea-4f08-ab93-f770e3790175"
+    "897e3b53-5d4d-444b-85cb-2cdc8a97d903"
  ]
 }
--- a/monitor/Dockerfile
+++ b/monitor/Dockerfile
@@ -1,14 +1,11 @@
 FROM python:3.9-slim

-WORKDIR /app
+WORKDIR /app/monitor

 # Install dependencies
 COPY monitor/requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt

-# Copy application code
-COPY monitor/ ./
-
 # Expose port (will be overridden by environment variable)
 ARG FLASK_PORT=8080
 EXPOSE ${FLASK_PORT}
--- a/monitor/README.md
+++ b/monitor/README.md
@@ -28,11 +28,11 @@ The monitor can be configured by editing the `.env` file in the monitor director
 For example:
 ```bash
 # .env
-TASK_CONFIG_PATH=evaluation_examples/test_small.json
-EXAMPLES_BASE_PATH=evaluation_examples/examples
-RESULTS_BASE_PATH=results_operator_aws/pyautogui/screenshot/computer-use-preview
+TASK_CONFIG_PATH=../evaluation_examples/test_small.json
+EXAMPLES_BASE_PATH=../evaluation_examples/examples
+RESULTS_BASE_PATH=../results_operator_aws/pyautogui/screenshot/computer-use-preview
 MAX_STEPS=50
-FLASK_PORT=8080
+FLASK_PORT=80
 FLASK_HOST=0.0.0.0
 FLASK_DEBUG=true
 ```
--- a/monitor/docker-compose.yml
+++ b/monitor/docker-compose.yml
@@ -8,6 +8,7 @@ services:
    ports:
      - "${FLASK_PORT:-8080}:8080"
    volumes:
+      - .:/app/monitor
      - ../evaluation_examples:/app/evaluation_examples
      - ../results_operator_aws:/app/results_operator_aws
    env_file:
--- a/monitor/main.py
+++ b/monitor/main.py
@@ -15,9 +15,9 @@ load_dotenv()
 app = Flask(__name__)

 # Load configuration from environment variables
-TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "evaluation_examples/test_small.json")
-EXAMPLES_BASE_PATH = os.getenv("EXAMPLES_BASE_PATH", "evaluation_examples/examples")
-RESULTS_BASE_PATH = os.getenv("RESULTS_BASE_PATH", "results_operator_aws/pyautogui/screenshot/computer-use-preview")
+TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "../evaluation_examples/test_small.json")
+EXAMPLES_BASE_PATH = os.getenv("EXAMPLES_BASE_PATH", "../evaluation_examples/examples")
+RESULTS_BASE_PATH = os.getenv("RESULTS_BASE_PATH", "../results_operator_aws/pyautogui/screenshot/computer-use-preview")
 MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))

 def load_task_list():
@@ -71,11 +71,59 @@ def get_task_status(task_type, task_id):
    
    last_step = steps[-1]
    
-    # check if the task is done
+    # Check the log file for agent responses and exit conditions
+    log_data = {
+        "agent_responses": [],
+        "exit_condition": None,
+        "last_message": None
+    }
+    
+    if os.path.exists(log_file):
+        try:
+            with open(log_file, 'r') as f:
+                log_content = f.readlines()
+                last_response = None
+                
+                for i, line in enumerate(log_content):
+                    # Extract agent responses for each step
+                    if "Responses: [" in line:
+                        response_text = line.split("Responses: [")[1].strip()
+                        if response_text.endswith("]"):
+                            response_text = response_text[:-1]  # Remove closing bracket
+                        
+                        # Clean up the response text - remove quotes
+                        if response_text.startswith("'") and response_text.endswith("'"):
+                            response_text = response_text[1:-1]  # Remove surrounding quotes
+                        elif response_text == '"]':  # Empty response
+                            response_text = ""
+                        
+                        # Handle list of responses
+                        if response_text and "', '" in response_text:
+                            responses = [r.strip("'") for r in response_text.split("', '")]
+                            log_data["agent_responses"].append(responses[0])  # Use first response
+                            last_response = responses[0]  # Keep track of the last response
+                        elif response_text:
+                            log_data["agent_responses"].append(response_text)
+                            last_response = response_text  # Keep track of the last response
+                    
+                    # Check every log line for exit-condition markers; the last matching line wins
+                    if "The state of the agent is not correct" in line or "Exit condition met" in line:
+                        log_data["exit_condition"] = line.strip()
+                        # If this is a message exit, save the last response as the last message
+                        if "message_exit: True" in line and last_response:
+                            log_data["last_message"] = last_response
+        except Exception as e:
+            log_data["error"] = f"Error parsing log file: {str(e)}"
+    
+    # check if the task is done based on both trajectory and log
    if last_step.get("done", False):
        status = "Done"
    elif last_step.get("Error", False):
        status = "Error"
+    elif log_data.get("exit_condition") and "message_exit: True" in log_data.get("exit_condition", ""):
+        status = "Done (Message Exit)"
+    elif len(steps) >= MAX_STEPS:
+        status = "Done (Max Steps)"
    else:
        status = "Running"
    
@@ -86,7 +134,7 @@ def get_task_status(task_type, task_id):
        last_update = "None"
    
    result_content = "Task not completed"
-    if status == "Done":
+    if status.startswith("Done"):
        if os.path.exists(result_file):
            with open(result_file, 'r') as f:
                result_content = f.read().strip()
@@ -99,6 +147,7 @@ def get_task_status(task_type, task_id):
        "max_steps": MAX_STEPS,
        "last_update": last_update,
        "steps": steps,
+        "log_data": log_data,
        "result": result_content
    }

--- a/monitor/static/favicon.ico
+++ b/monitor/static/favicon.ico
--- a/monitor/static/favicon.png
+++ b/monitor/static/favicon.png
--- a/monitor/static/index.css
+++ b/monitor/static/index.css
@@ -56,6 +56,17 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
    margin-bottom: 10px;
    display: block;
 }
+
+/* Specific colors for different stat cards */
+.stat-card:nth-child(4) i { color: #007bff; } /* Total - Blue */
+.stat-card:nth-child(4):hover { background: linear-gradient(135deg, #f0f7ff, #e6f0fb); }
+.stat-card:nth-child(1) i { color: #17a2b8; } /* Active - Cyan */
+.stat-card:nth-child(1):hover { background: linear-gradient(135deg, #e3fafd, #d1f2f6); }
+.stat-card:nth-child(2) i { color: #28a745; } /* Completed - Green */
+.stat-card:nth-child(2):hover { background: linear-gradient(135deg, #e6f9ea, #d4f7db); }
+.stat-card:nth-child(3) i { color: #dc3545; } /* Error - Red */
+.stat-card:nth-child(3):hover { background: linear-gradient(135deg, #feeaec, #fcd8db); }
+
 .stat-card span {
    font-size: 2em;
    font-weight: 600;
@@ -162,11 +173,12 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
    padding: 20px;
    transition: all 0.4s cubic-bezier(.4,0,.2,1);
    opacity: 1;
-    max-height: 2000px;
+    max-height: none;
+    overflow-y: auto;
 }

 .task-type.collapsed .tasks-container {
-    max-height: 0;
+    max-height: 0 !important;
    opacity: 0;
    padding: 0;
    overflow: hidden;
@@ -187,6 +199,9 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
    position: relative;
    z-index: 2;
 }
+.task-card:last-child {
+    margin-bottom: 5px;
+}
 .task-card:hover { box-shadow: 0 10px 30px rgba(0,123,255,0.12); transform: translateY(-3px); }
 .task-header { display: flex; justify-content: space-between; margin-bottom: 14px; align-items: center; }
 .task-title { font-size: 1.2em; font-weight: 600; color: #1a237e; }
@@ -196,6 +211,8 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
 .status-running { background: linear-gradient(135deg, #e3f2fd, #bbdefb); color: #0d47a1; }
 .status-completed { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
 .status-error { background: linear-gradient(135deg, #ffebee, #ffcdd2); color: #b71c1c; }
+.status-unknown { background: linear-gradient(135deg, #e0e0e0, #bdbdbd); color: #424242; }
+.status-done-max-steps { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
 .task-details { margin-top: 16px; }
 .progress-bar { height: 12px; background-color: #eef2f7; border-radius: 6px; margin-top: 10px; overflow: hidden; box-shadow: inset 0 1px 3px rgba(0,0,0,0.1); }
 .progress-fill { height: 100%; background: linear-gradient(90deg, #007bff, #00c6ff); width: 0%; transition: width 0.6s ease; }
@@ -302,3 +319,22 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
    color: #0078d7;
 }

+/* Custom scrollbar for tasks container */
+.tasks-container::-webkit-scrollbar {
+    width: 8px;
+}
+
+.tasks-container::-webkit-scrollbar-track {
+    background: #f1f5f9;
+    border-radius: 4px;
+}
+
+.tasks-container::-webkit-scrollbar-thumb {
+    background: #c0d6e8;
+    border-radius: 4px;
+}
+
+.tasks-container::-webkit-scrollbar-thumb:hover {
+    background: #a5c7e5;
+}
+
--- a/monitor/static/index.js
+++ b/monitor/static/index.js
@@ -5,6 +5,7 @@ document.addEventListener('DOMContentLoaded', () => {
    document.getElementById('total-tasks').parentElement.addEventListener('click', () => setTaskFilter('all'));
    document.getElementById('active-tasks').parentElement.addEventListener('click', () => setTaskFilter('active'));
    document.getElementById('completed-tasks').parentElement.addEventListener('click', () => setTaskFilter('completed'));
+    document.getElementById('error-tasks').parentElement.addEventListener('click', () => setTaskFilter('error'));
 });

 let allTaskData = null;
@@ -49,6 +50,8 @@ function setTaskFilter(filter) {
        document.getElementById('active-tasks').parentElement.classList.add('selected');
    } else if (filter === 'completed') {
        document.getElementById('completed-tasks').parentElement.classList.add('selected');
+    } else if (filter === 'error') {
+        document.getElementById('error-tasks').parentElement.classList.add('selected');
    }
 }

@@ -57,14 +60,17 @@ function updateStatistics(data) {
    let totalTasks = 0;
    let activeTasks = 0;
    let completedTasks = 0;
+    let errorTasks = 0;
    
    Object.entries(data).forEach(([taskType, tasks]) => {
        totalTasks += tasks.length;
        tasks.forEach(task => {
            if (task.status.status === 'Running' || task.status.status === 'Preparing' || task.status.status === 'Initializing') {
                activeTasks++;
-            } else if (task.status.status === 'Done') {
+            } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') {
                completedTasks++;
+            } else if (task.status.status === 'Error') {
+                errorTasks++;
            }
        });
    });
@@ -72,6 +78,19 @@ function updateStatistics(data) {
    document.getElementById('total-tasks').textContent = totalTasks;
    document.getElementById('active-tasks').textContent = activeTasks;
    document.getElementById('completed-tasks').textContent = completedTasks;
+    document.getElementById('error-tasks').textContent = errorTasks;
+    
+    // Highlight the currently selected stat card
+    document.querySelectorAll('.stat-card').forEach(card => card.classList.remove('selected'));
+    if (currentFilter === 'all') {
+        document.getElementById('total-tasks').parentElement.classList.add('selected');
+    } else if (currentFilter === 'active') {
+        document.getElementById('active-tasks').parentElement.classList.add('selected');
+    } else if (currentFilter === 'completed') {
+        document.getElementById('completed-tasks').parentElement.classList.add('selected');
+    } else if (currentFilter === 'error') {
+        document.getElementById('error-tasks').parentElement.classList.add('selected');
+    }
 }

 function renderTasks(data) {
@@ -86,7 +105,9 @@ function renderTasks(data) {
            if (currentFilter === 'active') {
                filteredTasks = tasks.filter(task => ['Running', 'Preparing', 'Initializing'].includes(task.status.status));
            } else if (currentFilter === 'completed') {
-                filteredTasks = tasks.filter(task => task.status.status === 'Done');
+                filteredTasks = tasks.filter(task => task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)');
+            } else if (currentFilter === 'error') {
+                filteredTasks = tasks.filter(task => task.status.status === 'Error');
            }
            if (filteredTasks.length > 0) {
                filteredData[taskType] = filteredTasks;
@@ -107,7 +128,7 @@ function renderTasks(data) {
        tasks.forEach(task => {
            if (task.status.status === 'Running' || task.status.status === 'Preparing' || task.status.status === 'Initializing') {
                runningCount++;
-            } else if (task.status.status === 'Done') {
+            } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') {
                completedCount++;
            } else if (task.status.status === 'Error') {
                errorCount++;
@@ -146,6 +167,12 @@ function renderTasks(data) {
            noTasks.innerHTML = '<i class="fas fa-info-circle"></i> No Tasks Available';
            tasksContainer.appendChild(noTasks);
        } else {
+            // Add scrolling for large task lists
+            if (tasks.length > 10) {
+                tasksContainer.style.maxHeight = '600px';
+                tasksContainer.style.overflowY = 'auto';
+            }
+            
            tasks.forEach(task => {
                const taskCard = document.createElement('div');
                taskCard.className = 'task-card';
@@ -178,6 +205,8 @@ function renderTasks(data) {
                        statusIcon = 'fa-running';
                        break;
                    case 'Done':
+                    case 'Done (Message Exit)':
+                    case 'Done (Max Steps)':
                        statusClass = 'status-completed';
                        statusIcon = 'fa-check-circle';
                        break;
@@ -185,6 +214,10 @@ function renderTasks(data) {
                        statusClass = 'status-error';
                        statusIcon = 'fa-exclamation-circle';
                        break;
+                    default:
+                        statusClass = 'status-unknown';
+                        statusIcon = 'fa-question-circle';
+                        break;
                }
                
                taskStatus.classList.add(statusClass);
@@ -202,7 +235,7 @@ function renderTasks(data) {
                
                if (task.status.progress > 0) {
                    const progressText = document.createElement('div');
-                    progressText.innerHTML = `<i class="fas fa-chart-line"></i> Progress: ${task.status.progress} step(s)`;
+                    progressText.innerHTML = `<i class="fas fa-chart-line"></i> Progress: ${task.status.progress}/${task.status.max_steps} step(s)`;
                    taskProgress.appendChild(progressText);
                    
                    const progressBar = document.createElement('div');
--- a/monitor/static/task_detail.css
+++ b/monitor/static/task_detail.css
@@ -49,6 +49,11 @@ h2 { color: #0056b3; margin-top: 36px; font-size: 1.6em; font-weight: 600; }
 .step-card { 
    border: none; 
    background: #fafdff;
+    box-shadow: 0 4px 15px rgba(0,0,0,0.08); 
+    margin-bottom: 25px; 
+    border-radius: 10px; 
+    overflow: hidden; 
+    transition: all 0.3s;
    padding: 22px 26px; 
    margin-bottom: 24px; 
    border-radius: 10px; 
@@ -57,19 +62,29 @@ h2 { color: #0056b3; margin-top: 36px; font-size: 1.6em; font-weight: 600; }
    position: relative;
    overflow: hidden;
 }
+.step-intent {
+    padding: 10px 20px;
+    background: #f0f7ff;
+    border-left: 4px solid #4285f4;
+    margin: 10px 20px;
+    font-size: 0.95em;
+    line-height: 1.5;
+    color: #333;
+}
+.exit-condition {
+    background: #fff8e1;
+    padding: 8px 12px;
+    border-radius: 6px;
+    font-family: 'Courier New', monospace;
+    font-size: 0.9em;
+    border-left: 3px solid #ffa000;
+}
+
 .step-card:hover { 
    box-shadow: 0 10px 30px rgba(0,123,255,0.1); 
    transform: translateY(-3px);
 }
-.step-card:before {
-    content: '';
-    position: absolute;
-    left: 0;
-    top: 0;
-    height: 100%;
-    width: 4px;
-    background: linear-gradient(to bottom, #007bff, #00c6ff);
-}
+
 .step-header { display: flex; justify-content: space-between; margin-bottom: 12px; align-items: center; }
 .step-title { font-weight: 600; color: #1a237e; font-size: 1.1em; }
 .step-time { color: #6c757d; font-size: 0.92em; }
@@ -90,10 +105,7 @@ pre {
    box-shadow: 0 5px 15px rgba(0,0,0,0.08);
    transition: all 0.3s;
 }
-.step-image:hover {
-    transform: scale(1.01);
-    box-shadow: 0 8px 25px rgba(0,0,0,0.12);
-}
+
 .no-steps { 
    color: #8492a6; 
    font-style: italic; 
@@ -154,5 +166,142 @@ pre {
 .status-not-started { background: linear-gradient(135deg, #f0f0f0, #e6e6e6); color: #555; }
 .status-preparing, .status-initializing { background: linear-gradient(135deg, #fff7e0, #ffe8a3); color: #8a6d00; }
 .status-running { background: linear-gradient(135deg, #e3f2fd, #bbdefb); color: #0d47a1; }
-.status-done { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
+.status-done, .status-done-message-exit, .status-done-max-steps { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
 .status-error { background: linear-gradient(135deg, #ffebee, #ffcdd2); color: #b71c1c; }
+
+.step-intent {
+    padding: 10px 20px;
+    background: #f0f7ff;
+    border-left: 4px solid #4285f4;
+    margin: 10px 0;
+    font-size: 0.95em;
+    line-height: 1.5;
+    color: #333;
+}
+
+.exit-condition {
+    background: #fff8e1;
+    padding: 8px 12px;
+    border-radius: 6px;
+    font-family: 'Courier New', monospace;
+    font-size: 0.9em;
+    border-left: 3px solid #ffa000;
+    position: relative;
+}
+
+.exit-message {
+    background: #e8f5e9;
+    padding: 12px 16px;
+    border-radius: 6px;
+    font-family: 'Segoe UI', Arial, sans-serif;
+    font-size: 1em;
+    border-left: 3px solid #4caf50;
+    position: relative;
+    line-height: 1.5;
+    color: #1b5e20;
+    margin-top: 4px;
+    box-shadow: 0 2px 5px rgba(0,0,0,0.05);
+}
+
+.exit-condition-help {
+    margin-top: 8px;
+    font-family: 'Segoe UI', Arial, sans-serif;
+    font-size: 0.85em;
+    color: #666;
+    background: #f5f5f5;
+    padding: 6px 10px;
+    border-radius: 4px;
+    border-left: 2px solid #9e9e9e;
+}
+
+/* Tooltip styles */
+.tooltip {
+    position: relative;
+    display: inline-block;
+    margin-left: 8px;
+    cursor: help;
+}
+
+.tooltip .tooltip-text {
+    visibility: hidden;
+    min-width: 200px;
+    max-width: 500px;
+    width: max-content;
+    background-color: #333;
+    color: #fff;
+    text-align: left;
+    border-radius: 6px;
+    padding: 10px 12px;
+    position: absolute;
+    z-index: 10;
+    bottom: 125%;
+    left: 50%;
+    transform: translateX(-50%);
+    opacity: 0;
+    transition: opacity 0.3s;
+    font-weight: normal;
+    font-size: 0.85em;
+    white-space: normal;
+    word-wrap: break-word;
+    line-height: 1.4;
+    box-shadow: 0 2px 10px rgba(0,0,0,0.2);
+}
+
+.tooltip .tooltip-text::after {
+    content: "";
+    position: absolute;
+    top: 100%;
+    left: 50%;
+    margin-left: -5px;
+    border-width: 5px;
+    border-style: solid;
+    border-color: #333 transparent transparent transparent;
+}
+
+.tooltip:hover .tooltip-text {
+    visibility: visible;
+    opacity: 1;
+}
+
+/* Tooltip adjustments for mobile devices */
+@media (max-width: 768px) {
+    .tooltip .tooltip-text {
+        width: auto;
+        max-width: 250px;
+        left: auto;
+        right: 0;
+        transform: none;
+    }
+    
+    .tooltip .tooltip-text::after {
+        left: auto;
+        right: 10px;
+    }
+}
+
+/* Progress bar styles */
+.progress-bar {
+    height: 12px;
+    background-color: #eef2f7;
+    border-radius: 6px;
+    margin: 10px 0;
+    overflow: hidden;
+    box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
+    width: 100%;
+    max-width: 300px;
+}
+
+.progress-fill {
+    height: 100%;
+    background: linear-gradient(90deg, #007bff, #00c6ff);
+    width: 0%;
+    transition: width 0.6s ease;
+}
+
+.progress-percentage {
+    text-align: right;
+    font-size: 0.85em;
+    color: #6c757d;
+    margin-top: 4px;
+    font-weight: normal;
+}
--- a/monitor/templates/index.html
+++ b/monitor/templates/index.html
@@ -4,6 +4,9 @@
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OSWorld Monitor</title>
+    <link rel="icon" href="/static/favicon.png" type="image/x-icon">
+    <link rel="shortcut icon" href="/static/favicon.png" type="image/x-icon">
+    <link rel="apple-touch-icon" href="/static/favicon.png">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">
    <link rel="stylesheet" href="/static/style.css">
    <link rel="stylesheet" href="/static/index.css">
@@ -12,11 +15,6 @@
    <div class="main-container">
        <h1>OSWorld Monitor <span class="system-status online">System Online</span></h1>
        <div class="dashboard-stats">
-            <div class="stat-card">
-                <i class="fas fa-tasks"></i>
-                <span id="total-tasks">Loading...</span>
-                <div class="stat-label">Total Tasks</div>
-            </div>
            <div class="stat-card">
                <i class="fas fa-running"></i>
                <span id="active-tasks">Loading...</span>
@@ -27,6 +25,16 @@
                <span id="completed-tasks">Loading...</span>
                <div class="stat-label">Completed</div>
            </div>
+            <div class="stat-card">
+                <i class="fas fa-exclamation-circle"></i>
+                <span id="error-tasks">Loading...</span>
+                <div class="stat-label">Error</div>
+            </div>
+            <div class="stat-card">
+                <i class="fas fa-tasks"></i>
+                <span id="total-tasks">Loading...</span>
+                <div class="stat-label">Total Tasks</div>
+            </div>
        </div>
        <div id="task-container">
            <div class="loading-spinner">
--- a/monitor/templates/task_detail.html
+++ b/monitor/templates/task_detail.html
@@ -4,6 +4,9 @@
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Task Detail: {{ task_id }}</title>
+    <link rel="icon" href="/static/favicon.png" type="image/x-icon">
+    <link rel="shortcut icon" href="/static/favicon.png" type="image/x-icon">
+    <link rel="apple-touch-icon" href="/static/favicon.png">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">
    <link rel="stylesheet" href="/static/task_detail.css">
 </head>
@@ -21,11 +24,41 @@
                <dt>Instruction</dt>
                <dd>{{ task_info.instruction }}</dd>
                <dt>Status</dt>
-                <dd class="status status-{{ task_status.status|lower|replace(' ', '-') }}">{{ task_status.status }}</dd>
+                <dd class="status status-{{ task_status.status|lower|replace(' ', '-')|replace('(', '')|replace(')', '') }}">
+                    {{ task_status.status }}
+                    {% if task_status.status == 'Error' %}
+                    <span class="tooltip">
+                        <i class="fas fa-question-circle"></i>
+                        <span class="tooltip-text">Error occurred during task execution</span>
+                    </span>
+                    {% elif task_status.status == 'Done (Message Exit)' %}
+                    <span class="tooltip">
+                        <i class="fas fa-question-circle"></i>
+                        <span class="tooltip-text">Task completed with a message exit condition</span>
+                    </span>
+                    {% elif task_status.status == 'Done (Max Steps)' %}
+                    <span class="tooltip">
+                        <i class="fas fa-question-circle"></i>
+                        <span class="tooltip-text">Maximum steps reached, task completed</span>
+                    </span>
+                    {% endif %}
+                </dd>
                <dt>Current Step</dt>
                <dd>{{ task_status.progress }}</dd>
                <dt>Last Update</dt>
                <dd>{{ task_status.last_update or 'None' }}</dd>
+                {% if task_status.log_data and task_status.log_data.exit_condition %}
+                <dt>Exit Condition</dt>
+                <dd class="exit-condition">
+                    {{ task_status.log_data.exit_condition }}
+                </dd>
+                {% endif %}
+                {% if task_status.status == 'Done (Message Exit)' and task_status.log_data and task_status.log_data.last_message %}
+                <dt>Exit Message</dt>
+                <dd class="exit-message">
+                    {{ task_status.log_data.last_message }}
+                </dd>
+                {% endif %}
                <dt>Result</dt>
                <dd>{{ task_status.result }}</dd>
            </dl>
@@ -40,7 +73,15 @@
                                <div class="step-title"><i class="fas fa-check-circle"></i> Step {{ step.step_num }}</div>
                                <div class="step-time"><i class="far fa-clock"></i> {{ step.action_timestamp }}</div>
                            </div>
-                            <pre>{{ step.action.action }}</pre>
+                            {% if task_status.log_data and task_status.log_data.agent_responses and loop.index0 < task_status.log_data.agent_responses|length %}
+                                <div class="step-intent">
+                                    <i class="fas fa-comment"></i> <strong>Agent Intent:</strong> {{ task_status.log_data.agent_responses[loop.index0] }}
+                                </div>
+                            {% endif %}
+                            <pre>{% if step.action and step.action.action %}{{ step.action.action }}
+                            {% elif step.Error %}Error: {{ step.Error }}
+                            {% else %}{{ step|tojson }}
+                            {% endif %}</pre>
                            {% if step.screenshot_file %}
                                <div>
                                    <img src="/task/{{ task_type }}/{{ task_id }}/screenshot/{{ step.screenshot_file }}" 
--- a/run_multienv_openaicua.py
+++ b/run_multienv_openaicua.py
@@ -18,6 +18,10 @@ from mm_agents.openai_cua_agent import OpenAICUAAgent

 # import wandb

+# load the environment variables from .env file
+if os.path.exists(".env"):
+    from dotenv import load_dotenv
+    load_dotenv()

 #  Logger Configs {{{ #
 logger = logging.getLogger()