Merge remote-tracking branch 'upstream/feat/aws-provider-support'

2025-06-05 13:31:42 +00:00
parent 71578d994e 71e0f1dfdd
commit a6300e05c9
383 changed files with 3303 additions and 2070 deletions
--- a/monitor/.env
+++ b/monitor/.env
@@ -8,4 +8,4 @@ RESULTS_BASE_PATH=../results_operator_timeoutcheck3/pyautogui/screenshot/compute
 MAX_STEPS=150
 FLASK_PORT=80
 FLASK_HOST=0.0.0.0
-FLASK_DEBUG=true
+FLASK_DEBUG=false
--- a/monitor/README.md
+++ b/monitor/README.md
@@ -25,7 +25,7 @@ The monitor can be configured by editing the `.env` file in the monitor director
 | MAX_STEPS | Maximum steps to display for a task | 50 |
 | FLASK_PORT | Port for the web server | 80 |
 | FLASK_HOST | Host address for the web server | 0.0.0.0 |
-| FLASK_DEBUG | Enable debug mode (true/false) | true |
+| FLASK_DEBUG | Enable debug mode (true/false) | false |

 For example:
 ```bash
@@ -36,7 +36,7 @@ RESULTS_BASE_PATH=../results_operator_aws/pyautogui/screenshot/computer-use-prev
 MAX_STEPS=50
 FLASK_PORT=80
 FLASK_HOST=0.0.0.0
-FLASK_DEBUG=true
+FLASK_DEBUG=false
 ```

 ## Running with Docker
--- a/monitor/main.py
+++ b/monitor/main.py
@@ -12,6 +12,9 @@ from dotenv import load_dotenv
 # Load environment variables from .env file
 load_dotenv()

+# In-process cache of brief task statuses, keyed by "{task_type}_{task_id}" and
+# holding the status dict built by get_task_status_brief(). Only finished
+# ("Done..." or "Error") statuses are stored, and entries are never evicted.
+TASK_STATUS_CACHE = {}
+
 app = Flask(__name__)

 # Load configuration from environment variables
@@ -122,6 +125,8 @@ def get_task_status(task_type, task_id):
        status = "Error"
    elif log_data.get("exit_condition") and "message_exit: True" in log_data.get("exit_condition", ""):
        status = "Done (Message Exit)"
+    elif log_data.get("exit_condition") and "thought_exit: True" in log_data.get("exit_condition", ""):
+        status = "Done (Thought Exit)"
    elif len(steps) >= MAX_STEPS:
        status = "Done (Max Steps)"
    else:
@@ -151,6 +156,133 @@ def get_task_status(task_type, task_id):
        "result": result_content
    }

+def _count_complete_lines(path, chunk_size=1 << 16):
+    """Return the number of newline-terminated lines in *path*.
+
+    Only newline bytes are counted (the same figure ``wc -l`` reports), so a
+    final line that has no trailing newline yet is not included.
+    """
+    total = 0
+    with open(path, "rb") as handle:
+        while True:
+            chunk = handle.read(chunk_size)
+            if not chunk:
+                return total
+            total += chunk.count(b"\n")
+
+
+def _read_tail_lines(path, line_count, chunk_size=4096):
+    """Return the last *line_count* lines of *path* as a single decoded string.
+
+    The file is read backwards in chunks from its end, so only the tail is
+    loaded. Undecodable bytes are replaced rather than raising.
+    """
+    buffer = b""
+    with open(path, "rb") as handle:
+        handle.seek(0, os.SEEK_END)
+        position = handle.tell()
+        # line_count + 1 newlines guarantee that the wanted lines are complete
+        # even when the file ends with a newline terminator.
+        while position > 0 and buffer.count(b"\n") <= line_count:
+            step = min(chunk_size, position)
+            position -= step
+            handle.seek(position)
+            buffer = handle.read(step) + buffer
+    lines = buffer.split(b"\n")
+    if lines and lines[-1] == b"":
+        lines.pop()
+    return b"\n".join(lines[-line_count:]).decode("utf-8", errors="replace")
+
+
+def _format_mtime(path):
+    """Return the modification time of *path* formatted as ``YYYY-MM-DD HH:MM:SS``."""
+    return datetime.fromtimestamp(os.path.getmtime(path)).strftime("%Y-%m-%d %H:%M:%S")
+
+
+def _brief_status(status, progress=0, last_update=None, result=None):
+    """Build a brief status dict so that every return path has the same keys."""
+    return {
+        "status": status,
+        "progress": progress,
+        "max_steps": MAX_STEPS,
+        "last_update": last_update,
+        "result": result,
+    }
+
+
+def get_task_status_brief(task_type, task_id):
+    """
+    Get brief status info for a task, without detailed step data, for fast homepage loading.
+
+    The status is derived from the files under
+    ``RESULTS_BASE_PATH/<task_type>/<task_id>``:
+
+    * no result directory            -> "Not Started"
+    * no ``traj.jsonl``              -> "Preparing"
+    * ``traj.jsonl`` without a line  -> "Initializing"
+    * otherwise "Running", "Done", "Error", "Done (Max Steps)",
+      "Done (Message Exit)" or "Done (Thought Exit)"
+
+    Returns a dict with the keys ``status``, ``progress`` (number of complete
+    trajectory lines), ``max_steps``, ``last_update`` and ``result``.
+    Finished statuses ("Done..." and "Error") are stored in
+    ``TASK_STATUS_CACHE`` and returned from there on later calls.
+    """
+    cache_key = f"{task_type}_{task_id}"
+
+    # Finished tasks are served from the cache without touching the disk.
+    if cache_key in TASK_STATUS_CACHE:
+        return TASK_STATUS_CACHE[cache_key]
+
+    result_dir = os.path.join(RESULTS_BASE_PATH, task_type, task_id)
+    if not os.path.exists(result_dir):
+        return _brief_status("Not Started")
+
+    traj_file = os.path.join(result_dir, "traj.jsonl")
+    log_file = os.path.join(result_dir, "runtime.log")
+    result_file = os.path.join(result_dir, "result.txt")
+
+    if not os.path.exists(traj_file):
+        return _brief_status("Preparing", last_update=_format_mtime(result_dir))
+
+    # Count steps in pure Python instead of spawning `wc`; an unreadable file
+    # is treated like an empty one, as before.
+    try:
+        step_count = _count_complete_lines(traj_file)
+    except OSError:
+        step_count = 0
+
+    if step_count == 0:
+        return _brief_status("Initializing", last_update=_format_mtime(traj_file))
+
+    # Only the last trajectory line is parsed; a line that is unreadable or is
+    # not a JSON object simply leaves the status at its default.
+    last_step_data = None
+    try:
+        last_line = _read_tail_lines(traj_file, 1).strip()
+        parsed = json.loads(last_line) if last_line else None
+    except (OSError, ValueError):
+        parsed = None
+    if isinstance(parsed, dict):
+        last_step_data = parsed
+
+    status = "Running"
+    if last_step_data:
+        if last_step_data.get("done", False):
+            status = "Done"
+        elif last_step_data.get("Error", False):
+            status = "Error"
+
+    # Reaching the step limit takes precedence over the last step's own flags.
+    if step_count >= MAX_STEPS:
+        status = "Done (Max Steps)"
+
+    # For tasks that still look like they are running, check whether the last
+    # two log lines report an exit condition.
+    if status == "Running" and os.path.exists(log_file):
+        try:
+            log_tail = _read_tail_lines(log_file, 2)
+        except OSError:
+            log_tail = ""
+        if "message_exit: True" in log_tail:
+            status = "Done (Message Exit)"
+        elif "thought_exit: True" in log_tail:
+            status = "Done (Thought Exit)"
+
+    # NOTE(review): the fallback is the string "None" (not None), unlike the
+    # early-return paths above; kept as-is in case the frontend relies on it.
+    last_update = "None"
+    if last_step_data and "action_timestamp" in last_step_data:
+        try:
+            last_update = datetime.strptime(
+                last_step_data["action_timestamp"], "%Y%m%d@%H%M%S"
+            ).strftime("%Y-%m-%d %H:%M:%S")
+        except (TypeError, ValueError):
+            pass
+
+    # The result file is only read once the task has finished.
+    result_content = None
+    if status.startswith("Done") and os.path.exists(result_file):
+        try:
+            with open(result_file, 'r') as f:
+                result_content = f.read().strip()
+        except (OSError, UnicodeDecodeError):
+            result_content = "Result file not found"
+
+    status_dict = _brief_status(
+        status,
+        progress=step_count,
+        last_update=last_update,
+        result=result_content,
+    )
+
+    # Finished tasks are not expected to change, so cache them.
+    if status.startswith("Done") or status == "Error":
+        TASK_STATUS_CACHE[cache_key] = status_dict
+
+    return status_dict
+
 def get_all_tasks_status():
    task_list = load_task_list()
    result = {}
@@ -176,6 +308,34 @@ def get_all_tasks_status():
    
    return result

+def get_all_tasks_status_brief():
+    """
+    Collect the brief status of every task in the task list.
+
+    Returns a dict mapping each task type to a list of entries with the keys
+    ``id``, ``instruction`` and ``status`` (the dict produced by
+    ``get_task_status_brief``). Tasks without task info get a placeholder
+    instruction.
+    """
+    summary = {}
+
+    for task_type, task_ids in load_task_list().items():
+        entries = []
+        summary[task_type] = entries
+        for task_id in task_ids:
+            info = get_task_info(task_type, task_id)
+            brief = get_task_status_brief(task_type, task_id)
+
+            if info:
+                instruction = info.get("instruction", "No instruction provided")
+            else:
+                instruction = "No task info available"
+
+            entries.append({
+                "id": task_id,
+                "instruction": instruction,
+                "status": brief
+            })
+
+    return summary
+
@app.route('/')
 def index():
    return render_template("index.html")
@@ -199,6 +359,11 @@ def api_tasks():
    """Task status API"""
    return jsonify(get_all_tasks_status())

+@app.route('/api/tasks/brief')
+def api_tasks_brief():
+    """Serve the brief (step-free) status summary of all tasks as JSON."""
+    brief_summary = get_all_tasks_status_brief()
+    return jsonify(brief_summary)
+
@app.route('/task/<task_type>/<task_id>/screenshot/<path:filename>')
 def task_screenshot(task_type, task_id, filename):
    """Get task screenshot"""
--- a/monitor/static/index.css
+++ b/monitor/static/index.css
@@ -66,6 +66,65 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
 .stat-card:nth-child(2):hover { background: linear-gradient(135deg, #e6f9ea, #d4f7db); }
 .stat-card:nth-child(3) i { color: #dc3545; } /* Error - Red */
 .stat-card:nth-child(3):hover { background: linear-gradient(135deg, #feeaec, #fcd8db); }
+.stat-card:nth-child(4) i { color: #007bff; } /* Total - Blue */
+.stat-card:nth-child(4):hover { background: linear-gradient(135deg, #f0f7ff, #e6f0fb); }
+
+/* Score Banner Styles */
+/* Container for the aggregate score shown above the dashboard statistics. */
+.score-banner {
+    border-radius: 10px;
+    margin: 20px 0 30px;
+    padding: 5px;
+    /* border: 2px solid rgba(255, 193, 7, 0.5); */
+    text-align: center;
+    /* Positioning context for the :before overlay; overflow clips it to the rounded corners. */
+    position: relative;
+    overflow: hidden;
+    /* animation: scoreBannerGlow 3s infinite alternate; */
+}
+
+/* Decorative radial highlight covering the whole banner; it ignores pointer events. */
+.score-banner:before {
+    content: '';
+    position: absolute;
+    top: 0;
+    left: 0;
+    width: 100%;
+    height: 100%;
+    background: radial-gradient(circle at center, rgba(255, 255, 255, 0.8) 0%, transparent 70%);
+    pointer-events: none;
+}
+
+/* Icon, label and value on one centered row, stacked above the overlay via z-index. */
+.score-content {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    position: relative;
+    z-index: 1;
+}
+
+/* Icon inside the banner. */
+.score-banner i {
+    font-size: 2.2em;
+    color: #ffc107;
+    margin-right: 15px;
+    /* animation: rotateIcon 6s linear infinite; */
+    transform-origin: center;
+}
+
+/* "Score:" caption. */
+.score-label {
+    font-size: 1.3em;
+    font-weight: 600;
+    color: #b28704;
+    margin-right: 15px;
+}
+
+/* Score figure drawn as gradient text: the background is clipped to the glyphs and the fill is transparent. */
+.score-value {
+    font-size: 2em;
+    font-weight: 700;
+    background: linear-gradient(90deg, #ff8f00, #ffc107);
+    -webkit-background-clip: text;
+    background-clip: text;
+    -webkit-text-fill-color: transparent;
+    text-shadow: 0 1px 2px rgba(0,0,0,0.05);
+}
+

 .stat-card span {
    font-size: 2em;
--- a/monitor/static/index.js
+++ b/monitor/static/index.js
@@ -1,7 +1,6 @@
-// filepath: /home/adlsdztony/codes/OSWorld/monitor/static/index.js
 document.addEventListener('DOMContentLoaded', () => {
    fetchTasks();
-    // 筛选功能绑定
+    // Bind filter functionality
    document.getElementById('total-tasks').parentElement.addEventListener('click', () => setTaskFilter('all'));
    document.getElementById('active-tasks').parentElement.addEventListener('click', () => setTaskFilter('active'));
    document.getElementById('completed-tasks').parentElement.addEventListener('click', () => setTaskFilter('completed'));
@@ -24,11 +23,128 @@ function refreshPage() {
    // Store in sessionStorage
    sessionStorage.setItem('expandedTaskTypes', JSON.stringify(expandedTaskTypes));
    
-    fetchTasks();
+    // Only fetch brief data for update to improve refresh speed
+    fetchTasksForRefresh();
+}
+
+// Refresh path: fetch the brief task data, store it, then update the
+// statistics and the existing task cards in place (no full re-render).
+function fetchTasksForRefresh() {
+    const applyBriefData = (briefData) => {
+        // Keep the shared copy in sync before touching the DOM
+        allTaskData = briefData;
+        updateStatistics(briefData);
+        updateTaskStatus(briefData);
+    };
+    const reportFailure = (error) => console.error('Error refreshing tasks:', error);
+
+    fetch('/api/tasks/brief')
+        .then((response) => response.json())
+        .then(applyBriefData)
+        .catch(reportFailure);
+}
+
+// Status text -> [CSS class, Font Awesome icon classes] used on task cards.
+const BRIEF_STATUS_STYLES = new Map([
+    ['Not Started', ['status-not-started', 'fa-hourglass-start']],
+    ['Preparing', ['status-preparing', 'fa-spinner fa-pulse']],
+    ['Initializing', ['status-preparing', 'fa-spinner fa-pulse']],
+    ['Running', ['status-running', 'fa-running']],
+    ['Done', ['status-completed', 'fa-check-circle']],
+    ['Done (Message Exit)', ['status-completed', 'fa-check-circle']],
+    ['Done (Max Steps)', ['status-completed', 'fa-check-circle']],
+    ['Done (Thought Exit)', ['status-completed', 'fa-check-circle']],
+    ['Error', ['status-error', 'fa-exclamation-circle']],
+]);
+
+// Replace an element's content with an <i> icon followed by plain text.
+// The text goes in as a text node, so server-provided values (status,
+// timestamps, result file content) are never parsed as HTML.
+function setIconLabel(element, iconClasses, text) {
+    const icon = document.createElement('i');
+    icon.className = iconClasses;
+    element.textContent = '';
+    element.append(icon, ` ${text}`);
+}
+
+// Lookup key for a task card; JSON encoding keeps type/id pairs unambiguous.
+function taskCardKey(taskType, taskId) {
+    return JSON.stringify([String(taskType), String(taskId)]);
+}
+
+// Only update task status on the already rendered cards; do not re-render the
+// entire list. `data` maps each task type to an array of
+// { id, status: { status, progress, max_steps, last_update, result } }.
+function updateTaskStatus(data) {
+    // Mark the score banner as refreshing for one second.
+    // NOTE(review): no `.score-banner.refreshing` rule is visible in this
+    // change's index.css hunk - confirm the animation is defined elsewhere.
+    const scoreBanner = document.querySelector('.score-banner');
+    if (scoreBanner) {
+        scoreBanner.classList.add('refreshing');
+        setTimeout(() => {
+            scoreBanner.classList.remove('refreshing');
+        }, 1000);
+    }
+
+    // Index the rendered cards once instead of running one selector query per
+    // task; this also avoids building a selector from unescaped task ids.
+    const cardsByKey = new Map();
+    document.querySelectorAll('.task-card[data-task-id][data-task-type]').forEach(card => {
+        const key = taskCardKey(card.dataset.taskType, card.dataset.taskId);
+        // Keep the first card for a key, matching querySelector semantics.
+        if (!cardsByKey.has(key)) {
+            cardsByKey.set(key, card);
+        }
+    });
+
+    Object.entries(data).forEach(([taskType, tasks]) => {
+        tasks.forEach(task => {
+            // Cards hidden by the current filter are not in the DOM; skip them.
+            const taskCard = cardsByKey.get(taskCardKey(taskType, task.id));
+            if (!taskCard) return;
+
+            const {
+                status,
+                progress,
+                max_steps: maxSteps,
+                last_update: lastUpdate,
+                result,
+            } = task.status;
+
+            // Update status display
+            const statusElement = taskCard.querySelector('.task-status');
+            if (statusElement) {
+                statusElement.classList.remove('status-not-started', 'status-preparing', 'status-running', 'status-completed', 'status-error', 'status-unknown');
+
+                const [statusClass, statusIcon] =
+                    BRIEF_STATUS_STYLES.get(status) || ['status-unknown', 'fa-question-circle'];
+                statusElement.classList.add(statusClass);
+                setIconLabel(statusElement, `fas ${statusIcon}`, status);
+            }
+
+            // Update progress text and bar
+            if (progress > 0) {
+                // Guard against a zero step limit and cap the bar at 100%.
+                const percentage = maxSteps > 0 ? Math.min(100, (progress / maxSteps) * 100) : 0;
+
+                const progressText = taskCard.querySelector('.task-details div:first-child');
+                if (progressText) {
+                    setIconLabel(progressText, 'fas fa-chart-line', `Progress: ${progress}/${maxSteps} step(s)`);
+                }
+
+                const progressFill = taskCard.querySelector('.progress-fill');
+                if (progressFill) {
+                    progressFill.style.width = `${percentage}%`;
+                }
+
+                const progressPercentage = taskCard.querySelector('.progress-percentage');
+                if (progressPercentage) {
+                    progressPercentage.textContent = `${Math.round(percentage)}%`;
+                }
+            }
+
+            // Update last update time
+            const timestamp = taskCard.querySelector('.timestamp');
+            if (timestamp && lastUpdate) {
+                setIconLabel(timestamp, 'far fa-clock', `Last Update: ${lastUpdate}`);
+            }
+
+            // Update result info, creating the result row on first use
+            if (result) {
+                let resultDiv = taskCard.querySelector('.task-result');
+                if (!resultDiv) {
+                    const details = taskCard.querySelector('.task-details');
+                    if (details) {
+                        resultDiv = document.createElement('div');
+                        resultDiv.className = 'task-result';
+                        details.appendChild(resultDiv);
+                    }
+                }
+                if (resultDiv) {
+                    const label = document.createElement('strong');
+                    setIconLabel(label, 'fas fa-flag-checkered', 'Result:');
+                    resultDiv.textContent = '';
+                    resultDiv.append(label, ` ${result}`);
+                }
+            }
+        });
+    });
 }

 function fetchTasks() {
-    fetch('/api/tasks')
+    fetch('/api/tasks/brief')
        .then(response => response.json())
        .then(data => {
            allTaskData = data;
@@ -42,7 +158,7 @@ function setTaskFilter(filter) {
    currentFilter = filter;
    if (!allTaskData) return;
    renderTasks(allTaskData);
-    // 高亮选中卡片
+    // Highlight selected card
    document.querySelectorAll('.stat-card').forEach(card => card.classList.remove('selected'));
    if (filter === 'all') {
        document.getElementById('total-tasks').parentElement.classList.add('selected');
@@ -55,20 +171,32 @@ function setTaskFilter(filter) {
    }
 }

-// 更新统计信息
+// Update statistics info
 function updateStatistics(data) {
    let totalTasks = 0;
    let activeTasks = 0;
    let completedTasks = 0;
    let errorTasks = 0;
+    let totalScore = 0;
    
    Object.entries(data).forEach(([taskType, tasks]) => {
        totalTasks += tasks.length;
        tasks.forEach(task => {
            if (task.status.status === 'Running' || task.status.status === 'Preparing' || task.status.status === 'Initializing') {
                activeTasks++;
-            } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') {
+            } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)' || task.status.status === 'Done (Thought Exit)') {
                completedTasks++;
+                // Calculate score if task is completed
+                if (task.status.result) {
+                    try {
+                        const score = parseFloat(task.status.result);
+                        if (!isNaN(score) && score >= 0 && score <= 1) {
+                            totalScore += score;
+                        }
+                    } catch (e) {
+                        console.log(`Could not parse score for task: ${task.id}`);
+                    }
+                }
            } else if (task.status.status === 'Error') {
                errorTasks++;
            }
@@ -80,7 +208,16 @@ function updateStatistics(data) {
    document.getElementById('completed-tasks').textContent = completedTasks;
    document.getElementById('error-tasks').textContent = errorTasks;
    
-    // 高亮显示当前选中的统计卡片
+    // Update score display with formatted score
+    const scoreDisplay = document.getElementById('score-display');
+    if (completedTasks > 0) {
+        const scoreFormatted = totalScore.toFixed(2);
+        scoreDisplay.innerHTML = `<span>${scoreFormatted}</span> / <span>${completedTasks}</span>`;
+    } else {
+        scoreDisplay.innerHTML = '<span>0.00</span> / <span>0</span>';
+    }
+    
+    // Highlight the currently selected statistics card
    document.querySelectorAll('.stat-card').forEach(card => card.classList.remove('selected'));
    if (currentFilter === 'all') {
        document.getElementById('total-tasks').parentElement.classList.add('selected');
@@ -105,7 +242,7 @@ function renderTasks(data) {
            if (currentFilter === 'active') {
                filteredTasks = tasks.filter(task => ['Running', 'Preparing', 'Initializing'].includes(task.status.status));
            } else if (currentFilter === 'completed') {
-                filteredTasks = tasks.filter(task => task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)');
+                filteredTasks = tasks.filter(task => task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)'|| task.status.status === 'Done (Thought Exit)');
            } else if (currentFilter === 'error') {
                filteredTasks = tasks.filter(task => task.status.status === 'Error');
            }
@@ -128,7 +265,7 @@ function renderTasks(data) {
        tasks.forEach(task => {
            if (task.status.status === 'Running' || task.status.status === 'Preparing' || task.status.status === 'Initializing') {
                runningCount++;
-            } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') {
+            } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)' || task.status.status === 'Done (Thought Exit)') {
                completedCount++;
            } else if (task.status.status === 'Error') {
                errorCount++;
@@ -176,6 +313,9 @@ function renderTasks(data) {
            tasks.forEach(task => {
                const taskCard = document.createElement('div');
                taskCard.className = 'task-card';
+                // Add data attributes for later updates
+                taskCard.setAttribute('data-task-id', task.id);
+                taskCard.setAttribute('data-task-type', taskType);
                
                const taskHeader = document.createElement('div');
                taskHeader.className = 'task-header';
@@ -207,6 +347,7 @@ function renderTasks(data) {
                    case 'Done':
                    case 'Done (Message Exit)':
                    case 'Done (Max Steps)':
+                    case 'Done (Thought Exit)':
                        statusClass = 'status-completed';
                        statusIcon = 'fa-check-circle';
                        break;
--- a/monitor/static/task_detail.css
+++ b/monitor/static/task_detail.css
@@ -173,7 +173,7 @@ pre {
 .status-not-started { background: linear-gradient(135deg, #f0f0f0, #e6e6e6); color: #555; }
 .status-preparing, .status-initializing { background: linear-gradient(135deg, #fff7e0, #ffe8a3); color: #8a6d00; }
 .status-running { background: linear-gradient(135deg, #e3f2fd, #bbdefb); color: #0d47a1; }
-.status-done, .status-done-message-exit, .status-done-max-steps { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
+.status-done, .status-done-message-exit, .status-done-max-steps, .status-done-thought-exit { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
 .status-error { background: linear-gradient(135deg, #ffebee, #ffcdd2); color: #b71c1c; }

 .step-intent {
--- a/monitor/templates/index.html
+++ b/monitor/templates/index.html
@@ -14,6 +14,16 @@
 <body>
    <div class="main-container">
        <h1>OSWorld Monitor <span class="system-status online">System Online</span></h1>
+        
+        <!-- Score Display Banner -->
+        <div class="score-banner">
+            <div class="score-content">
+                <i class="fas fa-star"></i>
+                <span class="score-label">Score:</span>
+                <span id="score-display" class="score-value">Loading...</span>
+            </div>
+        </div>
+        
        <div class="dashboard-stats">
            <div class="stat-card">
                <i class="fas fa-running"></i>
--- a/monitor/templates/task_detail.html
+++ b/monitor/templates/task_detail.html
@@ -41,6 +41,11 @@
                        <i class="fas fa-question-circle"></i>
                        <span class="tooltip-text">Maximum steps reached, task completed</span>
                    </span>
+                    {% elif task_status.status == 'Done (Thought Exit)' %}
+                    <span class="tooltip">
+                        <i class="fas fa-question-circle"></i>
+                        <span class="tooltip-text">Task completed with a thought exit condition</span>
+                    </span>
                    {% endif %}
                </dd>
                <dt>Current Step</dt>