Merge remote-tracking branch 'upstream/feat/aws-provider-support'

This commit is contained in:
yuanmengqi
2025-06-05 13:31:42 +00:00
383 changed files with 3303 additions and 2070 deletions

View File

@@ -8,4 +8,4 @@ RESULTS_BASE_PATH=../results_operator_timeoutcheck3/pyautogui/screenshot/compute
MAX_STEPS=150
FLASK_PORT=80
FLASK_HOST=0.0.0.0
FLASK_DEBUG=true
FLASK_DEBUG=false

View File

@@ -25,7 +25,7 @@ The monitor can be configured by editing the `.env` file in the monitor director
| MAX_STEPS | Maximum steps to display for a task | 50 |
| FLASK_PORT | Port for the web server | 80 |
| FLASK_HOST | Host address for the web server | 0.0.0.0 |
| FLASK_DEBUG | Enable debug mode (true/false) | true |
| FLASK_DEBUG | Enable debug mode (true/false) | false |
For example:
```bash
@@ -36,7 +36,7 @@ RESULTS_BASE_PATH=../results_operator_aws/pyautogui/screenshot/computer-use-prev
MAX_STEPS=50
FLASK_PORT=80
FLASK_HOST=0.0.0.0
FLASK_DEBUG=true
FLASK_DEBUG=false
```
## Running with Docker

View File

@@ -12,6 +12,9 @@ from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
# In-memory cache of brief task statuses.
# Key: "{task_type}_{task_id}"; value: the status dict built by get_task_status_brief.
# Entries are only added once a task has reached a "Done..." or "Error" status.
TASK_STATUS_CACHE = {}
app = Flask(__name__)
# Load configuration from environment variables
@@ -122,6 +125,8 @@ def get_task_status(task_type, task_id):
status = "Error"
elif log_data.get("exit_condition") and "message_exit: True" in log_data.get("exit_condition", ""):
status = "Done (Message Exit)"
elif log_data.get("exit_condition") and "thought_exit: True" in log_data.get("exit_condition", ""):
status = "Done (Thought Exit)"
elif len(steps) >= MAX_STEPS:
status = "Done (Max Steps)"
else:
@@ -151,6 +156,133 @@ def get_task_status(task_type, task_id):
"result": result_content
}
def _count_newlines(path, chunk_size=65536):
    """Return the number of newline bytes in *path* (same count as ``wc -l``), or 0 if unreadable."""
    try:
        with open(path, "rb") as f:
            return sum(chunk.count(b"\n") for chunk in iter(lambda: f.read(chunk_size), b""))
    except OSError:
        return 0


def _read_tail_lines(path, line_count, chunk_size=4096):
    """Return the last *line_count* lines of *path* as text (like ``tail -n``), or "" if unreadable."""
    try:
        with open(path, "rb") as f:
            f.seek(0, os.SEEK_END)
            position = f.tell()
            data = b""
            # Read backwards until the buffer holds more newlines than requested lines,
            # which guarantees the last `line_count` lines are complete.
            while position > 0 and data.count(b"\n") <= line_count:
                read_size = min(chunk_size, position)
                position -= read_size
                f.seek(position)
                data = f.read(read_size) + data
    except OSError:
        return ""
    lines = data.decode("utf-8", errors="replace").splitlines()
    return "\n".join(lines[-line_count:])


def get_task_status_brief(task_type, task_id):
    """
    Get brief status info for a task, without detailed step data, for fast homepage loading.

    Only the line count and last line of ``traj.jsonl`` and the last two lines of
    ``runtime.log`` are inspected, so the cost does not depend on parsing every step.

    Args:
        task_type: Task category; used as a sub-directory of RESULTS_BASE_PATH.
        task_id: Task identifier; used as a sub-directory of the task type directory.

    Returns:
        A dict with "status", "progress", "max_steps" and "last_update". Once the
        trajectory contains at least one step the dict also carries "result"
        (stripped content of ``result.txt`` for finished tasks, otherwise None).
    """
    cache_key = f"{task_type}_{task_id}"
    cached_status = TASK_STATUS_CACHE.get(cache_key)
    if cached_status is not None:
        return cached_status

    result_dir = os.path.join(RESULTS_BASE_PATH, task_type, task_id)
    if not os.path.exists(result_dir):
        return {
            "status": "Not Started",
            "progress": 0,
            "max_steps": MAX_STEPS,
            "last_update": None
        }

    traj_file = os.path.join(result_dir, "traj.jsonl")
    log_file = os.path.join(result_dir, "runtime.log")
    result_file = os.path.join(result_dir, "result.txt")

    if not os.path.exists(traj_file):
        return {
            "status": "Preparing",
            "progress": 0,
            "max_steps": MAX_STEPS,
            "last_update": datetime.fromtimestamp(os.path.getmtime(result_dir)).strftime("%Y-%m-%d %H:%M:%S")
        }

    # One trajectory line per step; counted in pure Python instead of spawning `wc`.
    step_count = _count_newlines(traj_file)
    if step_count == 0:
        return {
            "status": "Initializing",
            "progress": 0,
            "max_steps": MAX_STEPS,
            "last_update": datetime.fromtimestamp(os.path.getmtime(traj_file)).strftime("%Y-%m-%d %H:%M:%S")
        }

    # Parse only the last step; a partially written or malformed line is ignored.
    last_step_data = None
    last_line = _read_tail_lines(traj_file, 1).strip()
    if last_line:
        try:
            parsed = json.loads(last_line)
        except ValueError:
            parsed = None
        if isinstance(parsed, dict):
            last_step_data = parsed

    # Precedence mirrors get_task_status: Done > Error > message exit > thought exit > max steps.
    status = "Running"
    if last_step_data:
        if last_step_data.get("done", False):
            status = "Done"
        elif last_step_data.get("Error", False):
            status = "Error"

    if status == "Running":
        # The exit condition is looked for in the last two lines of the runtime log only.
        log_tail = _read_tail_lines(log_file, 2)
        if "message_exit: True" in log_tail:
            status = "Done (Message Exit)"
        elif "thought_exit: True" in log_tail:
            status = "Done (Thought Exit)"
        elif step_count >= MAX_STEPS:
            status = "Done (Max Steps)"

    # Timestamp of the last action, reformatted for display.
    last_update = "None"
    if last_step_data and "action_timestamp" in last_step_data:
        try:
            last_update = datetime.strptime(
                last_step_data["action_timestamp"], "%Y%m%d@%H%M%S"
            ).strftime("%Y-%m-%d %H:%M:%S")
        except (TypeError, ValueError):
            pass

    # Result content is only read for finished tasks.
    result_content = None
    result_loaded = False
    if status.startswith("Done") and os.path.exists(result_file):
        try:
            with open(result_file, 'r') as f:
                result_content = f.read().strip()
            result_loaded = True
        except (OSError, ValueError):
            result_content = "Result file not found"

    status_dict = {
        "status": status,
        "progress": step_count,
        "max_steps": MAX_STEPS,
        "last_update": last_update,
        "result": result_content
    }

    # Cache terminal states only. A "Done" task is cached once its result file has been
    # read; caching earlier would hide a result.txt that is written after the last step.
    if status == "Error" or (status.startswith("Done") and result_loaded):
        TASK_STATUS_CACHE[cache_key] = status_dict

    return status_dict
def get_all_tasks_status():
task_list = load_task_list()
result = {}
@@ -176,6 +308,34 @@ def get_all_tasks_status():
return result
def get_all_tasks_status_brief():
    """
    Collect the brief status of every task, grouped by task type.

    Returns a dict mapping each task type to a list of
    {"id", "instruction", "status"} entries, where "status" is the dict
    produced by get_task_status_brief. No per-step data is loaded, which
    keeps the homepage fast.
    """
    summary = {}
    for task_type, task_ids in load_task_list().items():
        entries = []
        summary[task_type] = entries
        for task_id in task_ids:
            info = get_task_info(task_type, task_id)
            brief_status = get_task_status_brief(task_type, task_id)
            # Fall back to a placeholder when no task info could be loaded.
            if info:
                instruction = info.get("instruction", "No instruction provided")
            else:
                instruction = "No task info available"
            entries.append({
                "id": task_id,
                "instruction": instruction,
                "status": brief_status
            })
    return summary
@app.route('/')
def index():
    """Serve the monitor homepage by rendering the index.html template."""
    return render_template("index.html")
@@ -199,6 +359,11 @@ def api_tasks():
"""Task status API"""
return jsonify(get_all_tasks_status())
@app.route('/api/tasks/brief')
def api_tasks_brief():
    """Brief task status API: JSON status of all tasks without detailed step data, for fast homepage loading."""
    brief_statuses = get_all_tasks_status_brief()
    return jsonify(brief_statuses)
@app.route('/task/<task_type>/<task_id>/screenshot/<path:filename>')
def task_screenshot(task_type, task_id, filename):
"""Get task screenshot"""

View File

@@ -66,6 +66,65 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
.stat-card:nth-child(2):hover { background: linear-gradient(135deg, #e6f9ea, #d4f7db); }
.stat-card:nth-child(3) i { color: #dc3545; } /* Error - Red */
.stat-card:nth-child(3):hover { background: linear-gradient(135deg, #feeaec, #fcd8db); }
.stat-card:nth-child(4) i { color: #007bff; } /* Total - Blue */
.stat-card:nth-child(4):hover { background: linear-gradient(135deg, #f0f7ff, #e6f0fb); }
/* Score Banner Styles */
/* Container for the score display; clips the highlight overlay to its rounded corners. */
.score-banner {
    border-radius: 10px;
    margin: 20px 0 30px;
    padding: 5px;
    /* border: 2px solid rgba(255, 193, 7, 0.5); */
    text-align: center;
    /* Positioning context for the absolutely positioned :before overlay. */
    position: relative;
    overflow: hidden;
    /* animation: scoreBannerGlow 3s infinite alternate; */
}
/* Decorative radial highlight covering the whole banner. */
.score-banner:before {
    content: '';
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: radial-gradient(circle at center, rgba(255, 255, 255, 0.8) 0%, transparent 70%);
    /* Keep the overlay from intercepting mouse events. */
    pointer-events: none;
}
/* Row holding the icon, label and value, centered and stacked above the overlay. */
.score-content {
    display: flex;
    align-items: center;
    justify-content: center;
    position: relative;
    z-index: 1;
}
/* Icon inside the banner. */
.score-banner i {
    font-size: 2.2em;
    color: #ffc107;
    margin-right: 15px;
    /* animation: rotateIcon 6s linear infinite; */
    transform-origin: center;
}
/* "Score:" caption. */
.score-label {
    font-size: 1.3em;
    font-weight: 600;
    color: #b28704;
    margin-right: 15px;
}
/* Score figure rendered as gradient text: the background is clipped to the glyphs
   and the text fill is made transparent so the gradient shows through. */
.score-value {
    font-size: 2em;
    font-weight: 700;
    background: linear-gradient(90deg, #ff8f00, #ffc107);
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
    text-shadow: 0 1px 2px rgba(0,0,0,0.05);
}
.stat-card span {
font-size: 2em;

View File

@@ -1,7 +1,6 @@
// filepath: /home/adlsdztony/codes/OSWorld/monitor/static/index.js
document.addEventListener('DOMContentLoaded', () => {
fetchTasks();
// 筛选功能绑定
// Bind filter functionality
document.getElementById('total-tasks').parentElement.addEventListener('click', () => setTaskFilter('all'));
document.getElementById('active-tasks').parentElement.addEventListener('click', () => setTaskFilter('active'));
document.getElementById('completed-tasks').parentElement.addEventListener('click', () => setTaskFilter('completed'));
@@ -24,11 +23,128 @@ function refreshPage() {
// Store in sessionStorage
sessionStorage.setItem('expandedTaskTypes', JSON.stringify(expandedTaskTypes));
fetchTasks();
// Only fetch brief data for update to improve refresh speed
fetchTasksForRefresh();
}
// Fetch the brief task data and refresh the statistics and task cards in place,
// without re-rendering the whole task list.
function fetchTasksForRefresh() {
    fetch('/api/tasks/brief')
        .then(response => {
            // fetch() only rejects on network failure; surface HTTP errors explicitly
            // instead of trying to parse an error page as JSON.
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }
            return response.json();
        })
        .then(data => {
            // Update stored data
            allTaskData = data;
            // Only update statistics and task status, do not fully re-render
            updateStatistics(data);
            updateTaskStatus(data);
        })
        .catch(error => console.error('Error refreshing tasks:', error));
}
// Update the existing task cards in place from brief status data, without
// re-rendering the entire list.
//
// `data` maps each task type to an array of tasks; each task is read here as
// { id, status: { status, progress, max_steps, last_update, result } }.
// Cards are located through the data-task-id / data-task-type attributes; tasks
// without a rendered card are skipped.
function updateTaskStatus(data) {
    // CSS class and Font Awesome icon class for each known status label.
    const STATUS_STYLES = new Map([
        ['Not Started', ['status-not-started', 'fa-hourglass-start']],
        ['Preparing', ['status-preparing', 'fa-spinner fa-pulse']],
        ['Initializing', ['status-preparing', 'fa-spinner fa-pulse']],
        ['Running', ['status-running', 'fa-running']],
        ['Done', ['status-completed', 'fa-check-circle']],
        ['Done (Message Exit)', ['status-completed', 'fa-check-circle']],
        ['Done (Max Steps)', ['status-completed', 'fa-check-circle']],
        ['Done (Thought Exit)', ['status-completed', 'fa-check-circle']],
        ['Error', ['status-error', 'fa-exclamation-circle']]
    ]);
    const UNKNOWN_STYLE = ['status-unknown', 'fa-question-circle'];
    const ALL_STATUS_CLASSES = ['status-not-started', 'status-preparing', 'status-running', 'status-completed', 'status-error', 'status-unknown'];

    // Escape a value for use inside a double-quoted CSS attribute selector.
    const quoteForSelector = value => String(value).replace(/["\\]/g, '\\$&');

    const makeIcon = iconClasses => {
        const icon = document.createElement('i');
        icon.className = iconClasses;
        return icon;
    };

    // Replace an element's content with "<icon> text". The text is inserted as a
    // text node so server-provided values are never interpreted as HTML.
    const setIconText = (element, iconClasses, text) => {
        element.textContent = '';
        element.appendChild(makeIcon(iconClasses));
        element.appendChild(document.createTextNode(` ${text}`));
    };

    // Add pulse animation to score banner when refreshing
    const scoreBanner = document.querySelector('.score-banner');
    if (scoreBanner) {
        scoreBanner.classList.add('refreshing');
        setTimeout(() => {
            scoreBanner.classList.remove('refreshing');
        }, 1000);
    }

    // Update the status display of each task
    Object.entries(data).forEach(([taskType, tasks]) => {
        tasks.forEach(task => {
            // Find the corresponding task card
            const taskCard = document.querySelector(
                `.task-card[data-task-id="${quoteForSelector(task.id)}"][data-task-type="${quoteForSelector(taskType)}"]`
            );
            if (!taskCard) return;

            // Update status badge
            const statusElement = taskCard.querySelector('.task-status');
            if (statusElement) {
                const [statusClass, statusIcon] = STATUS_STYLES.get(task.status.status) || UNKNOWN_STYLE;
                statusElement.classList.remove(...ALL_STATUS_CLASSES);
                statusElement.classList.add(statusClass);
                setIconText(statusElement, `fas ${statusIcon}`, task.status.status);
            }

            // Update progress text, bar and percentage
            if (task.status.progress > 0) {
                const progressText = taskCard.querySelector('.task-details div:first-child');
                if (progressText) {
                    setIconText(progressText, 'fas fa-chart-line', `Progress: ${task.status.progress}/${task.status.max_steps} step(s)`);
                }

                // Guard against a zero step limit and cap at 100% so the bar width stays valid.
                const percentage = task.status.max_steps > 0
                    ? Math.min(100, (task.status.progress / task.status.max_steps) * 100)
                    : 0;

                const progressFill = taskCard.querySelector('.progress-fill');
                if (progressFill) {
                    progressFill.style.width = `${percentage}%`;
                }

                const progressPercentage = taskCard.querySelector('.progress-percentage');
                if (progressPercentage) {
                    progressPercentage.textContent = `${Math.round(percentage)}%`;
                }
            }

            // Update last update time
            const timestamp = taskCard.querySelector('.timestamp');
            if (timestamp && task.status.last_update) {
                setIconText(timestamp, 'far fa-clock', `Last Update: ${task.status.last_update}`);
            }

            // Update result info, creating the result row on first use
            if (task.status.result) {
                let resultDiv = taskCard.querySelector('.task-result');
                if (!resultDiv) {
                    const details = taskCard.querySelector('.task-details');
                    if (!details) return;
                    resultDiv = document.createElement('div');
                    resultDiv.className = 'task-result';
                    details.appendChild(resultDiv);
                }
                const label = document.createElement('strong');
                label.appendChild(makeIcon('fas fa-flag-checkered'));
                label.appendChild(document.createTextNode(' Result:'));
                resultDiv.textContent = '';
                resultDiv.appendChild(label);
                // The result comes from a file on disk; insert it as plain text.
                resultDiv.appendChild(document.createTextNode(` ${task.status.result}`));
            }
        });
    });
}
function fetchTasks() {
fetch('/api/tasks')
fetch('/api/tasks/brief')
.then(response => response.json())
.then(data => {
allTaskData = data;
@@ -42,7 +158,7 @@ function setTaskFilter(filter) {
currentFilter = filter;
if (!allTaskData) return;
renderTasks(allTaskData);
// 高亮选中卡片
// Highlight selected card
document.querySelectorAll('.stat-card').forEach(card => card.classList.remove('selected'));
if (filter === 'all') {
document.getElementById('total-tasks').parentElement.classList.add('selected');
@@ -55,20 +171,32 @@ function setTaskFilter(filter) {
}
}
// 更新统计信息
// Update statistics info
function updateStatistics(data) {
let totalTasks = 0;
let activeTasks = 0;
let completedTasks = 0;
let errorTasks = 0;
let totalScore = 0;
Object.entries(data).forEach(([taskType, tasks]) => {
totalTasks += tasks.length;
tasks.forEach(task => {
if (task.status.status === 'Running' || task.status.status === 'Preparing' || task.status.status === 'Initializing') {
activeTasks++;
} else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') {
} else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)' || task.status.status === 'Done (Thought Exit)') {
completedTasks++;
// Calculate score if task is completed
if (task.status.result) {
try {
const score = parseFloat(task.status.result);
if (!isNaN(score) && score >= 0 && score <= 1) {
totalScore += score;
}
} catch (e) {
console.log(`Could not parse score for task: ${task.id}`);
}
}
} else if (task.status.status === 'Error') {
errorTasks++;
}
@@ -80,7 +208,16 @@ function updateStatistics(data) {
document.getElementById('completed-tasks').textContent = completedTasks;
document.getElementById('error-tasks').textContent = errorTasks;
// 高亮显示当前选中的统计卡片
// Update score display with formatted score
const scoreDisplay = document.getElementById('score-display');
if (completedTasks > 0) {
const scoreFormatted = totalScore.toFixed(2);
scoreDisplay.innerHTML = `<span>${scoreFormatted}</span> / <span>${completedTasks}</span>`;
} else {
scoreDisplay.innerHTML = '<span>0.00</span> / <span>0</span>';
}
// Highlight the currently selected statistics card
document.querySelectorAll('.stat-card').forEach(card => card.classList.remove('selected'));
if (currentFilter === 'all') {
document.getElementById('total-tasks').parentElement.classList.add('selected');
@@ -105,7 +242,7 @@ function renderTasks(data) {
if (currentFilter === 'active') {
filteredTasks = tasks.filter(task => ['Running', 'Preparing', 'Initializing'].includes(task.status.status));
} else if (currentFilter === 'completed') {
filteredTasks = tasks.filter(task => task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)');
filteredTasks = tasks.filter(task => task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)'|| task.status.status === 'Done (Thought Exit)');
} else if (currentFilter === 'error') {
filteredTasks = tasks.filter(task => task.status.status === 'Error');
}
@@ -128,7 +265,7 @@ function renderTasks(data) {
tasks.forEach(task => {
if (task.status.status === 'Running' || task.status.status === 'Preparing' || task.status.status === 'Initializing') {
runningCount++;
} else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') {
} else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)' || task.status.status === 'Done (Thought Exit)') {
completedCount++;
} else if (task.status.status === 'Error') {
errorCount++;
@@ -176,6 +313,9 @@ function renderTasks(data) {
tasks.forEach(task => {
const taskCard = document.createElement('div');
taskCard.className = 'task-card';
// Add data attributes for later updates
taskCard.setAttribute('data-task-id', task.id);
taskCard.setAttribute('data-task-type', taskType);
const taskHeader = document.createElement('div');
taskHeader.className = 'task-header';
@@ -207,6 +347,7 @@ function renderTasks(data) {
case 'Done':
case 'Done (Message Exit)':
case 'Done (Max Steps)':
case 'Done (Thought Exit)':
statusClass = 'status-completed';
statusIcon = 'fa-check-circle';
break;

View File

@@ -173,7 +173,7 @@ pre {
.status-not-started { background: linear-gradient(135deg, #f0f0f0, #e6e6e6); color: #555; }
.status-preparing, .status-initializing { background: linear-gradient(135deg, #fff7e0, #ffe8a3); color: #8a6d00; }
.status-running { background: linear-gradient(135deg, #e3f2fd, #bbdefb); color: #0d47a1; }
.status-done, .status-done-message-exit, .status-done-max-steps { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
.status-done, .status-done-message-exit, .status-done-max-steps, .status-done-thought-exit { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
.status-error { background: linear-gradient(135deg, #ffebee, #ffcdd2); color: #b71c1c; }
.step-intent {

View File

@@ -14,6 +14,16 @@
<body>
<div class="main-container">
<h1>OSWorld Monitor <span class="system-status online">System Online</span></h1>
<!-- Score Display Banner -->
<div class="score-banner">
<div class="score-content">
<i class="fas fa-star"></i>
<span class="score-label">Score:</span>
<span id="score-display" class="score-value">Loading...</span>
</div>
</div>
<div class="dashboard-stats">
<div class="stat-card">
<i class="fas fa-running"></i>

View File

@@ -41,6 +41,11 @@
<i class="fas fa-question-circle"></i>
<span class="tooltip-text">Maximum steps reached, task completed</span>
</span>
{% elif task_status.status == 'Done (Thought Exit)' %}
<span class="tooltip">
<i class="fas fa-question-circle"></i>
<span class="tooltip-text">Task completed with a thought exit condition</span>
</span>
{% endif %}
</dd>
<dt>Current Step</dt>