diff --git a/evaluation_examples/test_small.json b/evaluation_examples/test_small.json
index dbf95d3..3e0f127 100644
--- a/evaluation_examples/test_small.json
+++ b/evaluation_examples/test_small.json
@@ -29,33 +29,6 @@
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
"510f64c8-9bcc-4be1-8d30-638705850618",
- "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
- "c867c42d-a52d-4a24-8ae3-f75d256b5618",
- "74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
- "b5062e3e-641c-4e3a-907b-ac864d2e7652",
- "48d05431-6cd5-4e76-82eb-12b60d823f7d",
- "eb303e01-261e-4972-8c07-c9b4e7a4922a",
- "d1acdb87-bb67-4f30-84aa-990e56a09c92",
- "deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
- "8e116af7-7db7-4e35-a68b-b0939c066c78",
- "716a6079-22da-47f1-ba73-c9d58f986a38",
- "2373b66a-092d-44cb-bfd7-82e86e7a3b4d"
- ],
- "os": [
- "5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
- "5812b315-e7bd-4265-b51f-863c02174c28"
- ],
- "thunderbird": [
- "dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
- "15c3b339-88f7-4a86-ab16-e71c58dcb01e"
- ],
- "vlc": [
- "59f21cfb-0120-4326-b255-a5b827b38967",
- "8f080098-ddb1-424c-b438-4e96e5e4786e"
- ],
- "vs_code": [
- "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
- "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
- "276cc624-87ea-4f08-ab93-f770e3790175"
+ "897e3b53-5d4d-444b-85cb-2cdc8a97d903"
]
}
\ No newline at end of file
diff --git a/monitor/Dockerfile b/monitor/Dockerfile
index 4692b7f..04311e0 100644
--- a/monitor/Dockerfile
+++ b/monitor/Dockerfile
@@ -1,14 +1,11 @@
FROM python:3.9-slim
-WORKDIR /app
+WORKDIR /app/monitor
# Install dependencies
COPY monitor/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
-# Copy application code
-COPY monitor/ ./
-
# Expose port (will be overridden by environment variable)
ARG FLASK_PORT=8080
EXPOSE ${FLASK_PORT}
diff --git a/monitor/README.md b/monitor/README.md
index b746eb6..e2d640a 100644
--- a/monitor/README.md
+++ b/monitor/README.md
@@ -28,11 +28,11 @@ The monitor can be configured by editing the `.env` file in the monitor director
For example:
```bash
# .env
-TASK_CONFIG_PATH=evaluation_examples/test_small.json
-EXAMPLES_BASE_PATH=evaluation_examples/examples
-RESULTS_BASE_PATH=results_operator_aws/pyautogui/screenshot/computer-use-preview
+TASK_CONFIG_PATH=../evaluation_examples/test_small.json
+EXAMPLES_BASE_PATH=../evaluation_examples/examples
+RESULTS_BASE_PATH=../results_operator_aws/pyautogui/screenshot/computer-use-preview
MAX_STEPS=50
-FLASK_PORT=8080
+FLASK_PORT=80
FLASK_HOST=0.0.0.0
FLASK_DEBUG=true
```
diff --git a/monitor/docker-compose.yml b/monitor/docker-compose.yml
index f5463fa..f7fb056 100644
--- a/monitor/docker-compose.yml
+++ b/monitor/docker-compose.yml
@@ -8,6 +8,7 @@ services:
ports:
- "${FLASK_PORT:-8080}:8080"
volumes:
+ - .:/app/monitor
- ../evaluation_examples:/app/evaluation_examples
- ../results_operator_aws:/app/results_operator_aws
env_file:
diff --git a/monitor/main.py b/monitor/main.py
index 1e1bfec..1d6bd96 100644
--- a/monitor/main.py
+++ b/monitor/main.py
@@ -15,9 +15,9 @@ load_dotenv()
app = Flask(__name__)
# Load configuration from environment variables
-TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "evaluation_examples/test_small.json")
-EXAMPLES_BASE_PATH = os.getenv("EXAMPLES_BASE_PATH", "evaluation_examples/examples")
-RESULTS_BASE_PATH = os.getenv("RESULTS_BASE_PATH", "results_operator_aws/pyautogui/screenshot/computer-use-preview")
+TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "../evaluation_examples/test_small.json")
+EXAMPLES_BASE_PATH = os.getenv("EXAMPLES_BASE_PATH", "../evaluation_examples/examples")
+RESULTS_BASE_PATH = os.getenv("RESULTS_BASE_PATH", "../results_operator_aws/pyautogui/screenshot/computer-use-preview")
MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
def load_task_list():
@@ -71,11 +71,59 @@ def get_task_status(task_type, task_id):
last_step = steps[-1]
- # check if the task is done
+ # Check the log file for agent responses and exit conditions
+ log_data = {
+ "agent_responses": [],
+ "exit_condition": None,
+ "last_message": None
+ }
+
+ if os.path.exists(log_file):
+ try:
+ with open(log_file, 'r') as f:
+ log_content = f.readlines()
+ last_response = None
+
+ for i, line in enumerate(log_content):
+ # Extract agent responses for each step
+ if "Responses: [" in line:
+ response_text = line.split("Responses: [")[1].strip()
+ if response_text.endswith("]"):
+ response_text = response_text[:-1] # Remove closing bracket
+
+ # Clean up the response text - remove quotes
+ if response_text.startswith("'") and response_text.endswith("'"):
+ response_text = response_text[1:-1] # Remove surrounding quotes
+ elif response_text == '"]': # Empty response
+ response_text = ""
+
+ # Handle list of responses
+ if response_text and "', '" in response_text:
+ responses = [r.strip("'") for r in response_text.split("', '")]
+ log_data["agent_responses"].append(responses[0]) # Use first response
+ last_response = responses[0] # Keep track of the last response
+ elif response_text:
+ log_data["agent_responses"].append(response_text)
+ last_response = response_text # Keep track of the last response
+
+ # Check this line for exit-condition markers
+ if "The state of the agent is not correct" in line or "Exit condition met" in line:
+ log_data["exit_condition"] = line.strip()
+ # If this is a message exit, save the last response as the last message
+ if "message_exit: True" in line and last_response:
+ log_data["last_message"] = last_response
+ except Exception as e:
+ log_data["error"] = f"Error parsing log file: {str(e)}"
+
+ # check if the task is done based on both trajectory and log
if last_step.get("done", False):
status = "Done"
elif last_step.get("Error", False):
status = "Error"
+ elif log_data.get("exit_condition") and "message_exit: True" in log_data.get("exit_condition", ""):
+ status = "Done (Message Exit)"
+ elif len(steps) >= MAX_STEPS:
+ status = "Done (Max Steps)"
else:
status = "Running"
@@ -86,7 +134,7 @@ def get_task_status(task_type, task_id):
last_update = "None"
result_content = "Task not completed"
- if status == "Done":
+ if status.startswith("Done"):
if os.path.exists(result_file):
with open(result_file, 'r') as f:
result_content = f.read().strip()
@@ -99,6 +147,7 @@ def get_task_status(task_type, task_id):
"max_steps": MAX_STEPS,
"last_update": last_update,
"steps": steps,
+ "log_data": log_data,
"result": result_content
}
diff --git a/monitor/static/favicon.ico b/monitor/static/favicon.ico
new file mode 100644
index 0000000..e69de29
diff --git a/monitor/static/favicon.png b/monitor/static/favicon.png
new file mode 100644
index 0000000..7fe40ab
Binary files /dev/null and b/monitor/static/favicon.png differ
diff --git a/monitor/static/index.css b/monitor/static/index.css
index 87a3a28..1bdb589 100644
--- a/monitor/static/index.css
+++ b/monitor/static/index.css
@@ -56,6 +56,17 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
margin-bottom: 10px;
display: block;
}
+
+/* Specific colors for different stat cards */
+.stat-card:nth-child(4) i { color: #007bff; } /* Total - Blue */
+.stat-card:nth-child(4):hover { background: linear-gradient(135deg, #f0f7ff, #e6f0fb); }
+.stat-card:nth-child(1) i { color: #17a2b8; } /* Active - Cyan */
+.stat-card:nth-child(1):hover { background: linear-gradient(135deg, #e3fafd, #d1f2f6); }
+.stat-card:nth-child(2) i { color: #28a745; } /* Completed - Green */
+.stat-card:nth-child(2):hover { background: linear-gradient(135deg, #e6f9ea, #d4f7db); }
+.stat-card:nth-child(3) i { color: #dc3545; } /* Error - Red */
+.stat-card:nth-child(3):hover { background: linear-gradient(135deg, #feeaec, #fcd8db); }
+
.stat-card span {
font-size: 2em;
font-weight: 600;
@@ -162,11 +173,12 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
padding: 20px;
transition: all 0.4s cubic-bezier(.4,0,.2,1);
opacity: 1;
- max-height: 2000px;
+ max-height: none;
+ overflow-y: auto;
}
.task-type.collapsed .tasks-container {
- max-height: 0;
+ max-height: 0 !important;
opacity: 0;
padding: 0;
overflow: hidden;
@@ -187,6 +199,9 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
position: relative;
z-index: 2;
}
+.task-card:last-child {
+ margin-bottom: 5px;
+}
.task-card:hover { box-shadow: 0 10px 30px rgba(0,123,255,0.12); transform: translateY(-3px); }
.task-header { display: flex; justify-content: space-between; margin-bottom: 14px; align-items: center; }
.task-title { font-size: 1.2em; font-weight: 600; color: #1a237e; }
@@ -196,6 +211,8 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
.status-running { background: linear-gradient(135deg, #e3f2fd, #bbdefb); color: #0d47a1; }
.status-completed { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
.status-error { background: linear-gradient(135deg, #ffebee, #ffcdd2); color: #b71c1c; }
+.status-unknown { background: linear-gradient(135deg, #e0e0e0, #bdbdbd); color: #424242; }
+.status-done-max-steps { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
.task-details { margin-top: 16px; }
.progress-bar { height: 12px; background-color: #eef2f7; border-radius: 6px; margin-top: 10px; overflow: hidden; box-shadow: inset 0 1px 3px rgba(0,0,0,0.1); }
.progress-fill { height: 100%; background: linear-gradient(90deg, #007bff, #00c6ff); width: 0%; transition: width 0.6s ease; }
@@ -302,3 +319,22 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
color: #0078d7;
}
+/* Custom scrollbar for tasks container */
+.tasks-container::-webkit-scrollbar {
+ width: 8px;
+}
+
+.tasks-container::-webkit-scrollbar-track {
+ background: #f1f5f9;
+ border-radius: 4px;
+}
+
+.tasks-container::-webkit-scrollbar-thumb {
+ background: #c0d6e8;
+ border-radius: 4px;
+}
+
+.tasks-container::-webkit-scrollbar-thumb:hover {
+ background: #a5c7e5;
+}
+
diff --git a/monitor/static/index.js b/monitor/static/index.js
index 6769bb3..a8ef7b5 100644
--- a/monitor/static/index.js
+++ b/monitor/static/index.js
@@ -5,6 +5,7 @@ document.addEventListener('DOMContentLoaded', () => {
document.getElementById('total-tasks').parentElement.addEventListener('click', () => setTaskFilter('all'));
document.getElementById('active-tasks').parentElement.addEventListener('click', () => setTaskFilter('active'));
document.getElementById('completed-tasks').parentElement.addEventListener('click', () => setTaskFilter('completed'));
+ document.getElementById('error-tasks').parentElement.addEventListener('click', () => setTaskFilter('error'));
});
let allTaskData = null;
@@ -49,6 +50,8 @@ function setTaskFilter(filter) {
document.getElementById('active-tasks').parentElement.classList.add('selected');
} else if (filter === 'completed') {
document.getElementById('completed-tasks').parentElement.classList.add('selected');
+ } else if (filter === 'error') {
+ document.getElementById('error-tasks').parentElement.classList.add('selected');
}
}
@@ -57,14 +60,17 @@ function updateStatistics(data) {
let totalTasks = 0;
let activeTasks = 0;
let completedTasks = 0;
+ let errorTasks = 0;
Object.entries(data).forEach(([taskType, tasks]) => {
totalTasks += tasks.length;
tasks.forEach(task => {
if (task.status.status === 'Running' || task.status.status === 'Preparing' || task.status.status === 'Initializing') {
activeTasks++;
- } else if (task.status.status === 'Done') {
+ } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') {
completedTasks++;
+ } else if (task.status.status === 'Error') {
+ errorTasks++;
}
});
});
@@ -72,6 +78,19 @@ function updateStatistics(data) {
document.getElementById('total-tasks').textContent = totalTasks;
document.getElementById('active-tasks').textContent = activeTasks;
document.getElementById('completed-tasks').textContent = completedTasks;
+ document.getElementById('error-tasks').textContent = errorTasks;
+
+ // Highlight the currently selected stat card
+ document.querySelectorAll('.stat-card').forEach(card => card.classList.remove('selected'));
+ if (currentFilter === 'all') {
+ document.getElementById('total-tasks').parentElement.classList.add('selected');
+ } else if (currentFilter === 'active') {
+ document.getElementById('active-tasks').parentElement.classList.add('selected');
+ } else if (currentFilter === 'completed') {
+ document.getElementById('completed-tasks').parentElement.classList.add('selected');
+ } else if (currentFilter === 'error') {
+ document.getElementById('error-tasks').parentElement.classList.add('selected');
+ }
}
function renderTasks(data) {
@@ -86,7 +105,9 @@ function renderTasks(data) {
if (currentFilter === 'active') {
filteredTasks = tasks.filter(task => ['Running', 'Preparing', 'Initializing'].includes(task.status.status));
} else if (currentFilter === 'completed') {
- filteredTasks = tasks.filter(task => task.status.status === 'Done');
+ filteredTasks = tasks.filter(task => task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)');
+ } else if (currentFilter === 'error') {
+ filteredTasks = tasks.filter(task => task.status.status === 'Error');
}
if (filteredTasks.length > 0) {
filteredData[taskType] = filteredTasks;
@@ -107,7 +128,7 @@ function renderTasks(data) {
tasks.forEach(task => {
if (task.status.status === 'Running' || task.status.status === 'Preparing' || task.status.status === 'Initializing') {
runningCount++;
- } else if (task.status.status === 'Done') {
+ } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') {
completedCount++;
} else if (task.status.status === 'Error') {
errorCount++;
@@ -146,6 +167,12 @@ function renderTasks(data) {
noTasks.innerHTML = ' No Tasks Available';
tasksContainer.appendChild(noTasks);
} else {
+ // Add scrolling for large task lists
+ if (tasks.length > 10) {
+ tasksContainer.style.maxHeight = '600px';
+ tasksContainer.style.overflowY = 'auto';
+ }
+
tasks.forEach(task => {
const taskCard = document.createElement('div');
taskCard.className = 'task-card';
@@ -178,6 +205,8 @@ function renderTasks(data) {
statusIcon = 'fa-running';
break;
case 'Done':
+ case 'Done (Message Exit)':
+ case 'Done (Max Steps)':
statusClass = 'status-completed';
statusIcon = 'fa-check-circle';
break;
@@ -185,6 +214,10 @@ function renderTasks(data) {
statusClass = 'status-error';
statusIcon = 'fa-exclamation-circle';
break;
+ default:
+ statusClass = 'status-unknown';
+ statusIcon = 'fa-question-circle';
+ break;
}
taskStatus.classList.add(statusClass);
@@ -202,7 +235,7 @@ function renderTasks(data) {
if (task.status.progress > 0) {
const progressText = document.createElement('div');
- progressText.innerHTML = ` Progress: ${task.status.progress} step(s)`;
+ progressText.innerHTML = ` Progress: ${task.status.progress}/${task.status.max_steps} step(s)`;
taskProgress.appendChild(progressText);
const progressBar = document.createElement('div');
diff --git a/monitor/static/task_detail.css b/monitor/static/task_detail.css
index c399504..06da1cb 100644
--- a/monitor/static/task_detail.css
+++ b/monitor/static/task_detail.css
@@ -49,6 +49,11 @@ h2 { color: #0056b3; margin-top: 36px; font-size: 1.6em; font-weight: 600; }
.step-card {
border: none;
background: #fafdff;
+ box-shadow: 0 4px 15px rgba(0,0,0,0.08);
+ margin-bottom: 25px;
+ border-radius: 10px;
+ overflow: hidden;
+ transition: all 0.3s;
padding: 22px 26px;
margin-bottom: 24px;
border-radius: 10px;
@@ -57,19 +62,29 @@ h2 { color: #0056b3; margin-top: 36px; font-size: 1.6em; font-weight: 600; }
position: relative;
overflow: hidden;
}
+.step-intent {
+ padding: 10px 20px;
+ background: #f0f7ff;
+ border-left: 4px solid #4285f4;
+ margin: 10px 20px;
+ font-size: 0.95em;
+ line-height: 1.5;
+ color: #333;
+}
+.exit-condition {
+ background: #fff8e1;
+ padding: 8px 12px;
+ border-radius: 6px;
+ font-family: 'Courier New', monospace;
+ font-size: 0.9em;
+ border-left: 3px solid #ffa000;
+}
+
.step-card:hover {
box-shadow: 0 10px 30px rgba(0,123,255,0.1);
transform: translateY(-3px);
}
-.step-card:before {
- content: '';
- position: absolute;
- left: 0;
- top: 0;
- height: 100%;
- width: 4px;
- background: linear-gradient(to bottom, #007bff, #00c6ff);
-}
+
.step-header { display: flex; justify-content: space-between; margin-bottom: 12px; align-items: center; }
.step-title { font-weight: 600; color: #1a237e; font-size: 1.1em; }
.step-time { color: #6c757d; font-size: 0.92em; }
@@ -90,10 +105,7 @@ pre {
box-shadow: 0 5px 15px rgba(0,0,0,0.08);
transition: all 0.3s;
}
-.step-image:hover {
- transform: scale(1.01);
- box-shadow: 0 8px 25px rgba(0,0,0,0.12);
-}
+
.no-steps {
color: #8492a6;
font-style: italic;
@@ -154,5 +166,142 @@ pre {
.status-not-started { background: linear-gradient(135deg, #f0f0f0, #e6e6e6); color: #555; }
.status-preparing, .status-initializing { background: linear-gradient(135deg, #fff7e0, #ffe8a3); color: #8a6d00; }
.status-running { background: linear-gradient(135deg, #e3f2fd, #bbdefb); color: #0d47a1; }
-.status-done { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
+.status-done, .status-done-message-exit, .status-done-max-steps { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; }
.status-error { background: linear-gradient(135deg, #ffebee, #ffcdd2); color: #b71c1c; }
+
+.step-intent {
+ padding: 10px 20px;
+ background: #f0f7ff;
+ border-left: 4px solid #4285f4;
+ margin: 10px 0;
+ font-size: 0.95em;
+ line-height: 1.5;
+ color: #333;
+}
+
+.exit-condition {
+ background: #fff8e1;
+ padding: 8px 12px;
+ border-radius: 6px;
+ font-family: 'Courier New', monospace;
+ font-size: 0.9em;
+ border-left: 3px solid #ffa000;
+ position: relative;
+}
+
+.exit-message {
+ background: #e8f5e9;
+ padding: 12px 16px;
+ border-radius: 6px;
+ font-family: 'Segoe UI', Arial, sans-serif;
+ font-size: 1em;
+ border-left: 3px solid #4caf50;
+ position: relative;
+ line-height: 1.5;
+ color: #1b5e20;
+ margin-top: 4px;
+ box-shadow: 0 2px 5px rgba(0,0,0,0.05);
+}
+
+.exit-condition-help {
+ margin-top: 8px;
+ font-family: 'Segoe UI', Arial, sans-serif;
+ font-size: 0.85em;
+ color: #666;
+ background: #f5f5f5;
+ padding: 6px 10px;
+ border-radius: 4px;
+ border-left: 2px solid #9e9e9e;
+}
+
+/* Tooltip styles */
+.tooltip {
+ position: relative;
+ display: inline-block;
+ margin-left: 8px;
+ cursor: help;
+}
+
+.tooltip .tooltip-text {
+ visibility: hidden;
+ min-width: 200px;
+ max-width: 500px;
+ width: max-content;
+ background-color: #333;
+ color: #fff;
+ text-align: left;
+ border-radius: 6px;
+ padding: 10px 12px;
+ position: absolute;
+ z-index: 10;
+ bottom: 125%;
+ left: 50%;
+ transform: translateX(-50%);
+ opacity: 0;
+ transition: opacity 0.3s;
+ font-weight: normal;
+ font-size: 0.85em;
+ white-space: normal;
+ word-wrap: break-word;
+ line-height: 1.4;
+ box-shadow: 0 2px 10px rgba(0,0,0,0.2);
+}
+
+.tooltip .tooltip-text::after {
+ content: "";
+ position: absolute;
+ top: 100%;
+ left: 50%;
+ margin-left: -5px;
+ border-width: 5px;
+ border-style: solid;
+ border-color: #333 transparent transparent transparent;
+}
+
+.tooltip:hover .tooltip-text {
+ visibility: visible;
+ opacity: 1;
+}
+
+/* Tooltip adjustments for mobile devices */
+@media (max-width: 768px) {
+ .tooltip .tooltip-text {
+ width: auto;
+ max-width: 250px;
+ left: auto;
+ right: 0;
+ transform: none;
+ }
+
+ .tooltip .tooltip-text::after {
+ left: auto;
+ right: 10px;
+ }
+}
+
+/* Progress bar styles */
+.progress-bar {
+ height: 12px;
+ background-color: #eef2f7;
+ border-radius: 6px;
+ margin: 10px 0;
+ overflow: hidden;
+ box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
+ width: 100%;
+ max-width: 300px;
+}
+
+.progress-fill {
+ height: 100%;
+ background: linear-gradient(90deg, #007bff, #00c6ff);
+ width: 0%;
+ transition: width 0.6s ease;
+}
+
+.progress-percentage {
+ text-align: right;
+ font-size: 0.85em;
+ color: #6c757d;
+ margin-top: 4px;
+ font-weight: normal;
+}
diff --git a/monitor/templates/index.html b/monitor/templates/index.html
index ceffbea..4388f64 100644
--- a/monitor/templates/index.html
+++ b/monitor/templates/index.html
@@ -4,6 +4,9 @@
OSWorld Monitor
+
+
+
@@ -12,11 +15,6 @@
OSWorld Monitor System Online
-
-
-
Loading...
-
Total Tasks
-
Loading...
@@ -27,6 +25,16 @@
Loading...
Completed
+
+
+
+
Loading...
+
Total Tasks
+
diff --git a/monitor/templates/task_detail.html b/monitor/templates/task_detail.html
index fd5fb8f..8fcd26c 100644
--- a/monitor/templates/task_detail.html
+++ b/monitor/templates/task_detail.html
@@ -4,6 +4,9 @@
Task Detail: {{ task_id }}
+
+
+
@@ -21,11 +24,41 @@
Instruction
{{ task_info.instruction }}
Status
-
{{ task_status.status }}
+
+ {{ task_status.status }}
+ {% if task_status.status == 'Error' %}
+
+
+ Error occurred during task execution
+
+ {% elif task_status.status == 'Done (Message Exit)' %}
+
+
+ Task completed with a message exit condition
+
+ {% elif task_status.status == 'Done (Max Steps)' %}
+
+
+ Maximum steps reached, task completed
+
+ {% endif %}
+
Current Step
{{ task_status.progress }}
Last Update
{{ task_status.last_update or 'None' }}
+ {% if task_status.log_data and task_status.log_data.exit_condition %}
+
Exit Condition
+
+ {{ task_status.log_data.exit_condition }}
+
+ {% endif %}
+ {% if task_status.status == 'Done (Message Exit)' and task_status.log_data and task_status.log_data.last_message %}
+
Exit Message
+
+ {{ task_status.log_data.last_message }}
+
+ {% endif %}
Result
{{ task_status.result }}
@@ -40,7 +73,15 @@
Step {{ step.step_num }}
{{ step.action_timestamp }}
-
{{ step.action.action }}
+ {% if task_status.log_data and task_status.log_data.agent_responses and loop.index0 < task_status.log_data.agent_responses|length %}
+
+ Agent Intent: {{ task_status.log_data.agent_responses[loop.index0] }}
+
+ {% endif %}
+
{% if step.action and step.action.action %}{{ step.action.action }}
+ {% elif step.Error %}Error: {{ step.Error }}
+ {% else %}{{ step|tojson }}
+ {% endif %}
{% if step.screenshot_file %}