Feat/monitor (#254)

* feat: add claude support

* feat: add script for end-to-end evaluation with logging and task distribution

* feat&fix: add tool result handling and update model default in evaluation script

* chore: remove run_test_env.py script

* feat&fix: implement action parsing for tool calls and update default action space

* fix: update text formatting in action parsing and replace logger import

* feat&fix: implement action parsing for tool calls and add screen size handling

* feat: add setup instructions for Anthropic API integration

* feat: add notice about image size limitations for Anthropic API

* Delete test_env/logger.py

* Delete test_env/utils.py

* fix: update logger usage to use global logger and improve error handling

* feat&fix: add configuration management API endpoints and update UI for configuration selection

* feat&fix: update environment configuration, enhance task statistics, and improve UI responsiveness

* feat&fix: add configuration toggle button in UI and improve task loading performance

* feat&fix: add accuracy percentage display to score and style updates for UI
This commit is contained in:
Zilong Zhou
2025-07-14 13:43:41 +08:00
committed by GitHub
parent 0651495d88
commit 74b7c189af
6 changed files with 662 additions and 37 deletions

View File

@@ -1,5 +1,63 @@
/* monitor/static/index.css */
/* Page base: no default margin/padding, sans-serif stack, light 135deg gradient backdrop. */
body {
    margin: 0;
    padding: 0;
    font-family: 'Segoe UI', Arial, sans-serif;
    background: linear-gradient(135deg, #f4f6fa 0%, #e9f0f9 100%);
}
/* Horizontally centered wrapper, capped at 1200px, with 20px side gutters.
   position: relative makes it a containing block for absolutely positioned children. */
.layout-container {
    max-width: 1200px;
    margin: 20px auto;
    padding: 0 20px;
    position: relative;
}

/* White, rounded, drop-shadowed card holding the page content. */
.main-content {
    padding: 36px 44px;
    background: #fff;
    border-radius: 14px;
    box-shadow: 0 8px 32px rgba(0,0,0,0.1);
}
/* Floating Config Sidebar
   Fixed to the viewport and parked mostly off the left edge (at this default
   size 20px of its 300px width stays on screen). It slides fully into view
   while hovered or while any element inside it holds focus. */
.config-sidebar {
    position: fixed;
    top: 20px;
    left: -280px;
    width: 300px;
    height: calc(100vh - 40px); /* full viewport height minus 20px top and bottom */
    z-index: 1000;
    transition: left 0.3s ease;
}
.config-sidebar:hover {
    left: 0;
}
/* Keep the panel open while a control inside it is focused, so it does not
   slide away mid-interaction and can be reached from the keyboard.
   Deliberately a separate rule: a browser that does not understand
   :focus-within discards only this rule, not the :hover one above. */
.config-sidebar:focus-within {
    left: 0;
}

/* Round tab hanging off the sidebar's right edge, vertically centered on it. */
.config-toggle-btn {
    position: absolute;
    right: -50px; /* equals the tab width, so the tab sits just outside the panel */
    top: 50%;
    transform: translateY(-50%);
    width: 50px;
    height: 50px;
    background: linear-gradient(135deg, #007bff, #0056b3);
    border-radius: 0 25px 25px 0;
    display: flex;
    align-items: center;
    justify-content: center;
    color: white;
    font-size: 1.2em;
    cursor: pointer;
    box-shadow: 2px 0 10px rgba(0,0,0,0.2);
    transition: all 0.3s ease;
}
.config-toggle-btn:hover {
    background: linear-gradient(135deg, #0056b3, #004085);
    /* translateY is repeated because this declaration replaces the base transform */
    transform: translateY(-50%) scale(1.05);
}

/* Dim the tab slightly while the panel is open (hovered or focused). */
.config-sidebar:hover .config-toggle-btn {
    opacity: 0.8;
}
.config-sidebar:focus-within .config-toggle-btn {
    opacity: 0.8;
}
/* Centered white card, capped at 1100px wide. */
.main-container {
    max-width: 1100px;
    margin: 40px auto;
    padding: 36px 44px;
    background: #fff;
    border-radius: 14px;
    box-shadow: 0 8px 32px rgba(0,0,0,0.1);
}

/* Centered page title. */
h1 {
    position: relative;
    margin-bottom: 24px;
    font-size: 2.5em;
    color: #1a237e;
    text-align: center;
}

/* Short gradient bar drawn under the title via generated content. */
h1:after {
    content: '';
    display: block;
    width: 80px;
    height: 4px;
    margin: 12px auto 0;
    background: linear-gradient(90deg, #007bff, #00c6ff);
    border-radius: 2px;
}
@@ -125,6 +183,18 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
text-shadow: 0 1px 2px rgba(0,0,0,0.05);
}
/* Small inline pill for the accuracy percentage: white text on a faint
   translucent-white background, vertically centered on the line. */
.accuracy-percentage {
    display: inline-block;
    vertical-align: middle;
    margin-left: 8px;
    padding: 4px 8px;
    border-radius: 12px;
    background: rgba(255, 255, 255, 0.1);
    color: #ffffff;
    font-size: 0.7em;
    font-weight: 600;
}
.stat-card span {
font-size: 2em;
@@ -197,8 +267,9 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
/* Flex row for the task-type statistics; items wrap onto further lines
   when they do not fit and are vertically centered within each line. */
.task-type-stats {
    display: flex;
    flex-wrap: wrap;
    align-items: center;
    /* Single gap declaration: the earlier `gap: 16px` was always overridden
       by this later value and has been removed as dead code. */
    gap: 8px;
}
.task-stat {
@@ -228,6 +299,22 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
color: #b71c1c;
}
/* Task type statistics styles
   Each modifier pairs a text colour with a 10%-opacity tint of that same colour. */

/* score: amber */
.task-stat.score {
    background: rgba(255, 193, 7, 0.1);
    color: #ffc107;
}

/* steps: teal */
.task-stat.steps {
    background: rgba(23, 162, 184, 0.1);
    color: #17a2b8;
}

/* rate: green */
.task-stat.rate {
    background: rgba(40, 167, 69, 0.1);
    color: #28a745;
}
.tasks-container {
padding: 20px;
transition: all 0.4s cubic-bezier(.4,0,.2,1);
@@ -427,3 +514,174 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
background: #a5c7e5;
}
/* Configuration Panel Styles */

/* Full-height column card, rounded on its right-hand corners only:
   a fixed header on top and a scrollable body below. */
.config-panel {
    display: flex;
    flex-direction: column;
    height: 100%;
    overflow: hidden;
    background: #fff;
    border-radius: 0 14px 14px 0;
    box-shadow: 0 8px 32px rgba(0,0,0,0.15);
}

/* Grey gradient title bar; flex-shrink: 0 stops it from being squeezed by the body. */
.config-header {
    flex-shrink: 0;
    display: flex;
    align-items: center;
    padding: 16px 20px;
    color: white;
    background: linear-gradient(135deg, #6c757d, #495057);
}

/* Icon in the title bar. */
.config-header i {
    font-size: 1.1em;
    margin-right: 10px;
}

/* Title text. */
.config-header span {
    font-size: 1.1em;
    font-weight: 600;
}

/* Body takes the remaining height and scrolls vertically on overflow. */
.config-content {
    flex: 1;
    overflow-y: auto;
    padding: 20px;
}
/* Selector section, separated from what follows by a thin bottom rule. */
.config-selector {
    padding-bottom: 15px;
    margin-bottom: 20px;
    border-bottom: 1px solid #dee2e6;
}

/* Label stacked above its control. */
.selector-item {
    display: flex;
    flex-direction: column;
    gap: 8px;
}

/* Small, bold, upper-case caption. */
.selector-item label {
    color: #495057;
    font-size: 0.9em;
    font-weight: 600;
    letter-spacing: 0.5px;
    text-transform: uppercase;
}
/* Dropdown control: light 2px border that turns blue on hover and focus. */
.selector-item select {
    padding: 8px 12px;
    font-size: 0.9em;
    color: #495057;
    background: white;
    border: 2px solid #e9ecef;
    border-radius: 6px;
    cursor: pointer;
    transition: all 0.3s ease;
}

/* Focus: the default outline is replaced by a blue border plus a soft 3px halo. */
.selector-item select:focus {
    outline: none;
    border-color: #007bff;
    box-shadow: 0 0 0 3px rgba(0,123,255,0.1);
}

/* Hover: border colour only. */
.selector-item select:hover {
    border-color: #007bff;
}
/* Vertical stack of configuration entries, 15px apart. */
.config-list {
    display: flex;
    flex-direction: column;
    gap: 15px;
}

/* One entry: light grey card with a blue accent stripe on the left. */
.config-item {
    display: flex;
    flex-direction: column;
    padding: 12px;
    background: #f8f9fa;
    border-left: 4px solid #007bff;
    border-radius: 8px;
    transition: all 0.3s ease;
}

/* Hover: nudge 3px to the right and add a blue-tinted shadow. */
.config-item:hover {
    box-shadow: 0 4px 12px rgba(0,123,255,0.15);
    transform: translateX(3px);
}
/* Caption of a configuration entry: small, bold, upper-case, letter-spaced.
   The rule previously declared color, font-size, margin-bottom and
   text-transform twice; only the last of each applied, so the overridden
   duplicates (font-size: 0.9em, margin-bottom: 5px, ...) are dropped and
   the effective values are kept. */
.config-label {
    font-weight: 600;
    color: #495057;
    font-size: 0.85em;
    margin-bottom: 6px;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}
/* Value of a configuration entry: blue monospace text; long values may
   break mid-word instead of overflowing. */
.config-value {
    font-family: 'Courier New', monospace;
    font-size: 0.9em;
    font-weight: 600;
    color: #007bff;
    word-break: break-word;
}

/* Smaller type with tighter line height. */
.config-path {
    line-height: 1.3;
    font-size: 0.8em;
}
/* Responsive design for sidebar layout */

/* Up to 1024px wide: narrower sidebar, parked by its full width, with a smaller tab. */
@media (max-width: 1024px) {
    .config-sidebar {
        width: 250px;
        left: -250px;
    }

    .config-toggle-btn {
        width: 40px;
        height: 40px;
        right: -40px;
        font-size: 1em;
    }
}
/* Up to 768px wide: tighter gutters and padding, and a still smaller sidebar and tab. */
@media (max-width: 768px) {
    .layout-container {
        padding: 0 10px;
    }

    .main-content {
        padding: 20px 25px;
    }

    /* 10px top offset; height leaves 20px of the viewport free in total. */
    .config-sidebar {
        top: 10px;
        height: calc(100vh - 20px);
        width: 220px;
        left: -220px;
    }

    .config-toggle-btn {
        width: 35px;
        height: 35px;
        right: -35px;
        font-size: 0.9em;
    }

    .config-content {
        padding: 15px;
    }

    .config-item {
        padding: 10px;
    }
}