Feat/monitor (#254)
* feat: add claude support
* feat: add script for end-to-end evaluation with logging and task distribution
* feat&fix: add tool result handling and update model default in evaluation script
* chore: remove run_test_env.py script
* feat&fix: implement action parsing for tool calls and update default action space
* fix: update text formatting in action parsing and replace logger import
* feat&fix: implement action parsing for tool calls and add screen size handling
* feat: add setup instructions for Anthropic API integration
* feat: add notice about image size limitations for Anthropic API
* Delete test_env/logger.py
* Delete test_env/utils.py
* fix: update logger usage to use global logger and improve error handling
* feat&fix: add configuration management API endpoints and update UI for configuration selection
* feat&fix: update environment configuration, enhance task statistics, and improve UI responsiveness
* feat&fix: add configuration toggle button in UI and improve task loading performance
* feat&fix: add accuracy percentage display to score and style updates for UI
This commit is contained in:
134
monitor/main.py
134
monitor/main.py
@@ -1,14 +1,17 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from functools import cache
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from flask import Flask, render_template_string, jsonify, send_file, request, render_template
|
||||
from dotenv import load_dotenv
|
||||
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
@@ -38,12 +41,51 @@ OBSERVATION_TYPE=os.getenv("OBSERVATION_TYPE", "screenshot")
|
||||
MODEL_NAME=os.getenv("MODEL_NAME", "computer-use-preview")
|
||||
MAX_STEPS = int(os.getenv("MAX_STEPS", "150"))
|
||||
|
||||
def initialize_default_config():
    """Initialize default configuration from the first available config in results directory.

    Walks ``RESULTS_BASE_PATH/<action_space>/<observation_type>/<model_name>``
    and adopts the first combination whose three levels are all directories.
    Entries are visited in sorted order so the chosen default is the same on
    every start (``os.listdir`` alone returns names in arbitrary order).

    If the base directory does not exist, cannot be listed, or contains no
    complete three-level combination, ACTION_SPACE / OBSERVATION_TYPE /
    MODEL_NAME keep their current values and RESULTS_PATH is derived from them.

    Side effects:
        Rebinds the module globals ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME
        and RESULTS_PATH, and prints which configuration was selected.
    """
    global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH

    def _subdirs(parent):
        """Return (name, path) pairs for the sub-directories of *parent*, sorted by name."""
        candidates = (
            (name, os.path.join(parent, name))
            for name in sorted(os.listdir(parent))
        )
        return [(name, path) for name, path in candidates if os.path.isdir(path)]

    if os.path.exists(RESULTS_BASE_PATH):
        try:
            # Scan for the first available configuration
            for action_space, action_space_path in _subdirs(RESULTS_BASE_PATH):
                for obs_type, obs_path in _subdirs(action_space_path):
                    for model_name, model_path in _subdirs(obs_path):
                        # Use the first available configuration as default
                        ACTION_SPACE = action_space
                        OBSERVATION_TYPE = obs_type
                        MODEL_NAME = model_name
                        RESULTS_PATH = model_path
                        print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
                        return
        except OSError as e:
            # Best effort: an unreadable directory must not prevent startup.
            print(f"Error scanning results directory for default config: {e}")

    # Fallback to original environment-based path if no configs found
    RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
    print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
|
||||
|
||||
# Choose the startup configuration (first one found on disk, else environment values)
initialize_default_config()

RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)

# Each results path gets its own status-cache bucket; create it if it is not there yet
TASK_STATUS_CACHE.setdefault(RESULTS_PATH, {})
|
||||
|
||||
@cache
def load_task_list():
    """Load and return the parsed JSON content of the file at TASK_CONFIG_PATH.

    The result is memoized with ``functools.cache``: the file is read only on
    the first call and later edits to it are not picked up by this process.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(TASK_CONFIG_PATH, 'r', encoding='utf-8') as f:
        return json.load(f)
|
||||
|
||||
@cache
|
||||
def get_task_info(task_type, task_id):
|
||||
task_file = os.path.join(EXAMPLES_BASE_PATH, task_type, f"{task_id}.json")
|
||||
if os.path.exists(task_file):
|
||||
@@ -183,8 +225,8 @@ def get_task_status_brief(task_type, task_id):
|
||||
# Check if the status is already cached
|
||||
current_time = time.time()
|
||||
last_cache_time = None
|
||||
if cache_key in TASK_STATUS_CACHE:
|
||||
cached_status, cached_time = TASK_STATUS_CACHE[cache_key]
|
||||
if cache_key in TASK_STATUS_CACHE[RESULTS_PATH]:
|
||||
cached_status, cached_time = TASK_STATUS_CACHE[RESULTS_PATH][cache_key]
|
||||
last_cache_time = cached_time
|
||||
# If cached status is "Done", check if it's within the stability period
|
||||
if cached_status["status"].startswith("Done"):
|
||||
@@ -312,7 +354,7 @@ def get_task_status_brief(task_type, task_id):
|
||||
# Cache the status if it is done or error
|
||||
if status.startswith("Done") or status == "Error":
|
||||
current_time = last_cache_time if last_cache_time else current_time
|
||||
TASK_STATUS_CACHE[cache_key] = (status_dict, current_time)
|
||||
TASK_STATUS_CACHE[RESULTS_PATH][cache_key] = (status_dict, current_time)
|
||||
|
||||
return status_dict
|
||||
|
||||
@@ -434,6 +476,90 @@ def api_task_detail(task_type, task_id):
|
||||
"status": task_status
|
||||
})
|
||||
|
||||
@app.route('/api/config')
def api_config():
    """Return the monitor's path and run settings as JSON.

    Deprecated: use /api/current-config instead.
    The values are the current module-level settings (paths, action space,
    observation type, model name and step limit).
    """
    return jsonify({
        "task_config_path": TASK_CONFIG_PATH,
        "results_base_path": RESULTS_BASE_PATH,
        "action_space": ACTION_SPACE,
        "observation_type": OBSERVATION_TYPE,
        "model_name": MODEL_NAME,
        "max_steps": MAX_STEPS,
        "examples_base_path": EXAMPLES_BASE_PATH,
    })
|
||||
|
||||
@app.route('/api/available-configs')
def api_available_configs():
    """Get all available configuration combinations by scanning the results directory.

    A combination is a directory chain
    ``RESULTS_BASE_PATH/<action_space>/<observation_type>/<model_name>``.
    Each one is reported as an object with the keys ``action_space``,
    ``observation_type``, ``model_name`` and ``path``.

    Entries are listed in sorted order so repeated requests return the same
    sequence (``os.listdir`` alone returns names in arbitrary order).

    Returns:
        A JSON array of configurations. It is empty when the base directory
        does not exist, and holds only what was collected so far if listing a
        directory fails part-way through.
    """
    def _subdirs(parent):
        """Return (name, path) pairs for the sub-directories of *parent*, sorted by name."""
        candidates = (
            (name, os.path.join(parent, name))
            for name in sorted(os.listdir(parent))
        )
        return [(name, path) for name, path in candidates if os.path.isdir(path)]

    configs = []

    if os.path.exists(RESULTS_BASE_PATH):
        try:
            # Levels, outermost first: action space -> observation type -> model name
            for action_space, action_space_path in _subdirs(RESULTS_BASE_PATH):
                for obs_type, obs_path in _subdirs(action_space_path):
                    for model_name, model_path in _subdirs(obs_path):
                        configs.append({
                            "action_space": action_space,
                            "observation_type": obs_type,
                            "model_name": model_name,
                            "path": model_path
                        })
        except OSError as e:
            # Best effort: report the problem and return what was found.
            print(f"Error scanning results directory: {e}")

    return jsonify(configs)
|
||||
|
||||
@app.route('/api/current-config')
def api_current_config():
    """Return the currently selected configuration and its results path as JSON."""
    current = {
        "action_space": ACTION_SPACE,
        "observation_type": OBSERVATION_TYPE,
        "model_name": MODEL_NAME,
        "max_steps": MAX_STEPS,
        "results_path": RESULTS_PATH,
    }
    return jsonify(current)
|
||||
|
||||
@app.route('/api/set-config', methods=['POST'])
def api_set_config():
    """Set current configuration.

    Expects a JSON object that may contain ``action_space``,
    ``observation_type`` and ``model_name``; a key that is omitted keeps its
    current value. On success the module globals ACTION_SPACE,
    OBSERVATION_TYPE, MODEL_NAME and RESULTS_PATH are updated, a status-cache
    bucket is created for the new results path if needed, and the resulting
    configuration is returned as JSON.

    The request is rejected with HTTP 400, and no global is changed, when:
      * the body is empty or is not a JSON object,
      * a supplied value is not a string,
      * the resulting path would fall outside RESULTS_BASE_PATH
        (for example via ``..`` or an absolute path component).
    """
    global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH

    data = request.get_json()
    if not data:
        return jsonify({"error": "No data provided"}), 400
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # Validate into locals first so a rejected request leaves the globals untouched
    action_space = data.get('action_space', ACTION_SPACE)
    observation_type = data.get('observation_type', OBSERVATION_TYPE)
    model_name = data.get('model_name', MODEL_NAME)

    if not all(isinstance(value, str) for value in (action_space, observation_type, model_name)):
        return jsonify({"error": "Configuration values must be strings"}), 400

    results_path = os.path.join(RESULTS_BASE_PATH, action_space, observation_type, model_name)

    # The values come from the client: make sure the joined path stays under the base directory
    base_path = os.path.abspath(RESULTS_BASE_PATH)
    try:
        inside_base = os.path.commonpath([base_path, os.path.abspath(results_path)]) == base_path
    except ValueError:
        # commonpath raises when the paths cannot be compared (e.g. different drives)
        inside_base = False
    if not inside_base:
        return jsonify({"error": "Configuration path is outside the results directory"}), 400

    # Update global variables
    ACTION_SPACE = action_space
    OBSERVATION_TYPE = observation_type
    MODEL_NAME = model_name
    RESULTS_PATH = results_path

    if RESULTS_PATH not in TASK_STATUS_CACHE:
        # Initialize cache for this results path
        TASK_STATUS_CACHE[RESULTS_PATH] = {}

    return jsonify({
        "action_space": ACTION_SPACE,
        "observation_type": OBSERVATION_TYPE,
        "model_name": MODEL_NAME,
        "max_steps": MAX_STEPS,
        "results_path": RESULTS_PATH
    })
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Check if necessary directories exist
|
||||
if not os.path.exists(TASK_CONFIG_PATH):
|
||||
@@ -447,4 +573,4 @@ if __name__ == '__main__':
|
||||
port = 8080
|
||||
debug = os.getenv("FLASK_DEBUG", "false").lower() == "true"
|
||||
|
||||
app.run(host=host, port=port, debug=debug)
|
||||
app.run(host=host, port=port, debug=debug, threaded=True)
|
||||
Reference in New Issue
Block a user