feat&fix: update configuration management to save model arguments and enhance UI display for model args (#262)

This commit is contained in:
Zilong Zhou
2025-07-16 21:46:35 +08:00
committed by GitHub
parent e433f35c1f
commit dc164d5269
10 changed files with 140 additions and 17 deletions

View File

@@ -5,10 +5,10 @@
TASK_CONFIG_PATH=../evaluation_examples/test_all.json
EXAMPLES_BASE_PATH=../evaluation_examples/examples
RESULTS_BASE_PATH=../results
ACTION_SPACE=pyautogui
OBSERVATION_TYPE=screenshot
MODEL_NAME=computer-use-preview
MAX_STEPS=150
# ACTION_SPACE=pyautogui
# OBSERVATION_TYPE=screenshot
# MODEL_NAME=computer-use-preview
# MAX_STEPS=150
FLASK_PORT=80
FLASK_HOST=0.0.0.0
FLASK_DEBUG=false

View File

@@ -43,7 +43,7 @@ MAX_STEPS = int(os.getenv("MAX_STEPS", "150"))
def initialize_default_config():
"""Initialize default configuration from the first available config in results directory"""
global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH
global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH, MAX_STEPS
if os.path.exists(RESULTS_BASE_PATH):
try:
@@ -62,14 +62,20 @@ def initialize_default_config():
OBSERVATION_TYPE = obs_type
MODEL_NAME = model_name
RESULTS_PATH = model_path
print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
# Read max_steps from args.json if available
model_args = get_model_args(action_space, obs_type, model_name)
if model_args and 'max_steps' in model_args:
MAX_STEPS = model_args['max_steps']
print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME} (max_steps: {MAX_STEPS})")
return
except Exception as e:
print(f"Error scanning results directory for default config: {e}")
# Fallback to original environment-based path if no configs found
RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME} (max_steps: {MAX_STEPS})")
# Initialize default configuration
initialize_default_config()
@@ -522,19 +528,28 @@ def api_available_configs():
@app.route('/api/current-config')
def api_current_config():
"""Get current configuration"""
return jsonify({
"""Get current configuration including args.json data"""
config = {
"action_space": ACTION_SPACE,
"observation_type": OBSERVATION_TYPE,
"model_name": MODEL_NAME,
"max_steps": MAX_STEPS,
"results_path": RESULTS_PATH
})
}
# Add model args from args.json
model_args = get_model_args(ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
if model_args:
config["model_args"] = model_args
else:
config["model_args"] = {}
return jsonify(config)
@app.route('/api/set-config', methods=['POST'])
def api_set_config():
"""Set current configuration"""
global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH
global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH, MAX_STEPS
data = request.get_json()
if not data:
@@ -548,6 +563,11 @@ def api_set_config():
# Update results path
RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
# Update max_steps from args.json if available
model_args = get_model_args(ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
if model_args and 'max_steps' in model_args:
MAX_STEPS = model_args['max_steps']
if RESULTS_PATH not in TASK_STATUS_CACHE:
# Initialize cache for this results path
TASK_STATUS_CACHE[RESULTS_PATH] = {}
@@ -560,6 +580,17 @@ def api_set_config():
"results_path": RESULTS_PATH
})
def get_model_args(action_space, observation_type, model_name):
    """Load the model arguments saved for one configuration.

    Reads ``<RESULTS_BASE_PATH>/<action_space>/<observation_type>/<model_name>/args.json``.

    Args:
        action_space: Directory name of the action space.
        observation_type: Directory name of the observation type.
        model_name: Directory name of the model.

    Returns:
        The parsed JSON object as a dict, or ``None`` when the file is
        missing, unreadable, not valid JSON, or does not hold a JSON object.
    """
    # NOTE(review): the module appears to call initialize_default_config() at
    # import time, and that function uses this helper -- confirm this
    # definition is placed before that call.
    args_file = os.path.join(
        RESULTS_BASE_PATH, action_space, observation_type, model_name, "args.json"
    )
    try:
        # Open directly instead of checking os.path.exists first: the file can
        # disappear between the check and the open. JSON is UTF-8 by
        # specification, so do not depend on the platform default encoding.
        with open(args_file, 'r', encoding='utf-8') as f:
            model_args = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # No args.json for this configuration; this is the normal "absent" case.
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading args.json: {e}")
        return None

    # Callers use membership tests and key lookups on the result, so anything
    # other than a JSON object (list, string, number, null) is unusable.
    if not isinstance(model_args, dict):
        print(
            "Error reading args.json: expected a JSON object, "
            f"got {type(model_args).__name__}"
        )
        return None
    return model_args
if __name__ == '__main__':
# Check if necessary directories exist
if not os.path.exists(TASK_CONFIG_PATH):

View File

@@ -299,9 +299,8 @@ function renderTasks(data) {
<span class="task-stat"><i class="fas fa-tasks"></i> ${tasks.length} total</span>
<span class="task-stat running"><i class="fas fa-running"></i> ${runningCount} active</span>
<span class="task-stat completed"><i class="fas fa-check-circle"></i> ${completedCount} completed</span>
${stats.avg_score ? `<span class="task-stat score"><i class="fas fa-star"></i> ${stats.avg_score} avg score</span>` : ''}
${stats.total_score ? `<span class="task-stat score"><i class="fas fa-star"></i> ${stats.total_score} total score</span>` : ''}
${stats.avg_steps ? `<span class="task-stat steps"><i class="fas fa-chart-line"></i> ${stats.avg_steps} avg steps</span>` : ''}
${stats.completion_rate ? `<span class="task-stat rate"><i class="fas fa-percentage"></i> ${stats.completion_rate}% completed</span>` : ''}
</div>
`;
typeSection.appendChild(typeHeader);
@@ -574,6 +573,25 @@ function displayConfig(config) {
document.getElementById('observation-type').textContent = config.observation_type || 'N/A';
document.getElementById('model-name').textContent = config.model_name || 'N/A';
document.getElementById('max-steps').textContent = config.max_steps || 'N/A';
// Display model args from args.json
const modelArgsElement = document.getElementById('model-args');
if (config.model_args && Object.keys(config.model_args).length > 0) {
let argsHtml = '';
Object.entries(config.model_args).forEach(([key, value]) => {
// Skip max_steps as it's already displayed above
if (key !== 'max_steps') {
argsHtml += `<div class="config-item">
<span class="config-label">${key}:</span>
<span class="config-value">${JSON.stringify(value)}</span>
</div>`;
}
});
modelArgsElement.innerHTML = argsHtml;
modelArgsElement.style.display = 'block';
} else {
modelArgsElement.style.display = 'none';
}
}
function displayConfigError() {

View File

@@ -49,6 +49,9 @@
<span class="config-label">Max Steps:</span>
<span class="config-value" id="max-steps">Loading...</span>
</div>
<div id="model-args" style="display: none;">
<!-- Model args from args.json will be populated here -->
</div>
</div>
</div>
</div>