feat&fix: update configuration management to save model arguments and enhance UI display for model args (#262)
This commit is contained in:
@@ -5,10 +5,10 @@
|
|||||||
TASK_CONFIG_PATH=../evaluation_examples/test_all.json
|
TASK_CONFIG_PATH=../evaluation_examples/test_all.json
|
||||||
EXAMPLES_BASE_PATH=../evaluation_examples/examples
|
EXAMPLES_BASE_PATH=../evaluation_examples/examples
|
||||||
RESULTS_BASE_PATH=../results
|
RESULTS_BASE_PATH=../results
|
||||||
ACTION_SPACE=pyautogui
|
# ACTION_SPACE=pyautogui
|
||||||
OBSERVATION_TYPE=screenshot
|
# OBSERVATION_TYPE=screenshot
|
||||||
MODEL_NAME=computer-use-preview
|
# MODEL_NAME=computer-use-preview
|
||||||
MAX_STEPS=150
|
# MAX_STEPS=150
|
||||||
FLASK_PORT=80
|
FLASK_PORT=80
|
||||||
FLASK_HOST=0.0.0.0
|
FLASK_HOST=0.0.0.0
|
||||||
FLASK_DEBUG=false
|
FLASK_DEBUG=false
|
||||||
@@ -43,7 +43,7 @@ MAX_STEPS = int(os.getenv("MAX_STEPS", "150"))
|
|||||||
|
|
||||||
def initialize_default_config():
|
def initialize_default_config():
|
||||||
"""Initialize default configuration from the first available config in results directory"""
|
"""Initialize default configuration from the first available config in results directory"""
|
||||||
global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH
|
global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH, MAX_STEPS
|
||||||
|
|
||||||
if os.path.exists(RESULTS_BASE_PATH):
|
if os.path.exists(RESULTS_BASE_PATH):
|
||||||
try:
|
try:
|
||||||
@@ -62,14 +62,20 @@ def initialize_default_config():
|
|||||||
OBSERVATION_TYPE = obs_type
|
OBSERVATION_TYPE = obs_type
|
||||||
MODEL_NAME = model_name
|
MODEL_NAME = model_name
|
||||||
RESULTS_PATH = model_path
|
RESULTS_PATH = model_path
|
||||||
print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
|
|
||||||
|
# Read max_steps from args.json if available
|
||||||
|
model_args = get_model_args(action_space, obs_type, model_name)
|
||||||
|
if model_args and 'max_steps' in model_args:
|
||||||
|
MAX_STEPS = model_args['max_steps']
|
||||||
|
|
||||||
|
print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME} (max_steps: {MAX_STEPS})")
|
||||||
return
|
return
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error scanning results directory for default config: {e}")
|
print(f"Error scanning results directory for default config: {e}")
|
||||||
|
|
||||||
# Fallback to original environment-based path if no configs found
|
# Fallback to original environment-based path if no configs found
|
||||||
RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
|
RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
|
||||||
print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
|
print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME} (max_steps: {MAX_STEPS})")
|
||||||
|
|
||||||
# Initialize default configuration
|
# Initialize default configuration
|
||||||
initialize_default_config()
|
initialize_default_config()
|
||||||
@@ -522,19 +528,28 @@ def api_available_configs():
|
|||||||
|
|
||||||
@app.route('/api/current-config')
def api_current_config():
    """Return the active configuration plus any saved args.json data as JSON."""
    # A missing or empty args.json is reported as an empty object.
    saved_args = get_model_args(ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME) or {}

    return jsonify({
        "action_space": ACTION_SPACE,
        "observation_type": OBSERVATION_TYPE,
        "model_name": MODEL_NAME,
        "max_steps": MAX_STEPS,
        "results_path": RESULTS_PATH,
        "model_args": saved_args,
    })
|
||||||
|
|
||||||
@app.route('/api/set-config', methods=['POST'])
|
@app.route('/api/set-config', methods=['POST'])
|
||||||
def api_set_config():
|
def api_set_config():
|
||||||
"""Set current configuration"""
|
"""Set current configuration"""
|
||||||
global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH
|
global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH, MAX_STEPS
|
||||||
|
|
||||||
data = request.get_json()
|
data = request.get_json()
|
||||||
if not data:
|
if not data:
|
||||||
@@ -548,6 +563,11 @@ def api_set_config():
|
|||||||
# Update results path
|
# Update results path
|
||||||
RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
|
RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
|
||||||
|
|
||||||
|
# Update max_steps from args.json if available
|
||||||
|
model_args = get_model_args(ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
|
||||||
|
if model_args and 'max_steps' in model_args:
|
||||||
|
MAX_STEPS = model_args['max_steps']
|
||||||
|
|
||||||
if RESULTS_PATH not in TASK_STATUS_CACHE:
|
if RESULTS_PATH not in TASK_STATUS_CACHE:
|
||||||
# Initialize cache for this results path
|
# Initialize cache for this results path
|
||||||
TASK_STATUS_CACHE[RESULTS_PATH] = {}
|
TASK_STATUS_CACHE[RESULTS_PATH] = {}
|
||||||
@@ -560,6 +580,17 @@ def api_set_config():
|
|||||||
"results_path": RESULTS_PATH
|
"results_path": RESULTS_PATH
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def get_model_args(action_space, observation_type, model_name):
    """Load the saved model arguments for one configuration.

    Reads ``args.json`` from
    ``RESULTS_BASE_PATH/<action_space>/<observation_type>/<model_name>/``.

    Args:
        action_space: Directory name of the action space.
        observation_type: Directory name of the observation type.
        model_name: Directory name of the model.

    Returns:
        The decoded JSON object as a dict, or ``None`` when the file is
        missing, cannot be read, is not valid JSON, or does not hold a
        JSON object.
    """
    args_file = os.path.join(
        RESULTS_BASE_PATH, action_space, observation_type, model_name, "args.json"
    )

    try:
        # Explicit UTF-8 instead of the platform default encoding.
        with open(args_file, "r", encoding="utf-8") as f:
            model_args = json.load(f)
    except FileNotFoundError:
        # No args.json for this configuration is normal, so stay silent.
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading args.json: {e}")
        return None

    # Callers look up string keys, so anything but a JSON object is unusable.
    if not isinstance(model_args, dict):
        print(
            "Error reading args.json: expected a JSON object, "
            f"got {type(model_args).__name__}"
        )
        return None

    return model_args
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# Check if necessary directories exist
|
# Check if necessary directories exist
|
||||||
if not os.path.exists(TASK_CONFIG_PATH):
|
if not os.path.exists(TASK_CONFIG_PATH):
|
||||||
|
|||||||
@@ -299,9 +299,8 @@ function renderTasks(data) {
|
|||||||
<span class="task-stat"><i class="fas fa-tasks"></i> ${tasks.length} total</span>
|
<span class="task-stat"><i class="fas fa-tasks"></i> ${tasks.length} total</span>
|
||||||
<span class="task-stat running"><i class="fas fa-running"></i> ${runningCount} active</span>
|
<span class="task-stat running"><i class="fas fa-running"></i> ${runningCount} active</span>
|
||||||
<span class="task-stat completed"><i class="fas fa-check-circle"></i> ${completedCount} completed</span>
|
<span class="task-stat completed"><i class="fas fa-check-circle"></i> ${completedCount} completed</span>
|
||||||
${stats.avg_score ? `<span class="task-stat score"><i class="fas fa-star"></i> ${stats.avg_score} avg score</span>` : ''}
|
${stats.total_score ? `<span class="task-stat score"><i class="fas fa-star"></i> ${stats.total_score} total score</span>` : ''}
|
||||||
${stats.avg_steps ? `<span class="task-stat steps"><i class="fas fa-chart-line"></i> ${stats.avg_steps} avg steps</span>` : ''}
|
${stats.avg_steps ? `<span class="task-stat steps"><i class="fas fa-chart-line"></i> ${stats.avg_steps} avg steps</span>` : ''}
|
||||||
${stats.completion_rate ? `<span class="task-stat rate"><i class="fas fa-percentage"></i> ${stats.completion_rate}% completed</span>` : ''}
|
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
typeSection.appendChild(typeHeader);
|
typeSection.appendChild(typeHeader);
|
||||||
@@ -574,6 +573,25 @@ function displayConfig(config) {
|
|||||||
document.getElementById('observation-type').textContent = config.observation_type || 'N/A';
|
document.getElementById('observation-type').textContent = config.observation_type || 'N/A';
|
||||||
document.getElementById('model-name').textContent = config.model_name || 'N/A';
|
document.getElementById('model-name').textContent = config.model_name || 'N/A';
|
||||||
document.getElementById('max-steps').textContent = config.max_steps || 'N/A';
|
document.getElementById('max-steps').textContent = config.max_steps || 'N/A';
|
||||||
|
|
||||||
|
// Display model args from args.json
|
||||||
|
const modelArgsElement = document.getElementById('model-args');
|
||||||
|
if (config.model_args && Object.keys(config.model_args).length > 0) {
|
||||||
|
let argsHtml = '';
|
||||||
|
Object.entries(config.model_args).forEach(([key, value]) => {
|
||||||
|
// Skip max_steps as it's already displayed above
|
||||||
|
if (key !== 'max_steps') {
|
||||||
|
argsHtml += `<div class="config-item">
|
||||||
|
<span class="config-label">${key}:</span>
|
||||||
|
<span class="config-value">${JSON.stringify(value)}</span>
|
||||||
|
</div>`;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
modelArgsElement.innerHTML = argsHtml;
|
||||||
|
modelArgsElement.style.display = 'block';
|
||||||
|
} else {
|
||||||
|
modelArgsElement.style.display = 'none';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function displayConfigError() {
|
function displayConfigError() {
|
||||||
|
|||||||
@@ -49,6 +49,9 @@
|
|||||||
<span class="config-label">Max Steps:</span>
|
<span class="config-label">Max Steps:</span>
|
||||||
<span class="config-value" id="max-steps">Loading...</span>
|
<span class="config-value" id="max-steps">Loading...</span>
|
||||||
</div>
|
</div>
|
||||||
|
<div id="model-args" style="display: none;">
|
||||||
|
<!-- Model args from args.json will be populated here -->
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
12
run.py
12
run.py
@@ -290,6 +290,18 @@ if __name__ == "__main__":
|
|||||||
####### The complete version of the list of examples #######
|
####### The complete version of the list of examples #######
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
args = config()
|
args = config()
|
||||||
|
|
||||||
|
# save args to json in result_dir/action_space/observation_type/model/args.json
|
||||||
|
path_to_args = os.path.join(
|
||||||
|
args.result_dir,
|
||||||
|
args.action_space,
|
||||||
|
args.observation_type,
|
||||||
|
args.model,
|
||||||
|
"args.json",
|
||||||
|
)
|
||||||
|
os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
|
||||||
|
with open(path_to_args, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(vars(args), f, indent=4)
|
||||||
|
|
||||||
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||||
test_all_meta = json.load(f)
|
test_all_meta = json.load(f)
|
||||||
|
|||||||
@@ -342,6 +342,18 @@ if __name__ == "__main__":
|
|||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
|
|
||||||
args = config()
|
args = config()
|
||||||
|
|
||||||
|
# save args to json in result_dir/action_space/observation_type/model/args.json
|
||||||
|
path_to_args = os.path.join(
|
||||||
|
args.result_dir,
|
||||||
|
args.action_space,
|
||||||
|
args.observation_type,
|
||||||
|
args.model,
|
||||||
|
"args.json",
|
||||||
|
)
|
||||||
|
os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
|
||||||
|
with open(path_to_args, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(vars(args), f, indent=4)
|
||||||
|
|
||||||
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||||
test_all_meta = json.load(f)
|
test_all_meta = json.load(f)
|
||||||
|
|||||||
@@ -333,6 +333,18 @@ if __name__ == "__main__":
|
|||||||
####### The complete version of the list of examples #######
|
####### The complete version of the list of examples #######
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
args = config()
|
args = config()
|
||||||
|
|
||||||
|
# save args to json in result_dir/action_space/observation_type/model/args.json
|
||||||
|
path_to_args = os.path.join(
|
||||||
|
args.result_dir,
|
||||||
|
args.action_space,
|
||||||
|
args.observation_type,
|
||||||
|
args.model,
|
||||||
|
"args.json",
|
||||||
|
)
|
||||||
|
os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
|
||||||
|
with open(path_to_args, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(vars(args), f, indent=4)
|
||||||
|
|
||||||
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||||
test_all_meta = json.load(f)
|
test_all_meta = json.load(f)
|
||||||
|
|||||||
@@ -12,12 +12,12 @@ from typing import List, Dict
|
|||||||
import math
|
import math
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from multiprocessing import Process, Manager
|
from multiprocessing import Process, Manager
|
||||||
import lib_run_single
|
# import lib_run_single
|
||||||
from desktop_env.desktop_env import DesktopEnv
|
# from desktop_env.desktop_env import DesktopEnv
|
||||||
from mm_agents.anthropic import AnthropicAgent as PromptAgent
|
from mm_agents.anthropic import AnthropicAgent as PromptAgent
|
||||||
|
|
||||||
# import fake_run_single as lib_run_single
|
import fake_run_single as lib_run_single
|
||||||
# from test_env import DesktopEnv
|
from test_env import DesktopEnv
|
||||||
|
|
||||||
# .env
|
# .env
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -352,6 +352,17 @@ if __name__ == "__main__":
|
|||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
|
|
||||||
args = config()
|
args = config()
|
||||||
|
# save args to json in result_dir/action_space/observation_type/model/args.json
|
||||||
|
path_to_args = os.path.join(
|
||||||
|
args.result_dir,
|
||||||
|
args.action_space,
|
||||||
|
args.observation_type,
|
||||||
|
args.model,
|
||||||
|
"args.json",
|
||||||
|
)
|
||||||
|
os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
|
||||||
|
with open(path_to_args, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(vars(args), f, indent=4)
|
||||||
|
|
||||||
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||||
test_all_meta = json.load(f)
|
test_all_meta = json.load(f)
|
||||||
|
|||||||
@@ -464,6 +464,18 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
args = config()
|
args = config()
|
||||||
|
|
||||||
|
# save args to json in result_dir/action_space/observation_type/model/args.json
|
||||||
|
path_to_args = os.path.join(
|
||||||
|
args.result_dir,
|
||||||
|
args.action_space,
|
||||||
|
args.observation_type,
|
||||||
|
args.model,
|
||||||
|
"args.json",
|
||||||
|
)
|
||||||
|
os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
|
||||||
|
with open(path_to_args, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(vars(args), f, indent=4)
|
||||||
|
|
||||||
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||||
test_all_meta = json.load(f)
|
test_all_meta = json.load(f)
|
||||||
|
|||||||
@@ -321,6 +321,18 @@ if __name__ == "__main__":
|
|||||||
####### The complete version of the list of examples #######
|
####### The complete version of the list of examples #######
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
args = config()
|
args = config()
|
||||||
|
|
||||||
|
# save args to json in result_dir/action_space/observation_type/model/args.json
|
||||||
|
path_to_args = os.path.join(
|
||||||
|
args.result_dir,
|
||||||
|
args.action_space,
|
||||||
|
args.observation_type,
|
||||||
|
args.model,
|
||||||
|
"args.json",
|
||||||
|
)
|
||||||
|
os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
|
||||||
|
with open(path_to_args, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(vars(args), f, indent=4)
|
||||||
|
|
||||||
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||||
test_all_meta = json.load(f)
|
test_all_meta = json.load(f)
|
||||||
|
|||||||
Reference in New Issue
Block a user