From dc164d526956c1ba9dc43853a5a187132b2c4633 Mon Sep 17 00:00:00 2001 From: Zilong Zhou Date: Wed, 16 Jul 2025 21:46:35 +0800 Subject: [PATCH] feat&fix: update configuration management to save model arguments and enhance UI display for model args (#262) --- monitor/.env | 8 +++---- monitor/main.py | 45 ++++++++++++++++++++++++++++++------ monitor/static/index.js | 22 ++++++++++++++++-- monitor/templates/index.html | 3 +++ run.py | 12 ++++++++++ run_multienv.py | 12 ++++++++++ run_multienv_aguvis.py | 12 ++++++++++ run_multienv_claude.py | 19 +++++++++++---- run_multienv_openaicua.py | 12 ++++++++++ run_uitars.py | 12 ++++++++++ 10 files changed, 140 insertions(+), 17 deletions(-) diff --git a/monitor/.env b/monitor/.env index 1969ef7..2ba48cf 100644 --- a/monitor/.env +++ b/monitor/.env @@ -5,10 +5,10 @@ TASK_CONFIG_PATH=../evaluation_examples/test_all.json EXAMPLES_BASE_PATH=../evaluation_examples/examples RESULTS_BASE_PATH=../results -ACTION_SPACE=pyautogui -OBSERVATION_TYPE=screenshot -MODEL_NAME=computer-use-preview -MAX_STEPS=150 +# ACTION_SPACE=pyautogui +# OBSERVATION_TYPE=screenshot +# MODEL_NAME=computer-use-preview +# MAX_STEPS=150 FLASK_PORT=80 FLASK_HOST=0.0.0.0 FLASK_DEBUG=false \ No newline at end of file diff --git a/monitor/main.py b/monitor/main.py index acdf95a..5b56ea1 100644 --- a/monitor/main.py +++ b/monitor/main.py @@ -43,7 +43,7 @@ MAX_STEPS = int(os.getenv("MAX_STEPS", "150")) def initialize_default_config(): """Initialize default configuration from the first available config in results directory""" - global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH + global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH, MAX_STEPS if os.path.exists(RESULTS_BASE_PATH): try: @@ -62,14 +62,20 @@ def initialize_default_config(): OBSERVATION_TYPE = obs_type MODEL_NAME = model_name RESULTS_PATH = model_path - print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}") + + # Read max_steps from args.json if available + model_args = get_model_args(action_space, obs_type, model_name) + if model_args and 'max_steps' in model_args: + MAX_STEPS = model_args['max_steps'] + + print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME} (max_steps: {MAX_STEPS})") return except Exception as e: print(f"Error scanning results directory for default config: {e}") # Fallback to original environment-based path if no configs found RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME) - print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}") + print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME} (max_steps: {MAX_STEPS})") # Initialize default configuration initialize_default_config() @@ -522,19 +528,28 @@ def api_available_configs(): @app.route('/api/current-config') def api_current_config(): - """Get current configuration""" - return jsonify({ + """Get current configuration including args.json data""" + config = { "action_space": ACTION_SPACE, "observation_type": OBSERVATION_TYPE, "model_name": MODEL_NAME, "max_steps": MAX_STEPS, "results_path": RESULTS_PATH - }) + } + + # Add model args from args.json + model_args = get_model_args(ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME) + if model_args: + config["model_args"] = model_args + else: + config["model_args"] = {} + + return jsonify(config) @app.route('/api/set-config', methods=['POST']) def api_set_config(): """Set current configuration""" - global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH + global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH, MAX_STEPS data = request.get_json() if not data: @@ -548,6 +563,11 @@ def api_set_config(): # Update results path RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME) + # Update max_steps from args.json if available + model_args = get_model_args(ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME) + if model_args and 'max_steps' in model_args: + MAX_STEPS = model_args['max_steps'] + if RESULTS_PATH not in TASK_STATUS_CACHE: # Initialize cache for this results path TASK_STATUS_CACHE[RESULTS_PATH] = {} @@ -560,6 +580,17 @@ def api_set_config(): "results_path": RESULTS_PATH }) +def get_model_args(action_space, observation_type, model_name): + """Get model arguments from args.json file""" + args_file = os.path.join(RESULTS_BASE_PATH, action_space, observation_type, model_name, "args.json") + if os.path.exists(args_file): + try: + with open(args_file, 'r') as f: + return json.load(f) + except Exception as e: + print(f"Error reading args.json: {e}") + return None + if __name__ == '__main__': # Check if necessary directories exist if not os.path.exists(TASK_CONFIG_PATH): diff --git a/monitor/static/index.js b/monitor/static/index.js index ed2910e..2c61b3b 100644 --- a/monitor/static/index.js +++ b/monitor/static/index.js @@ -299,9 +299,8 @@ function renderTasks(data) { ${tasks.length} total ${runningCount} active ${completedCount} completed - ${stats.avg_score ? ` ${stats.avg_score} avg score` : ''} + ${stats.total_score ? ` ${stats.total_score} total score` : ''} ${stats.avg_steps ? ` ${stats.avg_steps} avg steps` : ''} - ${stats.completion_rate ? ` ${stats.completion_rate}% completed` : ''} `; typeSection.appendChild(typeHeader); @@ -574,6 +573,25 @@ function displayConfig(config) { document.getElementById('observation-type').textContent = config.observation_type || 'N/A'; document.getElementById('model-name').textContent = config.model_name || 'N/A'; document.getElementById('max-steps').textContent = config.max_steps || 'N/A'; + + // Display model args from args.json + const modelArgsElement = document.getElementById('model-args'); + if (config.model_args && Object.keys(config.model_args).length > 0) { + let argsHtml = ''; + Object.entries(config.model_args).forEach(([key, value]) => { + // Skip max_steps as it's already displayed above + if (key !== 'max_steps') { + argsHtml += `
+ ${key}: + ${JSON.stringify(value)} +
`; + } + }); + modelArgsElement.innerHTML = argsHtml; + modelArgsElement.style.display = 'block'; + } else { + modelArgsElement.style.display = 'none'; + } } function displayConfigError() { diff --git a/monitor/templates/index.html b/monitor/templates/index.html index ef91ab9..0b95c36 100644 --- a/monitor/templates/index.html +++ b/monitor/templates/index.html @@ -49,6 +49,9 @@ Max Steps: Loading... + diff --git a/run.py b/run.py index d0d91cd..a915ac2 100644 --- a/run.py +++ b/run.py @@ -290,6 +290,18 @@ if __name__ == "__main__": ####### The complete version of the list of examples ####### os.environ["TOKENIZERS_PARALLELISM"] = "false" args = config() + + # save args to json in result_dir/action_space/observation_type/model/args.json + path_to_args = os.path.join( + args.result_dir, + args.action_space, + args.observation_type, + args.model, + "args.json", + ) + os.makedirs(os.path.dirname(path_to_args), exist_ok=True) + with open(path_to_args, "w", encoding="utf-8") as f: + json.dump(vars(args), f, indent=4) with open(args.test_all_meta_path, "r", encoding="utf-8") as f: test_all_meta = json.load(f) diff --git a/run_multienv.py b/run_multienv.py index 5be81ec..3b1d005 100644 --- a/run_multienv.py +++ b/run_multienv.py @@ -342,6 +342,18 @@ if __name__ == "__main__": os.environ["TOKENIZERS_PARALLELISM"] = "false" args = config() + + # save args to json in result_dir/action_space/observation_type/model/args.json + path_to_args = os.path.join( + args.result_dir, + args.action_space, + args.observation_type, + args.model, + "args.json", + ) + os.makedirs(os.path.dirname(path_to_args), exist_ok=True) + with open(path_to_args, "w", encoding="utf-8") as f: + json.dump(vars(args), f, indent=4) with open(args.test_all_meta_path, "r", encoding="utf-8") as f: test_all_meta = json.load(f) diff --git a/run_multienv_aguvis.py b/run_multienv_aguvis.py index ba8328a..38892ab 100644 --- a/run_multienv_aguvis.py +++ b/run_multienv_aguvis.py @@ -333,6 +333,18 @@ if __name__ == "__main__": ####### The complete version of the list of examples ####### os.environ["TOKENIZERS_PARALLELISM"] = "false" args = config() + + # save args to json in result_dir/action_space/observation_type/model/args.json + path_to_args = os.path.join( + args.result_dir, + args.action_space, + args.observation_type, + args.model, + "args.json", + ) + os.makedirs(os.path.dirname(path_to_args), exist_ok=True) + with open(path_to_args, "w", encoding="utf-8") as f: + json.dump(vars(args), f, indent=4) with open(args.test_all_meta_path, "r", encoding="utf-8") as f: test_all_meta = json.load(f) diff --git a/run_multienv_claude.py b/run_multienv_claude.py index 2770334..170ac2e 100644 --- a/run_multienv_claude.py +++ b/run_multienv_claude.py @@ -12,12 +12,12 @@ from typing import List, Dict import math from tqdm import tqdm from multiprocessing import Process, Manager -import lib_run_single -from desktop_env.desktop_env import DesktopEnv +# import lib_run_single +# from desktop_env.desktop_env import DesktopEnv from mm_agents.anthropic import AnthropicAgent as PromptAgent -# import fake_run_single as lib_run_single -# from test_env import DesktopEnv +import fake_run_single as lib_run_single +from test_env import DesktopEnv # .env from dotenv import load_dotenv @@ -352,6 +352,17 @@ if __name__ == "__main__": os.environ["TOKENIZERS_PARALLELISM"] = "false" args = config() + # save args to json in result_dir/action_space/observation_type/model/args.json + path_to_args = os.path.join( + args.result_dir, + args.action_space, + args.observation_type, + args.model, + "args.json", + ) + os.makedirs(os.path.dirname(path_to_args), exist_ok=True) + with open(path_to_args, "w", encoding="utf-8") as f: + json.dump(vars(args), f, indent=4) with open(args.test_all_meta_path, "r", encoding="utf-8") as f: test_all_meta = json.load(f) diff --git a/run_multienv_openaicua.py b/run_multienv_openaicua.py index c4eb18c..a532386 100644 --- a/run_multienv_openaicua.py +++ b/run_multienv_openaicua.py @@ -464,6 +464,18 @@ if __name__ == "__main__": try: args = config() + + # save args to json in result_dir/action_space/observation_type/model/args.json + path_to_args = os.path.join( + args.result_dir, + args.action_space, + args.observation_type, + args.model, + "args.json", + ) + os.makedirs(os.path.dirname(path_to_args), exist_ok=True) + with open(path_to_args, "w", encoding="utf-8") as f: + json.dump(vars(args), f, indent=4) with open(args.test_all_meta_path, "r", encoding="utf-8") as f: test_all_meta = json.load(f) diff --git a/run_uitars.py b/run_uitars.py index aa11246..3b6ea84 100644 --- a/run_uitars.py +++ b/run_uitars.py @@ -321,6 +321,18 @@ if __name__ == "__main__": ####### The complete version of the list of examples ####### os.environ["TOKENIZERS_PARALLELISM"] = "false" args = config() + + # save args to json in result_dir/action_space/observation_type/model/args.json + path_to_args = os.path.join( + args.result_dir, + args.action_space, + args.observation_type, + args.model, + "args.json", + ) + os.makedirs(os.path.dirname(path_to_args), exist_ok=True) + with open(path_to_args, "w", encoding="utf-8") as f: + json.dump(vars(args), f, indent=4) with open(args.test_all_meta_path, "r", encoding="utf-8") as f: test_all_meta = json.load(f)