From dc164d526956c1ba9dc43853a5a187132b2c4633 Mon Sep 17 00:00:00 2001
From: Zilong Zhou <zzl0712@connect.hku.hk>
Date: Wed, 16 Jul 2025 21:46:35 +0800
Subject: [PATCH] feat&fix: update configuration management to save model
 arguments and enhance UI display for model args (#262)

---
 monitor/.env                 |  8 +++----
 monitor/main.py              | 45 ++++++++++++++++++++++++++++++------
 monitor/static/index.js      | 22 ++++++++++++++++--
 monitor/templates/index.html |  3 +++
 run.py                       | 12 ++++++++++
 run_multienv.py              | 12 ++++++++++
 run_multienv_aguvis.py       | 12 ++++++++++
 run_multienv_claude.py       | 19 +++++++++++----
 run_multienv_openaicua.py    | 12 ++++++++++
 run_uitars.py                | 12 ++++++++++
 10 files changed, 140 insertions(+), 17 deletions(-)

diff --git a/monitor/.env b/monitor/.env
index 1969ef7..2ba48cf 100644
--- a/monitor/.env
+++ b/monitor/.env
@@ -5,10 +5,10 @@
 TASK_CONFIG_PATH=../evaluation_examples/test_all.json
 EXAMPLES_BASE_PATH=../evaluation_examples/examples
 RESULTS_BASE_PATH=../results
-ACTION_SPACE=pyautogui
-OBSERVATION_TYPE=screenshot
-MODEL_NAME=computer-use-preview
-MAX_STEPS=150
+# ACTION_SPACE=pyautogui
+# OBSERVATION_TYPE=screenshot
+# MODEL_NAME=computer-use-preview
+# MAX_STEPS=150
 FLASK_PORT=80
 FLASK_HOST=0.0.0.0
 FLASK_DEBUG=false
\ No newline at end of file
diff --git a/monitor/main.py b/monitor/main.py
index acdf95a..5b56ea1 100644
--- a/monitor/main.py
+++ b/monitor/main.py
@@ -43,7 +43,7 @@ MAX_STEPS = int(os.getenv("MAX_STEPS", "150"))
 
 def initialize_default_config():
     """Initialize default configuration from the first available config in results directory"""
-    global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH
+    global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH, MAX_STEPS
     
     if os.path.exists(RESULTS_BASE_PATH):
         try:
@@ -62,14 +62,31 @@ def initialize_default_config():
                                     OBSERVATION_TYPE = obs_type
                                     MODEL_NAME = model_name
                                     RESULTS_PATH = model_path
-                                    print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
+
+                                    # Read max_steps from args.json if available. The file is
+                                    # read inline because this function runs at import time,
+                                    # before get_model_args() further down the module has been
+                                    # defined; calling it here would raise NameError, which the
+                                    # enclosing except would swallow.
+                                    args_file = os.path.join(RESULTS_BASE_PATH, action_space, obs_type, model_name, "args.json")
+                                    model_args = None
+                                    if os.path.exists(args_file):
+                                        try:
+                                            with open(args_file, 'r', encoding='utf-8') as f:
+                                                model_args = json.load(f)
+                                        except (OSError, ValueError) as e:
+                                            print(f"Error reading args.json: {e}")
+                                    if isinstance(model_args, dict) and 'max_steps' in model_args:
+                                        MAX_STEPS = model_args['max_steps']
+
+                                    print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME} (max_steps: {MAX_STEPS})")
                                     return
         except Exception as e:
             print(f"Error scanning results directory for default config: {e}")
     
     # Fallback to original environment-based path if no configs found
     RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
-    print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
+    print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME} (max_steps: {MAX_STEPS})")
 
 # Initialize default configuration
 initialize_default_config()
@@ -522,19 +528,28 @@ def api_available_configs():
 
 @app.route('/api/current-config')
 def api_current_config():
-    """Get current configuration"""
-    return jsonify({
+    """Get current configuration including args.json data"""
+    config = {
         "action_space": ACTION_SPACE,
         "observation_type": OBSERVATION_TYPE,
         "model_name": MODEL_NAME,
         "max_steps": MAX_STEPS,
         "results_path": RESULTS_PATH
-    })
+    }
+    
+    # Add model args from args.json
+    model_args = get_model_args(ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
+    if model_args:
+        config["model_args"] = model_args
+    else:
+        config["model_args"] = {}
+    
+    return jsonify(config)
 
 @app.route('/api/set-config', methods=['POST'])
 def api_set_config():
     """Set current configuration"""
-    global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH
+    global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH, MAX_STEPS
     
     data = request.get_json()
     if not data:
@@ -548,6 +563,15 @@ def api_set_config():
     # Update results path
     RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
     
+    # Update max_steps from args.json if available; otherwise fall back to the
+    # environment default so the value loaded for a previously selected config
+    # is not reported for this one.
+    model_args = get_model_args(ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
+    if model_args and 'max_steps' in model_args:
+        MAX_STEPS = model_args['max_steps']
+    else:
+        MAX_STEPS = int(os.getenv("MAX_STEPS", "150"))
+
     if RESULTS_PATH not in TASK_STATUS_CACHE:
         # Initialize cache for this results path
         TASK_STATUS_CACHE[RESULTS_PATH] = {}
@@ -560,6 +580,30 @@ def api_set_config():
         "results_path": RESULTS_PATH
     })
 
+def get_model_args(action_space, observation_type, model_name):
+    """Load the run arguments saved next to a configuration's results.
+
+    Reads RESULTS_BASE_PATH/<action_space>/<observation_type>/<model_name>/args.json.
+
+    Returns:
+        The parsed JSON object as a dict, or None when the file is missing,
+        unreadable, not valid JSON, or does not hold a JSON object.
+    """
+    args_file = os.path.join(RESULTS_BASE_PATH, action_space, observation_type, model_name, "args.json")
+    if not os.path.exists(args_file):
+        return None
+    try:
+        # The run_*.py scripts write this file as UTF-8, so read it the same
+        # way instead of relying on the platform default encoding.
+        with open(args_file, 'r', encoding='utf-8') as f:
+            model_args = json.load(f)
+    except (OSError, ValueError) as e:
+        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
+        print(f"Error reading args.json: {e}")
+        return None
+    # Callers look keys up by name, so anything but a JSON object is unusable.
+    return model_args if isinstance(model_args, dict) else None
+
 if __name__ == '__main__':
     # Check if necessary directories exist
     if not os.path.exists(TASK_CONFIG_PATH):
diff --git a/monitor/static/index.js b/monitor/static/index.js
index ed2910e..2c61b3b 100644
--- a/monitor/static/index.js
+++ b/monitor/static/index.js
@@ -299,9 +299,8 @@ function renderTasks(data) {
                 <span class="task-stat"><i class="fas fa-tasks"></i> ${tasks.length} total</span>
                 <span class="task-stat running"><i class="fas fa-running"></i> ${runningCount} active</span>
                 <span class="task-stat completed"><i class="fas fa-check-circle"></i> ${completedCount} completed</span>
-                ${stats.avg_score ? `<span class="task-stat score"><i class="fas fa-star"></i> ${stats.avg_score} avg score</span>` : ''}
+                ${stats.total_score ? `<span class="task-stat score"><i class="fas fa-star"></i> ${stats.total_score} total score</span>` : ''}
                 ${stats.avg_steps ? `<span class="task-stat steps"><i class="fas fa-chart-line"></i> ${stats.avg_steps} avg steps</span>` : ''}
-                ${stats.completion_rate ? `<span class="task-stat rate"><i class="fas fa-percentage"></i> ${stats.completion_rate}% completed</span>` : ''}
             </div>
         `;
         typeSection.appendChild(typeHeader);
@@ -574,6 +573,36 @@ function displayConfig(config) {
     document.getElementById('observation-type').textContent = config.observation_type || 'N/A';
     document.getElementById('model-name').textContent = config.model_name || 'N/A';
     document.getElementById('max-steps').textContent = config.max_steps || 'N/A';
+
+    // Display model args from args.json
+    const modelArgsElement = document.getElementById('model-args');
+    // Skip max_steps as it's already displayed above
+    const extraArgs = Object.entries(config.model_args || {})
+        .filter(([key]) => key !== 'max_steps');
+
+    // Build the rows as DOM nodes and set textContent instead of assembling
+    // an innerHTML string: keys and values come from args.json and must be
+    // shown literally, never parsed as markup.
+    modelArgsElement.textContent = '';
+    extraArgs.forEach(([key, value]) => {
+        const item = document.createElement('div');
+        item.className = 'config-item';
+
+        const label = document.createElement('span');
+        label.className = 'config-label';
+        label.textContent = `${key}:`;
+
+        const valueSpan = document.createElement('span');
+        valueSpan.className = 'config-value';
+        valueSpan.textContent = JSON.stringify(value);
+
+        item.append(label, ' ', valueSpan);
+        modelArgsElement.appendChild(item);
+    });
+
+    // Hide the container when nothing is left to show (e.g. args.json only
+    // held max_steps) rather than rendering an empty block.
+    modelArgsElement.style.display = extraArgs.length > 0 ? 'block' : 'none';
 }
 
 function displayConfigError() {
diff --git a/monitor/templates/index.html b/monitor/templates/index.html
index ef91ab9..0b95c36 100644
--- a/monitor/templates/index.html
+++ b/monitor/templates/index.html
@@ -49,6 +49,9 @@
                             <span class="config-label">Max Steps:</span>
                             <span class="config-value" id="max-steps">Loading...</span>
                         </div>
+                        <div id="model-args" style="display: none;">
+                            <!-- Model args from args.json will be populated here -->
+                        </div>
                     </div>
                 </div>
             </div>
diff --git a/run.py b/run.py
index d0d91cd..a915ac2 100644
--- a/run.py
+++ b/run.py
@@ -290,6 +290,18 @@ if __name__ == "__main__":
     ####### The complete version of the list of examples #######
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
     args = config()
+    
+    # save args to json in result_dir/action_space/observation_type/model/args.json
+    path_to_args = os.path.join(
+        args.result_dir,
+        args.action_space,
+        args.observation_type,
+        args.model,
+        "args.json",
+    )
+    os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
+    with open(path_to_args, "w", encoding="utf-8") as f:
+        json.dump(vars(args), f, indent=4)
 
     with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
         test_all_meta = json.load(f)
diff --git a/run_multienv.py b/run_multienv.py
index 5be81ec..3b1d005 100644
--- a/run_multienv.py
+++ b/run_multienv.py
@@ -342,6 +342,18 @@ if __name__ == "__main__":
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
     args = config()
+    
+    # save args to json in result_dir/action_space/observation_type/model/args.json
+    path_to_args = os.path.join(
+        args.result_dir,
+        args.action_space,
+        args.observation_type,
+        args.model,
+        "args.json",
+    )
+    os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
+    with open(path_to_args, "w", encoding="utf-8") as f:
+        json.dump(vars(args), f, indent=4)
 
     with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
         test_all_meta = json.load(f)
diff --git a/run_multienv_aguvis.py b/run_multienv_aguvis.py
index ba8328a..38892ab 100644
--- a/run_multienv_aguvis.py
+++ b/run_multienv_aguvis.py
@@ -333,6 +333,18 @@ if __name__ == "__main__":
     ####### The complete version of the list of examples #######
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
     args = config()
+    
+    # save args to json in result_dir/action_space/observation_type/model/args.json
+    path_to_args = os.path.join(
+        args.result_dir,
+        args.action_space,
+        args.observation_type,
+        args.model,
+        "args.json",
+    )
+    os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
+    with open(path_to_args, "w", encoding="utf-8") as f:
+        json.dump(vars(args), f, indent=4)
 
     with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
         test_all_meta = json.load(f)
diff --git a/run_multienv_claude.py b/run_multienv_claude.py
index 2770334..170ac2e 100644
--- a/run_multienv_claude.py
+++ b/run_multienv_claude.py
@@ -352,6 +352,17 @@ if __name__ == "__main__":
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
     args = config()
+    # save args to json in result_dir/action_space/observation_type/model/args.json
+    path_to_args = os.path.join(
+        args.result_dir,
+        args.action_space,
+        args.observation_type,
+        args.model,
+        "args.json",
+    )
+    os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
+    with open(path_to_args, "w", encoding="utf-8") as f:
+        json.dump(vars(args), f, indent=4)
 
     with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
         test_all_meta = json.load(f)
diff --git a/run_multienv_openaicua.py b/run_multienv_openaicua.py
index c4eb18c..a532386 100644
--- a/run_multienv_openaicua.py
+++ b/run_multienv_openaicua.py
@@ -464,6 +464,18 @@ if __name__ == "__main__":
     
     try:
         args = config()
+        
+        # save args to json in result_dir/action_space/observation_type/model/args.json
+        path_to_args = os.path.join(
+            args.result_dir,
+            args.action_space,
+            args.observation_type,
+            args.model,
+            "args.json",
+        )
+        os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
+        with open(path_to_args, "w", encoding="utf-8") as f:
+            json.dump(vars(args), f, indent=4)
 
         with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
             test_all_meta = json.load(f)
diff --git a/run_uitars.py b/run_uitars.py
index aa11246..3b6ea84 100644
--- a/run_uitars.py
+++ b/run_uitars.py
@@ -321,6 +321,18 @@ if __name__ == "__main__":
     ####### The complete version of the list of examples #######
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
     args = config()
+    
+    # save args to json in result_dir/action_space/observation_type/model/args.json
+    path_to_args = os.path.join(
+        args.result_dir,
+        args.action_space,
+        args.observation_type,
+        args.model,
+        "args.json",
+    )
+    os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
+    with open(path_to_args, "w", encoding="utf-8") as f:
+        json.dump(vars(args), f, indent=4)
 
     with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
         test_all_meta = json.load(f)