fix(os_symphony_evaluation) (#410)

* fix(os_symphony)

* Update desktop_env_os_symphony.py

* fix(os_symphony_desktop)

* fix(os_symphony_start)

* Add docstring to run_multienv_os_symphony.py

Added documentation header for the evaluation script.
This commit is contained in:
Bowen Yang
2026-01-04 15:56:51 +08:00
committed by GitHub
parent 951e1928c8
commit 439e178a2e
3 changed files with 37 additions and 21 deletions

View File

@@ -333,6 +333,8 @@ class DesktopEnv(gym.Env):
def _set_evaluator_info(self, task_config: Dict[str, Any]): def _set_evaluator_info(self, task_config: Dict[str, Any]):
"""Set evaluator information from task config""" """Set evaluator information from task config"""
if "evaluator" not in task_config:
return
# evaluator dict # evaluator dict
# func -> metric function string, or list of metric function strings # func -> metric function string, or list of metric function strings
# conj -> conjunction of multiple metrics if func is a list with length > 1, "and"/"or" # conj -> conjunction of multiple metrics if func is a list with length > 1, "and"/"or"

View File

@@ -1,3 +1,16 @@
"""
OS-Symphony Official Evaluation Script

This script serves as the official evaluation entry point for OS-Symphony.
It handles the setup of the desktop environment, agent initialization, and
execution of evaluation tasks.

For detailed evaluation metrics, configuration options, and usage instructions,
please refer to the official repository:

https://github.com/OS-Copilot/OS-Symphony
"""
import argparse import argparse
import copy import copy
import datetime import datetime

View File

@@ -1,57 +1,58 @@
EXP_NAME="xxx"
EXP_NAME="os-osworld-origin-nogdrive-gpt5-gta1-32b-step50-20251220-ybw" export AWS_SECRET_ACCESS_KEY="xxx"
# enable_rewrite_instruction export AWS_ACCESS_KEY_ID="xxx"
export AWS_REGION="us-east-1"
export AWS_SUBNET_ID="xxx"
export AWS_SECURITY_GROUP_ID="xxx"
# >> logs/${EXP_NAME}.log 2>&1
python run_multienv_os_symphony.py \ python run_multienv_os_symphony.py \
--provider_name "docker" \ --provider_name "aws" \
--path_to_vm "xxx" \ --region "us-east-1" \
--client_password "osworld-public-evaluation" \
--headless \ --headless \
--num_envs 1 \ --num_envs 7 \
--max_steps 50 \ --max_steps 50 \
--benchmark osworld \ --benchmark osworld \
--domain "all" \ --domain "all" \
--test_all_meta_path evaluation_examples/test_nogdrive.json \ --test_all_meta_path evaluation_examples/test_nogdrive.json \
--result_dir "results" \ --result_dir "results" \
--region "us-east-1" \
--tool_config mm_agents/os_symphony/tool/all_tool_config.yaml \ --tool_config mm_agents/os_symphony/tool/all_tool_config.yaml \
--orchestrator_provider "openai" \ --orchestrator_provider "openai" \
--orchestrator_model "gpt-5" \ --orchestrator_model "gpt-5" \
--orchestrator_url "https://api.boyuerichdata.opensphereai.com/v1" \ --orchestrator_url "xxx" \
--orchestrator_api_key "xxx" \ --orchestrator_api_key "xxx" \
--orchestrator_temperature 0.1 \ --orchestrator_temperature 0.1 \
--orchestrator_keep_first_image \ --orchestrator_keep_first_image \
--max_trajectory_length 8 \ --max_trajectory_length 8 \
--grounder_provider "vllm" \ --grounder_provider "vllm" \
--grounder_model "gta1_32b" \ --grounder_model "UI-TARS-1.5-7B" \
--grounder_api_key "none" \ --grounder_api_key "none" \
--grounder_url "https://h.pjlab.org.cn/kapi/workspace.kubebrain.io/ailab-intern11/dingzichen-7jzkt-932268-worker-0.dingzichen/18080/v1/" \ --grounder_url "xxx" \
--grounding_smart_resize \ --grounding_smart_resize \
--grounding_width 1280 \ --grounding_width 1920 \
--grounding_height 800 \ --grounding_height 1080 \
--coder_provider "openai" \ --coder_provider "openai" \
--coder_model "gpt-5" \ --coder_model "gpt-5" \
--coder_url "https://api.boyuerichdata.opensphereai.com/v1" \ --coder_url "xxx" \
--coder_api_key "xxx" \ --coder_api_key "xxx" \
--coder_temperature 0.1 \ --coder_temperature 0.1 \
--coder_budget 20 \ --coder_budget 20 \
--memoryer_provider "openai" \ --memoryer_provider "openai" \
--memoryer_model "gpt-5" \ --memoryer_model "gpt-5" \
--memoryer_url "https://api.boyuerichdata.opensphereai.com/v1" \ --memoryer_url "xxx" \
--memoryer_api_key "xxx" \ --memoryer_api_key "xxx" \
--memoryer_temperature 0.1 \ --memoryer_temperature 0.1 \
--memoryer_max_images 8 \ --memoryer_max_images 8 \
--searcher_provider "openai" \ --searcher_provider "openai" \
--searcher_model "gpt-5" \ --searcher_model "gpt-5" \
--searcher_url "https://api.boyuerichdata.opensphereai.com/v1" \ --searcher_url "xxx" \
--searcher_api_key "xxx" \ --searcher_api_key "xxx" \
--searcher_temperature 0.1 \ --searcher_temperature 0.1 \
--searcher_type "vlm" \ --searcher_type "vlm" \
--searcher_engine "duckduckgo" \ --searcher_engine "google" \
--searcher_budget 20\ --searcher_budget 20 \
--searcher_screen_width 1920 \ --searcher_screen_width 1920 \
--searcher_screen_height 1080 \ --searcher_screen_height 1080 \
--searcher_path_to_vm "xxx" \
--sleep_after_execution 3 \ --sleep_after_execution 3 \
--exp_name ${EXP_NAME} \ --exp_name ${EXP_NAME} \
--enable_reflection --enable_reflection >> logs/${EXP_NAME}.log 2>&1
# bash scripts/remove_all_osworld_container.sh > logs/${EXP_NAME}.log 2>&1 --enable_rewrite_instruction --grounding_smart_resize