diff --git a/desktop_env/desktop_env_os_symphony.py b/desktop_env/desktop_env_os_symphony.py index 4d65924..983c4aa 100644 --- a/desktop_env/desktop_env_os_symphony.py +++ b/desktop_env/desktop_env_os_symphony.py @@ -333,6 +333,8 @@ class DesktopEnv(gym.Env): def _set_evaluator_info(self, task_config: Dict[str, Any]): """Set evaluator information from task config""" + if "evaluator" not in task_config: + return # evaluator dict # func -> metric function string, or list of metric function strings # conj -> conjunction of multiple metrics if func is a list with length > 1, "and"/"or" diff --git a/run_multienv_os_symphony.py b/run_multienv_os_symphony.py index b62c5c6..5de68c7 100644 --- a/run_multienv_os_symphony.py +++ b/run_multienv_os_symphony.py @@ -1,3 +1,16 @@ +""" +OS-Symphony Official Evaluation Script + +This script serves as the official evaluation entry point for OS-Symphony. +It handles the setup of the desktop environment, agent initialization, and +execution of evaluation tasks. + +For detailed evaluation metrics, configuration options, and usage instructions, +please refer to the official repository: +https://github.com/OS-Copilot/OS-Symphony +""" + + import argparse import copy import datetime diff --git a/run_os_symphony.sh b/run_os_symphony.sh index 772c371..ade3790 100644 --- a/run_os_symphony.sh +++ b/run_os_symphony.sh @@ -1,57 +1,58 @@ - -EXP_NAME="os-osworld-origin-nogdrive-gpt5-gta1-32b-step50-20251220-ybw" -# enable_rewrite_instruction +EXP_NAME="xxx" +export AWS_SECRET_ACCESS_KEY="xxx" +export AWS_ACCESS_KEY_ID="xxx" +export AWS_REGION="us-east-1" +export AWS_SUBNET_ID="xxx" +export AWS_SECURITY_GROUP_ID="xxx" +# >> logs/${EXP_NAME}.log 2>&1 python run_multienv_os_symphony.py \ - --provider_name "docker" \ - --path_to_vm "xxx" \ + --provider_name "aws" \ + --region "us-east-1" \ + --client_password "osworld-public-evaluation" \ --headless \ - --num_envs 1 \ + --num_envs 7 \ --max_steps 50 \ --benchmark osworld \ --domain "all" \ --test_all_meta_path evaluation_examples/test_nogdrive.json \ --result_dir "results" \ - --region "us-east-1" \ --tool_config mm_agents/os_symphony/tool/all_tool_config.yaml \ --orchestrator_provider "openai" \ --orchestrator_model "gpt-5" \ - --orchestrator_url "https://api.boyuerichdata.opensphereai.com/v1" \ + --orchestrator_url "xxx" \ --orchestrator_api_key "xxx" \ --orchestrator_temperature 0.1 \ --orchestrator_keep_first_image \ --max_trajectory_length 8 \ --grounder_provider "vllm" \ - --grounder_model "gta1_32b" \ + --grounder_model "UI-TARS-1.5-7B" \ --grounder_api_key "none" \ - --grounder_url "https://h.pjlab.org.cn/kapi/workspace.kubebrain.io/ailab-intern11/dingzichen-7jzkt-932268-worker-0.dingzichen/18080/v1/" \ + --grounder_url "xxx" \ --grounding_smart_resize \ - --grounding_width 1280 \ - --grounding_height 800 \ + --grounding_width 1920 \ + --grounding_height 1080 \ --coder_provider "openai" \ --coder_model "gpt-5" \ - --coder_url "https://api.boyuerichdata.opensphereai.com/v1" \ + --coder_url "xxx" \ --coder_api_key "xxx" \ --coder_temperature 0.1 \ --coder_budget 20 \ --memoryer_provider "openai" \ --memoryer_model "gpt-5" \ - --memoryer_url "https://api.boyuerichdata.opensphereai.com/v1" \ + --memoryer_url "xxx" \ --memoryer_api_key "xxx" \ --memoryer_temperature 0.1 \ --memoryer_max_images 8 \ --searcher_provider "openai" \ --searcher_model "gpt-5" \ - --searcher_url "https://api.boyuerichdata.opensphereai.com/v1" \ + --searcher_url "xxx" \ --searcher_api_key "xxx" \ --searcher_temperature 0.1 \ --searcher_type "vlm" \ - --searcher_engine "duckduckgo" \ - --searcher_budget 20\ + --searcher_engine "google" \ + --searcher_budget 20 \ --searcher_screen_width 1920 \ --searcher_screen_height 1080 \ - --searcher_path_to_vm "xxx" \ --sleep_after_execution 3 \ --exp_name ${EXP_NAME} \ - --enable_reflection - -# bash scripts/remove_all_osworld_container.sh > logs/${EXP_NAME}.log 2>&1 --enable_rewrite_instruction --grounding_smart_resize \ No newline at end of file + --enable_reflection >> logs/${EXP_NAME}.log 2>&1