fix(os_symphony_evaluation) (#410)
* fix(os_symphony)
* Update desktop_env_os_symphony.py
* fix(os_symphony_desktop)
* fix(os_symphony_start)
* Add docstring to run_multienv_os_symphony.py

  Added documentation header for the evaluation script.
This commit is contained in:
@@ -333,6 +333,8 @@ class DesktopEnv(gym.Env):
|
|||||||
|
|
||||||
def _set_evaluator_info(self, task_config: Dict[str, Any]):
|
def _set_evaluator_info(self, task_config: Dict[str, Any]):
|
||||||
"""Set evaluator information from task config"""
|
"""Set evaluator information from task config"""
|
||||||
|
if "evaluator" not in task_config:
|
||||||
|
return
|
||||||
# evaluator dict
|
# evaluator dict
|
||||||
# func -> metric function string, or list of metric function strings
|
# func -> metric function string, or list of metric function strings
|
||||||
# conj -> conjunction of multiple metrics if func is a list with length > 1, "and"/"or"
|
# conj -> conjunction of multiple metrics if func is a list with length > 1, "and"/"or"
|
||||||
|
|||||||
@@ -1,3 +1,16 @@
|
|||||||
|
"""
|
||||||
|
OS-Symphony Official Evaluation Script
|
||||||
|
|
||||||
|
This script serves as the official evaluation entry point for OS-Symphony.
|
||||||
|
It handles the setup of the desktop environment, agent initialization, and
|
||||||
|
execution of evaluation tasks.
|
||||||
|
|
||||||
|
For detailed evaluation metrics, configuration options, and usage instructions,
|
||||||
|
please refer to the official repository:
|
||||||
|
https://github.com/OS-Copilot/OS-Symphony
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
|
|||||||
@@ -1,57 +1,58 @@
|
|||||||
|
EXP_NAME="xxx"
|
||||||
EXP_NAME="os-osworld-origin-nogdrive-gpt5-gta1-32b-step50-20251220-ybw"
|
export AWS_SECRET_ACCESS_KEY="xxx"
|
||||||
# enable_rewrite_instruction
|
export AWS_ACCESS_KEY_ID="xxx"
|
||||||
|
export AWS_REGION="us-east-1"
|
||||||
|
export AWS_SUBNET_ID="xxx"
|
||||||
|
export AWS_SECURITY_GROUP_ID="xxx"
|
||||||
|
# >> logs/${EXP_NAME}.log 2>&1
|
||||||
python run_multienv_os_symphony.py \
|
python run_multienv_os_symphony.py \
|
||||||
--provider_name "docker" \
|
--provider_name "aws" \
|
||||||
--path_to_vm "xxx" \
|
--region "us-east-1" \
|
||||||
|
--client_password "osworld-public-evaluation" \
|
||||||
--headless \
|
--headless \
|
||||||
--num_envs 1 \
|
--num_envs 7 \
|
||||||
--max_steps 50 \
|
--max_steps 50 \
|
||||||
--benchmark osworld \
|
--benchmark osworld \
|
||||||
--domain "all" \
|
--domain "all" \
|
||||||
--test_all_meta_path evaluation_examples/test_nogdrive.json \
|
--test_all_meta_path evaluation_examples/test_nogdrive.json \
|
||||||
--result_dir "results" \
|
--result_dir "results" \
|
||||||
--region "us-east-1" \
|
|
||||||
--tool_config mm_agents/os_symphony/tool/all_tool_config.yaml \
|
--tool_config mm_agents/os_symphony/tool/all_tool_config.yaml \
|
||||||
--orchestrator_provider "openai" \
|
--orchestrator_provider "openai" \
|
||||||
--orchestrator_model "gpt-5" \
|
--orchestrator_model "gpt-5" \
|
||||||
--orchestrator_url "https://api.boyuerichdata.opensphereai.com/v1" \
|
--orchestrator_url "xxx" \
|
||||||
--orchestrator_api_key "xxx" \
|
--orchestrator_api_key "xxx" \
|
||||||
--orchestrator_temperature 0.1 \
|
--orchestrator_temperature 0.1 \
|
||||||
--orchestrator_keep_first_image \
|
--orchestrator_keep_first_image \
|
||||||
--max_trajectory_length 8 \
|
--max_trajectory_length 8 \
|
||||||
--grounder_provider "vllm" \
|
--grounder_provider "vllm" \
|
||||||
--grounder_model "gta1_32b" \
|
--grounder_model "UI-TARS-1.5-7B" \
|
||||||
--grounder_api_key "none" \
|
--grounder_api_key "none" \
|
||||||
--grounder_url "https://h.pjlab.org.cn/kapi/workspace.kubebrain.io/ailab-intern11/dingzichen-7jzkt-932268-worker-0.dingzichen/18080/v1/" \
|
--grounder_url "xxx" \
|
||||||
--grounding_smart_resize \
|
--grounding_smart_resize \
|
||||||
--grounding_width 1280 \
|
--grounding_width 1920 \
|
||||||
--grounding_height 800 \
|
--grounding_height 1080 \
|
||||||
--coder_provider "openai" \
|
--coder_provider "openai" \
|
||||||
--coder_model "gpt-5" \
|
--coder_model "gpt-5" \
|
||||||
--coder_url "https://api.boyuerichdata.opensphereai.com/v1" \
|
--coder_url "xxx" \
|
||||||
--coder_api_key "xxx" \
|
--coder_api_key "xxx" \
|
||||||
--coder_temperature 0.1 \
|
--coder_temperature 0.1 \
|
||||||
--coder_budget 20 \
|
--coder_budget 20 \
|
||||||
--memoryer_provider "openai" \
|
--memoryer_provider "openai" \
|
||||||
--memoryer_model "gpt-5" \
|
--memoryer_model "gpt-5" \
|
||||||
--memoryer_url "https://api.boyuerichdata.opensphereai.com/v1" \
|
--memoryer_url "xxx" \
|
||||||
--memoryer_api_key "xxx" \
|
--memoryer_api_key "xxx" \
|
||||||
--memoryer_temperature 0.1 \
|
--memoryer_temperature 0.1 \
|
||||||
--memoryer_max_images 8 \
|
--memoryer_max_images 8 \
|
||||||
--searcher_provider "openai" \
|
--searcher_provider "openai" \
|
||||||
--searcher_model "gpt-5" \
|
--searcher_model "gpt-5" \
|
||||||
--searcher_url "https://api.boyuerichdata.opensphereai.com/v1" \
|
--searcher_url "xxx" \
|
||||||
--searcher_api_key "xxx" \
|
--searcher_api_key "xxx" \
|
||||||
--searcher_temperature 0.1 \
|
--searcher_temperature 0.1 \
|
||||||
--searcher_type "vlm" \
|
--searcher_type "vlm" \
|
||||||
--searcher_engine "duckduckgo" \
|
--searcher_engine "google" \
|
||||||
--searcher_budget 20\
|
--searcher_budget 20 \
|
||||||
--searcher_screen_width 1920 \
|
--searcher_screen_width 1920 \
|
||||||
--searcher_screen_height 1080 \
|
--searcher_screen_height 1080 \
|
||||||
--searcher_path_to_vm "xxx" \
|
|
||||||
--sleep_after_execution 3 \
|
--sleep_after_execution 3 \
|
||||||
--exp_name ${EXP_NAME} \
|
--exp_name ${EXP_NAME} \
|
||||||
--enable_reflection
|
--enable_reflection >> logs/${EXP_NAME}.log 2>&1
|
||||||
|
|
||||||
# bash scripts/remove_all_osworld_container.sh > logs/${EXP_NAME}.log 2>&1 --enable_rewrite_instruction --grounding_smart_resize
|
|
||||||
|
|||||||
Reference in New Issue
Block a user