From 5d90faa548e88ce8883f0fa1ed3db4093952ebce Mon Sep 17 00:00:00 2001 From: yuanmengqi Date: Mon, 14 Jul 2025 07:13:17 +0000 Subject: [PATCH] run operator --- .../chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json | 2 +- .../chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json | 2 +- evaluation_examples/settings/proxy/dataimpulse.json | 4 ++-- monitor/.env | 4 ++-- run_operator.sh | 9 +++++++++ show_result.py | 2 +- 6 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 run_operator.sh diff --git a/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json b/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json index 9b37187..a93c959 100644 --- a/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json +++ b/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json @@ -57,5 +57,5 @@ } } }, - "proxy": true + "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json b/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json index 9e5d730..6bdffe9 100644 --- a/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json +++ b/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json @@ -56,5 +56,5 @@ } } }, - "proxy": true + "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/settings/proxy/dataimpulse.json b/evaluation_examples/settings/proxy/dataimpulse.json index 4cd99ac..3c552a5 100644 --- a/evaluation_examples/settings/proxy/dataimpulse.json +++ b/evaluation_examples/settings/proxy/dataimpulse.json @@ -2,8 +2,8 @@ { "host": "gw.dataimpulse.com", "port": 823, - "username": "your_username", - "password": "your_password", + "username": "e750e5abb74376d28361", + "password": "e5ec245537e1e76a", "protocol": "http", "provider": "dataimpulse", "type": "residential", diff --git a/monitor/.env b/monitor/.env index 
05618af..26de7b2 100644 --- a/monitor/.env +++ b/monitor/.env @@ -4,11 +4,11 @@ # Monitor configuration TASK_CONFIG_PATH=../evaluation_examples/test_all.json EXAMPLES_BASE_PATH=../evaluation_examples/examples -RESULTS_BASE_PATH=../results_all +RESULTS_BASE_PATH=../results_operator_full_test_0713 ACTION_SPACE=pyautogui OBSERVATION_TYPE=screenshot MODEL_NAME=computer-use-preview -MAX_STEPS=150 +MAX_STEPS=100 FLASK_PORT=80 FLASK_HOST=0.0.0.0 FLASK_DEBUG=true \ No newline at end of file diff --git a/run_operator.sh b/run_operator.sh new file mode 100644 index 0000000..154df38 --- /dev/null +++ b/run_operator.sh @@ -0,0 +1,9 @@ +python run_multienv_openaicua.py \ +--headless \ +--observation_type screenshot \ +--model computer-use-preview \ +--result_dir ./results_operator_full_test_0713 \ +--test_all_meta_path evaluation_examples/test_all.json \ +--max_steps 100 \ +--num_envs 15 \ +--provider_name aws \ No newline at end of file diff --git a/show_result.py b/show_result.py index c6bbbc5..623833d 100644 --- a/show_result.py +++ b/show_result.py @@ -68,4 +68,4 @@ def get_result(action_space, use_model, observation_type, result_dir): if __name__ == '__main__': - get_result("pyautogui", "gpt-4o", "a11y_tree", "./results") + get_result("pyautogui", "computer-use-preview", "screenshot", "./results_operator_full_test_0713")