+

OSWorld Monitor System Online

+ + +
+
+ + Score: + Loading... +
+
+ +
Loading... @@ -46,10 +89,11 @@
Total Tasks
-
-
-
-
Loading task data...
+
+
+
+
Loading task data...
+
diff --git a/run_operator.sh b/run_operator.sh new file mode 100644 index 0000000..154df38 --- /dev/null +++ b/run_operator.sh @@ -0,0 +1,9 @@ +python run_multienv_openaicua.py \ +--headless \ +--observation_type screenshot \ +--model computer-use-preview \ +--result_dir ./results_operator_full_test_0713 \ +--test_all_meta_path evaluation_examples/test_all.json \ +--max_steps 100 \ +--num_envs 15 \ +--provider_name aws \ No newline at end of file diff --git a/show_result.py b/show_result.py index c6bbbc5..623833d 100644 --- a/show_result.py +++ b/show_result.py @@ -68,4 +68,4 @@ def get_result(action_space, use_model, observation_type, result_dir): if __name__ == '__main__': - get_result("pyautogui", "gpt-4o", "a11y_tree", "./results") + get_result("pyautogui", "computer-use-preview", "screenshot", "./results_operator_full_test_0713")