#!/usr/bin/env bash
#
# sci-gui-agent-benchmark/run_operator.sh
#
# Launches run_multienv_openaicua.py with a fixed set of flags:
# the "computer-use-preview" model, screenshot observations, headless mode,
# 15 environments, at most 100 steps, and the "aws" provider.
#
# Usage:
#   ./run_operator.sh [extra args...]
#
# All paths below are relative, so run this from the directory that contains
# run_multienv_openaicua.py and evaluation_examples/.
# Any extra arguments given to this script are appended verbatim to the
# Python command line; with no arguments the invocation is exactly the
# fixed flag set listed here.
# NOTE(review): flag semantics are defined by run_multienv_openaicua.py,
# which is not visible here — confirm there before changing values.

set -euo pipefail

# Flags are kept in an array (rather than backslash continuations) so each
# one sits on its own line and a stray trailing space cannot break the command.
args=(
  --headless
  --observation_type screenshot
  --model computer-use-preview
  --result_dir ./results_operator_full_test_0713
  --test_all_meta_path evaluation_examples/test_all.json
  --max_steps 100
  --num_envs 15
  --provider_name aws
)

python run_multienv_openaicua.py "${args[@]}" "$@"