eval update

This commit is contained in:
yuanmengqi
2025-06-07 13:19:22 +00:00
parent 4ade4114da
commit c57b1d4e7a
6 changed files with 38 additions and 17 deletions

View File

@@ -2,8 +2,8 @@ python run_multienv_openaicua.py \
--headless \
--observation_type screenshot \
--model computer-use-preview \
- --result_dir ./results_small_retest \
- --test_all_meta_path evaluation_examples/test_small.json \
+ --result_dir ./results_all_ifmessage_promptnochange \
+ --test_all_meta_path evaluation_examples/test_all.json \
--region us-east-1 \
--max_steps 150 \
--num_envs 10