diff --git a/evaluation_examples/test_all.json b/evaluation_examples/test_all.json index 0514d47..7153d86 100644 --- a/evaluation_examples/test_all.json +++ b/evaluation_examples/test_all.json @@ -103,7 +103,6 @@ "1e8df695-bd1b-45b3-b557-e7d599cf7597", "ecb0df7a-4e8d-4a03-b162-053391d3afaf", "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14", - "7b802dad-6e0f-4204-9815-d4e3f57627d8", "a01fbce3-2793-461f-ab86-43680ccbae25", "0326d92d-d218-48a8-9ca1-981cd6d064c7", "0a2e43bf-b26c-4631-a966-af9dfa12c9e5", @@ -380,7 +379,6 @@ "9439a27b-18ae-42d8-9778-5f68f891805e", "ae506c68-352c-4094-9caa-ee9d42052317", "ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae", - "c714dcee-cad3-4e12-8f3c-12bdcfcdb048", "930fdb3b-11a8-46fe-9bac-577332e2640e", "276cc624-87ea-4f08-ab93-f770e3790175", "9d425400-e9b2-4424-9a4b-d4c7abac4140", diff --git a/run.py b/run.py index 719222a..5bc4415 100644 --- a/run.py +++ b/run.py @@ -79,7 +79,7 @@ def config() -> argparse.Namespace: # agent config parser.add_argument("--max_trajectory_length", type=int, default=3) parser.add_argument("--test_config_base_dir", type=str, default="evaluation_examples") - parser.add_argument("--example_time_limit", type=int, default=600) + parser.add_argument("--example_time_limit", type=int, default=1200) # lm config parser.add_argument("--model", type=str, default="gpt-4-vision-preview") @@ -211,5 +211,4 @@ if __name__ == '__main__': for domain in test_file_list: left_info += f"{domain}: {len(test_file_list[domain])}\n" logger.info(f"Left tasks:\n{left_info}") - os.environ["OPENAI_API_KEY"] = "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" test(args, test_all_meta)