fix timeout
This commit is contained in:
5
evaluation_examples/test_bug.json
Normal file
5
evaluation_examples/test_bug.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"multi_apps": [
|
||||
"46407397-a7d5-4c6b-92c6-dbe038b1457b"
|
||||
]
|
||||
}
|
||||
25
evaluation_examples/test_small_test.json
Normal file
25
evaluation_examples/test_small_test.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"libreoffice_writer": [
|
||||
"0810415c-bde4-4443-9047-d5f70165a697",
|
||||
"0a0faba3-5580-44df-965d-f562a99b291c"
|
||||
],
|
||||
"multi_apps": [
|
||||
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
|
||||
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
|
||||
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
|
||||
"c867c42d-a52d-4a24-8ae3-f75d256b5618",
|
||||
"b5062e3e-641c-4e3a-907b-ac864d2e7652",
|
||||
"716a6079-22da-47f1-ba73-c9d58f986a38"
|
||||
],
|
||||
"os": [
|
||||
"5812b315-e7bd-4265-b51f-863c02174c28"
|
||||
],
|
||||
"thunderbird": [
|
||||
"dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
|
||||
"15c3b339-88f7-4a86-ab16-e71c58dcb01e"
|
||||
],
|
||||
"vlc": [
|
||||
"59f21cfb-0120-4326-b255-a5b827b38967",
|
||||
"8f080098-ddb1-424c-b438-4e96e5e4786e"
|
||||
]
|
||||
}
|
||||
18
evaluation_examples/test_small_test2.json
Normal file
18
evaluation_examples/test_small_test2.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"multi_apps": [
|
||||
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
|
||||
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
|
||||
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
|
||||
"c867c42d-a52d-4a24-8ae3-f75d256b5618"
|
||||
],
|
||||
"os": [
|
||||
"5812b315-e7bd-4265-b51f-863c02174c28"
|
||||
],
|
||||
"thunderbird": [
|
||||
"dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
|
||||
"15c3b339-88f7-4a86-ab16-e71c58dcb01e"
|
||||
],
|
||||
"vlc": [
|
||||
"59f21cfb-0120-4326-b255-a5b827b38967"
|
||||
]
|
||||
}
|
||||
@@ -301,8 +301,7 @@ class OpenAICUAAgent:
|
||||
Raises:
|
||||
requests.exceptions.RequestException: If the API request fails
|
||||
"""
|
||||
retry_count = 0
|
||||
while retry_count < 3:
|
||||
while True:
|
||||
try:
|
||||
from openai import OpenAI
|
||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY_CUA"))
|
||||
@@ -319,13 +318,8 @@ class OpenAICUAAgent:
|
||||
logger.info(f"Response: {response}")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"OpenAI API error: {str(e)}")
|
||||
new_screenshot = self.env._get_obs()
|
||||
new_screenshot_base64 = base64.b64encode(new_screenshot["screenshot"]).decode('utf-8')
|
||||
self.cua_messages[-1]["output"]["image_url"] = f"data:image/png;base64,{new_screenshot_base64}"
|
||||
retry_count += 1
|
||||
logger.error(f"OpenAI API error: {str(e)},will retry in 1s...")
|
||||
time.sleep(1)
|
||||
raise Exception("Failed to make OpenAI API call after 3 retries")
|
||||
|
||||
def _handle_item(self, item: Dict[str, Any]) -> Optional[Union[str, Dict[str, Any]]]:
|
||||
"""Parse a response item from the OpenAI API.
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
# Do not write any secret keys or sensitive information here.
|
||||
|
||||
# Monitor configuration
|
||||
TASK_CONFIG_PATH=../evaluation_examples/test_small_debug.json
|
||||
TASK_CONFIG_PATH=../evaluation_examples/test_small_test2.json
|
||||
EXAMPLES_BASE_PATH=../evaluation_examples/examples
|
||||
RESULTS_BASE_PATH=../results_operator_aws2/pyautogui/screenshot/computer-use-preview
|
||||
MAX_STEPS=50
|
||||
RESULTS_BASE_PATH=../results_operator_timeoutcheck3/pyautogui/screenshot/computer-use-preview
|
||||
MAX_STEPS=150
|
||||
FLASK_PORT=80
|
||||
FLASK_HOST=0.0.0.0
|
||||
FLASK_DEBUG=true
|
||||
@@ -2,8 +2,7 @@ python run_multienv_openaicua.py \
|
||||
--headless \
|
||||
--observation_type screenshot \
|
||||
--model computer-use-preview \
|
||||
--result_dir ./results_operator_aws_new \
|
||||
--test_all_meta_path evaluation_examples/test_small_debug.json \
|
||||
--result_dir ./results_operator_timeoutcheck3 \
|
||||
--test_all_meta_path evaluation_examples/test_small_test2.json \
|
||||
--region us-east-1 \
|
||||
--max_steps 150 \
|
||||
--num_envs 5
|
||||
--max_steps 150
|
||||
|
||||
Reference in New Issue
Block a user