fix timeout
This commit is contained in:
5
evaluation_examples/test_bug.json
Normal file
5
evaluation_examples/test_bug.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"multi_apps": [
|
||||||
|
"46407397-a7d5-4c6b-92c6-dbe038b1457b"
|
||||||
|
]
|
||||||
|
}
|
||||||
25
evaluation_examples/test_small_test.json
Normal file
25
evaluation_examples/test_small_test.json
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"libreoffice_writer": [
|
||||||
|
"0810415c-bde4-4443-9047-d5f70165a697",
|
||||||
|
"0a0faba3-5580-44df-965d-f562a99b291c"
|
||||||
|
],
|
||||||
|
"multi_apps": [
|
||||||
|
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
|
||||||
|
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
|
||||||
|
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
|
||||||
|
"c867c42d-a52d-4a24-8ae3-f75d256b5618",
|
||||||
|
"b5062e3e-641c-4e3a-907b-ac864d2e7652",
|
||||||
|
"716a6079-22da-47f1-ba73-c9d58f986a38"
|
||||||
|
],
|
||||||
|
"os": [
|
||||||
|
"5812b315-e7bd-4265-b51f-863c02174c28"
|
||||||
|
],
|
||||||
|
"thunderbird": [
|
||||||
|
"dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
|
||||||
|
"15c3b339-88f7-4a86-ab16-e71c58dcb01e"
|
||||||
|
],
|
||||||
|
"vlc": [
|
||||||
|
"59f21cfb-0120-4326-b255-a5b827b38967",
|
||||||
|
"8f080098-ddb1-424c-b438-4e96e5e4786e"
|
||||||
|
]
|
||||||
|
}
|
||||||
18
evaluation_examples/test_small_test2.json
Normal file
18
evaluation_examples/test_small_test2.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"multi_apps": [
|
||||||
|
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
|
||||||
|
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
|
||||||
|
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
|
||||||
|
"c867c42d-a52d-4a24-8ae3-f75d256b5618"
|
||||||
|
],
|
||||||
|
"os": [
|
||||||
|
"5812b315-e7bd-4265-b51f-863c02174c28"
|
||||||
|
],
|
||||||
|
"thunderbird": [
|
||||||
|
"dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397",
|
||||||
|
"15c3b339-88f7-4a86-ab16-e71c58dcb01e"
|
||||||
|
],
|
||||||
|
"vlc": [
|
||||||
|
"59f21cfb-0120-4326-b255-a5b827b38967"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -301,8 +301,7 @@ class OpenAICUAAgent:
|
|||||||
Raises:
|
Raises:
|
||||||
requests.exceptions.RequestException: If the API request fails
|
requests.exceptions.RequestException: If the API request fails
|
||||||
"""
|
"""
|
||||||
retry_count = 0
|
while True:
|
||||||
while retry_count < 3:
|
|
||||||
try:
|
try:
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY_CUA"))
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY_CUA"))
|
||||||
@@ -319,13 +318,8 @@ class OpenAICUAAgent:
|
|||||||
logger.info(f"Response: {response}")
|
logger.info(f"Response: {response}")
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"OpenAI API error: {str(e)}")
|
logger.error(f"OpenAI API error: {str(e)},will retry in 1s...")
|
||||||
new_screenshot = self.env._get_obs()
|
|
||||||
new_screenshot_base64 = base64.b64encode(new_screenshot["screenshot"]).decode('utf-8')
|
|
||||||
self.cua_messages[-1]["output"]["image_url"] = f"data:image/png;base64,{new_screenshot_base64}"
|
|
||||||
retry_count += 1
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
raise Exception("Failed to make OpenAI API call after 3 retries")
|
|
||||||
|
|
||||||
def _handle_item(self, item: Dict[str, Any]) -> Optional[Union[str, Dict[str, Any]]]:
|
def _handle_item(self, item: Dict[str, Any]) -> Optional[Union[str, Dict[str, Any]]]:
|
||||||
"""Parse a response item from the OpenAI API.
|
"""Parse a response item from the OpenAI API.
|
||||||
|
|||||||
@@ -2,10 +2,10 @@
|
|||||||
# Do not write any secret keys or sensitive information here.
|
# Do not write any secret keys or sensitive information here.
|
||||||
|
|
||||||
# Monitor configuration
|
# Monitor configuration
|
||||||
TASK_CONFIG_PATH=../evaluation_examples/test_small_debug.json
|
TASK_CONFIG_PATH=../evaluation_examples/test_small_test2.json
|
||||||
EXAMPLES_BASE_PATH=../evaluation_examples/examples
|
EXAMPLES_BASE_PATH=../evaluation_examples/examples
|
||||||
RESULTS_BASE_PATH=../results_operator_aws2/pyautogui/screenshot/computer-use-preview
|
RESULTS_BASE_PATH=../results_operator_timeoutcheck3/pyautogui/screenshot/computer-use-preview
|
||||||
MAX_STEPS=50
|
MAX_STEPS=150
|
||||||
FLASK_PORT=80
|
FLASK_PORT=80
|
||||||
FLASK_HOST=0.0.0.0
|
FLASK_HOST=0.0.0.0
|
||||||
FLASK_DEBUG=true
|
FLASK_DEBUG=true
|
||||||
@@ -2,8 +2,7 @@ python run_multienv_openaicua.py \
|
|||||||
--headless \
|
--headless \
|
||||||
--observation_type screenshot \
|
--observation_type screenshot \
|
||||||
--model computer-use-preview \
|
--model computer-use-preview \
|
||||||
--result_dir ./results_operator_aws_new \
|
--result_dir ./results_operator_timeoutcheck3 \
|
||||||
--test_all_meta_path evaluation_examples/test_small_debug.json \
|
--test_all_meta_path evaluation_examples/test_small_test2.json \
|
||||||
--region us-east-1 \
|
--region us-east-1 \
|
||||||
--max_steps 150 \
|
--max_steps 150
|
||||||
--num_envs 5
|
|
||||||
|
|||||||
Reference in New Issue
Block a user