Merge remote-tracking branch 'origin/main'
# Conflicts:
#	mm_agents/agent.py
#	run.py
This commit is contained in:
@@ -55,12 +55,12 @@ def judge_node(node: ET, platform="ubuntu") -> bool:
|
||||
or platform=="windows"\
|
||||
and node.get("{{{:}}}visible".format(state_ns), "false")=="true"\
|
||||
)\
|
||||
and ( node.get("{{{:}}}enabled".format(state_ns), "false")=="true"\
|
||||
or node.get("{{{:}}}editable".format(state_ns), "false")=="true"\
|
||||
or node.get("{{{:}}}expandable".format(state_ns), "false")=="true"\
|
||||
or node.get("{{{:}}}checkable".format(state_ns), "false")=="true"
|
||||
)\
|
||||
and (node.get("name", "") != "" or node.text is not None and len(node.text)>0)
|
||||
and ( node.get("{{{:}}}enabled".format(state_ns), "false")=="true"\
|
||||
or node.get("{{{:}}}editable".format(state_ns), "false")=="true"\
|
||||
or node.get("{{{:}}}expandable".format(state_ns), "false")=="true"\
|
||||
or node.get("{{{:}}}checkable".format(state_ns), "false")=="true"
|
||||
)\
|
||||
and (node.get("name", "") != "" or node.text is not None and len(node.text)>0)
|
||||
|
||||
coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format(component_ns), "(-1, -1)"))
|
||||
sizes: Tuple[int, int] = eval(node.get("{{{:}}}size".format(component_ns), "(-1, -1)"))
|
||||
|
||||
@@ -5,11 +5,13 @@ import os
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
import openai
|
||||
import xml.etree.ElementTree as ET
|
||||
from http import HTTPStatus
|
||||
from io import BytesIO
|
||||
from typing import Dict, List
|
||||
|
||||
from google.api_core.exceptions import InvalidArgument
|
||||
import backoff
|
||||
import dashscope
|
||||
import google.generativeai as genai
|
||||
import requests
|
||||
@@ -22,6 +24,8 @@ from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_S
|
||||
SYS_PROMPT_IN_SOM_A11Y_OUT_TAG, \
|
||||
SYS_PROMPT_SEEACT, ACTION_DESCRIPTION_PROMPT_SEEACT, ACTION_GROUNDING_PROMPT_SEEACT
|
||||
|
||||
# TODO: cross-check with VisualWebArena
|
||||
|
||||
logger = logging.getLogger("desktopenv.agent")
|
||||
|
||||
|
||||
@@ -506,18 +510,25 @@ class PromptAgent:
|
||||
try:
|
||||
actions = self.parse_actions(response, masks)
|
||||
self.thoughts.append(response)
|
||||
except Exception as e:
|
||||
except ValueError as e:
|
||||
print("Failed to parse action from response", e)
|
||||
actions = None
|
||||
self.thoughts.append("")
|
||||
|
||||
return actions
|
||||
|
||||
# @backoff.on_exception(
|
||||
# backoff.expo,
|
||||
# (Exception),
|
||||
# max_tries=5
|
||||
# )
|
||||
@backoff.on_exception(
|
||||
backoff.expo,
|
||||
# Add more model-specific exception types here as needed,
|
||||
# but never add the generic "Exception" type here,
|
||||
# because generic exceptions must be caught by the outer caller to ensure each example won't exceed the time limit
|
||||
(openai.RateLimitError,
|
||||
openai.BadRequestError,
|
||||
openai.InternalServerError,
|
||||
InvalidArgument),
|
||||
max_tries=5
|
||||
)
|
||||
|
||||
def call_llm(self, payload):
|
||||
|
||||
if self.model.startswith("gpt"):
|
||||
@@ -525,7 +536,7 @@ class PromptAgent:
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"
|
||||
}
|
||||
logger.info("Generating content with GPT model: %s", self.model)
|
||||
# logger.info("Generating content with GPT model: %s", self.model)
|
||||
response = requests.post(
|
||||
"https://api.openai.com/v1/chat/completions",
|
||||
headers=headers,
|
||||
|
||||
Reference in New Issue
Block a user