Merge remote-tracking branch 'origin/main'

# Conflicts:
#	mm_agents/agent.py
#	run.py
This commit is contained in:
Timothyxxx
2024-03-15 21:10:32 +08:00
11 changed files with 215 additions and 85 deletions

View File

@@ -55,12 +55,12 @@ def judge_node(node: ET, platform="ubuntu") -> bool:
or platform=="windows"\
and node.get("{{{:}}}visible".format(state_ns), "false")=="true"\
)\
and ( node.get("{{{:}}}enabled".format(state_ns), "false")=="true"\
or node.get("{{{:}}}editable".format(state_ns), "false")=="true"\
or node.get("{{{:}}}expandable".format(state_ns), "false")=="true"\
or node.get("{{{:}}}checkable".format(state_ns), "false")=="true"
)\
and (node.get("name", "") != "" or node.text is not None and len(node.text)>0)
and ( node.get("{{{:}}}enabled".format(state_ns), "false")=="true"\
or node.get("{{{:}}}editable".format(state_ns), "false")=="true"\
or node.get("{{{:}}}expandable".format(state_ns), "false")=="true"\
or node.get("{{{:}}}checkable".format(state_ns), "false")=="true"
)\
and (node.get("name", "") != "" or node.text is not None and len(node.text)>0)
coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format(component_ns), "(-1, -1)"))
sizes: Tuple[int, int] = eval(node.get("{{{:}}}size".format(component_ns), "(-1, -1)"))

View File

@@ -5,11 +5,13 @@ import os
import re
import time
import uuid
import openai
import xml.etree.ElementTree as ET
from http import HTTPStatus
from io import BytesIO
from typing import Dict, List
from google.api_core.exceptions import InvalidArgument
import backoff
import dashscope
import google.generativeai as genai
import requests
@@ -22,6 +24,8 @@ from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_S
SYS_PROMPT_IN_SOM_A11Y_OUT_TAG, \
SYS_PROMPT_SEEACT, ACTION_DESCRIPTION_PROMPT_SEEACT, ACTION_GROUNDING_PROMPT_SEEACT
# todo: cross-check with visualwebarena
logger = logging.getLogger("desktopenv.agent")
@@ -506,18 +510,25 @@ class PromptAgent:
try:
actions = self.parse_actions(response, masks)
self.thoughts.append(response)
except Exception as e:
except ValueError as e:
print("Failed to parse action from response", e)
actions = None
self.thoughts.append("")
return actions
# @backoff.on_exception(
# backoff.expo,
# (Exception),
# max_tries=5
# )
@backoff.on_exception(
backoff.expo,
# here you should add more model exceptions as you want,
# but you are forbidden to add "Exception", that is, a common type of exception
# because we want to catch this kind of Exception in the outside to ensure each example won't exceed the time limit
(openai.RateLimitError,
openai.BadRequestError,
openai.InternalServerError,
InvalidArgument),
max_tries=5
)
def call_llm(self, payload):
if self.model.startswith("gpt"):
@@ -525,7 +536,7 @@ class PromptAgent:
"Content-Type": "application/json",
"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"
}
logger.info("Generating content with GPT model: %s", self.model)
# logger.info("Generating content with GPT model: %s", self.model)
response = requests.post(
"https://api.openai.com/v1/chat/completions",
headers=headers,