Merge branch 'fix_chrome'

This commit is contained in:
yuanmengqi
2025-07-15 02:13:58 +00:00
parent 7c807d4f3e
commit 756ef96850
21 changed files with 922 additions and 103 deletions

View File

@@ -369,9 +369,10 @@ class AnthropicAgent:
)
except (APIError, APIStatusError, APIResponseValidationError) as e:
self.logger.exception(f"Anthropic API error: {str(e)}")
logger.exception(f"Anthropic API error: {str(e)}")
try:
self.logger.warning("Retrying with backup API key...")
logger.warning("Retrying with backup API key...")
backup_client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY_BACKUP"), max_retries=4)
if self.model_name == "claude-3-7-sonnet-20250219" or self.model_name == "claude-4-opus-20250514" or self.model_name == "claude-4-sonnet-20250514":
@@ -393,13 +394,13 @@ class AnthropicAgent:
tools=tools,
betas=betas,
)
self.logger.info("Successfully used backup API key")
logger.info("Successfully used backup API key")
except Exception as backup_e:
self.logger.exception(f"Backup API call also failed: {str(backup_e)}")
logger.exception(f"Backup API call also failed: {str(backup_e)}")
return None, None
except Exception as e:
self.logger.exception(f"Error in Anthropic API: {str(e)}")
logger.exception(f"Error in Anthropic API: {str(e)}")
return None, None
response_params = _response_to_params(response)
@@ -434,9 +435,15 @@ class AnthropicAgent:
actions = ["DONE"]
return reasonings, actions
def reset(self, _logger=None, *args, **kwargs):
    """
    Reset the agent's state.

    Rebinds the module-level ``logger`` and clears the stored messages,
    then logs that the reset happened.

    Args:
        _logger: Logger to install as the module-level ``logger``. When
            ``None``, the "desktopenv.agent" logger is used instead.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.
    """
    global logger
    # Compare against None explicitly so that a supplied logger is never
    # discarded just because it happens to evaluate as falsy.
    if _logger is not None:
        logger = _logger
    else:
        logger = logging.getLogger("desktopenv.agent")
    self.messages = []
    logger.info(f"{self.class_name} reset.")

View File

@@ -671,8 +671,14 @@ class OpenAICUAAgent:
action_exit = False
thought_exit = False
message_exit = False
infeasible_message = False
infeasible_word_list = ["infeasible", "unfeasible", "impossible", "not feasible", "cannot be done"]
for item in response.output:
parsed_item = self._handle_item(item)
if item.type == "message" and any(word in parsed_item.lower() for word in infeasible_word_list):
actions.append({"action_space": "pyautogui", "action": "FAIL", "pending_checks": [], "call_id": ""})
infeasible_message = True
break
if isinstance(parsed_item, dict) and parsed_item.get("action_space", None) == "pyautogui":
actions.append(parsed_item)
else:
@@ -693,7 +699,7 @@ class OpenAICUAAgent:
# state_correct = True
# if action_exit and not message_exit:
# state_correct = True
if action_exit:
if action_exit and not infeasible_message:
state_correct = True
if not state_correct:
logger.warning("The state of the agent is not correct, action_exit: %s, thought_exit: %s, message_exit: %s", action_exit, thought_exit, message_exit)
@@ -747,6 +753,7 @@ class OpenAICUAAgent:
# Convert the action to an Action object
step_action = Action(action.get("action", ""), self.action_space)
# Execute the action in the environment
print(f"Executing action: {step_action.get_action()}")
obs, reward, terminated, info = self.env.step(step_action.get_action())
screenshot_base64 = encode_image(obs["screenshot"])