diff --git a/mm_agents/agent.py b/mm_agents/agent.py index 7dd45b3..28b5f11 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -505,7 +505,7 @@ class PromptAgent: if part['type'] == "image_url": image_source = {} image_source["type"] = "base64" - image_source["media_type"] = "image/jpeg" + image_source["media_type"] = "image/png" image_source["data"] = part['image_url']['url'].replace("data:image/png;base64,", "") claude_message['content'].append({"type": "image", "source": image_source}) @@ -514,6 +514,12 @@ class PromptAgent: claude_messages.append(claude_message) + # Claude does not support a system message on our endpoint, so we prepend it to the first user message + if claude_messages[0]['role'] == "system": + claude_system_message_item = claude_messages[0]['content'][0] + claude_messages[1]['content'].insert(0, claude_system_message_item) + claude_messages.pop(0) + headers = { "x-api-key": os.environ["ANTHROPIC_API_KEY"], @@ -534,17 +540,6 @@ class PromptAgent: ) if response.status_code != 200: - # if response.json()['error']['code'] == "context_length_exceeded": - # logger.error("Context length exceeded. Retrying with a smaller context.") - # payload["messages"] = payload["messages"][-1:] - # retry_response = requests.post( - # "https://api.anthropic.com/v1/messages", - # headers=headers, - # json=payload - # ) - # if retry_response.status_code != 200: - # logger.error("Failed to call LLM: " + retry_response.text) - # return "" logger.error("Failed to call LLM: " + response.text) time.sleep(5)