add_os_symphony (#399)
This commit is contained in:
0
mm_agents/os_symphony/core/__init__.py
Executable file
0
mm_agents/os_symphony/core/__init__.py
Executable file
480
mm_agents/os_symphony/core/engine.py
Executable file
480
mm_agents/os_symphony/core/engine.py
Executable file
@@ -0,0 +1,480 @@
|
||||
import base64
import copy
import json
import logging
import os

import backoff
from anthropic import Anthropic
from openai import (
    APIConnectionError,
    APIError,
    AzureOpenAI,
    OpenAI,
    RateLimitError,
)

# Module-level logger shared by all engine classes below.
# (The original listed AzureOpenAI twice in the openai import and
# assigned this logger twice; both duplicates are collapsed here.)
logger = logging.getLogger("desktopenv.agents.engine")
||||
class LMMEngine:
    """Marker base class shared by every large-multimodal-model engine."""
|
||||
|
||||
|
||||
class LMMEngineOpenAI(LMMEngine):
    """OpenAI-compatible chat-completions engine.

    Lazily constructs an ``openai.OpenAI`` client and returns
    ``(response_text, usage)`` tuples from :meth:`generate`.
    """

    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        rate_limit=-1,
        temperature=None,
        organization=None,
        **kwargs,
    ):
        """Store connection settings; the client is created on first use.

        Args:
            base_url: Optional custom endpoint; defaults to the public OpenAI API.
            api_key: API key; falls back to the OPENAI_API_KEY env variable.
            model: Required model name.
            rate_limit: Requests per minute; -1 disables throttling.
            temperature: When set, overrides the per-call temperature
                (e.g. o3 requires temperature to be 1).
            organization: Optional org id; falls back to OPENAI_ORG_ID.
        """
        assert model is not None, "model must be provided"
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.organization = organization
        # Minimum delay between requests derived from the per-minute budget.
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None
        # Can force temperature to be the same (in the case of o3 requiring
        # temperature to be 1).
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        """Run one chat completion.

        Returns:
            tuple: (response text, usage object reported by the API).

        Raises:
            ValueError: if no API key is configured.
        """
        api_key = self.api_key or os.getenv("OPENAI_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named OPENAI_API_KEY"
            )
        organization = self.organization or os.getenv("OPENAI_ORG_ID")

        # Cluster-internal authentication for self-hosted model families.
        # SECURITY: this Basic credential is hard-coded in source; it should
        # be moved to configuration and the secret rotated. TODO: remove.
        if self.model.lower().startswith(("ui", "qwen", "scale", "holo")):
            custom_headers = {
                "Authorization": "Basic NWFkMzQxMDBlZTA1NWE0YmFlNjYzNzBhNWU2ODNiYWM6NjA3ZGU4MjQ5NjU3YTNiM2JkMDM2ZGM5NmQ0YzBiMmY="
            }
        else:
            custom_headers = {}
        if not self.llm_client:
            if not self.base_url:
                self.llm_client = OpenAI(
                    api_key=api_key,
                    organization=organization,
                    default_headers=custom_headers,
                )
            else:
                self.llm_client = OpenAI(
                    base_url=self.base_url,
                    api_key=api_key,
                    organization=organization,
                    default_headers=custom_headers,
                )

        # Serialize the payload once (the original serialized it twice —
        # once for the size check and again inside the log message).
        payload_size = len(json.dumps(messages)) / 1024 / 1024
        logger.info(f"Payload size: {payload_size:.2f} MB")
        if payload_size > 30:
            logger.info("Payload size exceeds 30MB!!!")

        result = self.llm_client.chat.completions.create(
            model=self.model,
            messages=messages,
            # max_completion_tokens=max_new_tokens if max_new_tokens else 4096,
            temperature=(
                temperature if self.temperature is None else self.temperature
            ),
            **kwargs,
        )
        usage = result.usage
        response = result.choices[0].message.content
        return (response, usage)
|
||||
|
||||
|
||||
class LMMEngineAnthropic(LMMEngine):
    """Anthropic messages-API engine with optional extended-thinking mode."""

    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        thinking=False,
        temperature=None,
        **kwargs,
    ):
        """Store settings; the Anthropic client is built on each generate call.

        Args:
            base_url: Unused; kept for signature parity with other engines.
            api_key: API key; falls back to the ANTHROPIC_API_KEY env variable.
            model: Required model name.
            thinking: When True, generate() uses extended-thinking mode.
            temperature: When set, overrides the per-call temperature.
        """
        assert model is not None, "model must be provided"
        self.model = model
        self.thinking = thinking
        self.api_key = api_key
        self.llm_client = None
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        """Generate the next assistant message and return its text.

        messages[0] must be the system message; the rest are the turns.

        Raises:
            ValueError: if no API key is configured.
        """
        api_key = self.api_key or os.getenv("ANTHROPIC_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named ANTHROPIC_API_KEY"
            )
        self.llm_client = Anthropic(api_key=api_key)
        # Use the instance temperature when configured.
        # BUGFIX: the original tested ``temperature is None``, which is never
        # true (the argument defaults to 0.0), so the instance temperature was
        # silently ignored. This now matches the other engines.
        temp = self.temperature if self.temperature is not None else temperature
        if self.thinking:
            full_response = self.llm_client.messages.create(
                system=messages[0]["content"][0]["text"],
                model=self.model,
                messages=messages[1:],
                max_tokens=8192,
                thinking={"type": "enabled", "budget_tokens": 4096},
                **kwargs,
            )
            # content[0] is the thinking block; content[1] holds the answer.
            return full_response.content[1].text
        return (
            self.llm_client.messages.create(
                system=messages[0]["content"][0]["text"],
                model=self.model,
                messages=messages[1:],
                max_tokens=max_new_tokens if max_new_tokens else 4096,
                temperature=temp,
                **kwargs,
            )
            .content[0]
            .text
        )

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    # Compatible with Claude-3.7 Sonnet thinking mode
    def generate_with_thinking(
        self, messages, temperature=0.0, max_new_tokens=None, **kwargs
    ):
        """Generate the next message based on previous messages, and keeps the thinking tokens"""
        api_key = self.api_key or os.getenv("ANTHROPIC_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named ANTHROPIC_API_KEY"
            )
        self.llm_client = Anthropic(api_key=api_key)
        full_response = self.llm_client.messages.create(
            system=messages[0]["content"][0]["text"],
            model=self.model,
            messages=messages[1:],
            max_tokens=8192,
            thinking={"type": "enabled", "budget_tokens": 4096},
            **kwargs,
        )

        # Wrap both the thinking trace and the answer in explicit tags so the
        # caller can separate them.
        thoughts = full_response.content[0].thinking
        answer = full_response.content[1].text
        return (
            f"<thoughts>\n{thoughts}\n</thoughts>\n\n<answer>\n{answer}\n</answer>\n"
        )
|
||||
|
||||
|
||||
class LMMEngineGemini(LMMEngine):
    """Gemini engine accessed through an OpenAI-compatible endpoint."""

    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        rate_limit=-1,
        temperature=None,
        **kwargs,
    ):
        """Store settings; the client is created lazily in generate().

        Args:
            base_url: Endpoint URL; falls back to GEMINI_ENDPOINT_URL.
            api_key: API key; falls back to GEMINI_API_KEY.
            model: Required model name.
            rate_limit: Requests per minute; -1 disables throttling.
            temperature: When set, overrides the per-call temperature.
        """
        assert model is not None, "model must be provided"
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        # Minimum delay between requests derived from the per-minute budget.
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        """Generate the next assistant message and return its text.

        Raises:
            ValueError: if no API key or endpoint URL is configured.
        """
        api_key = self.api_key or os.getenv("GEMINI_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named GEMINI_API_KEY"
            )
        base_url = self.base_url or os.getenv("GEMINI_ENDPOINT_URL")
        if base_url is None:
            raise ValueError(
                "An endpoint URL needs to be provided in either the endpoint_url parameter or as an environment variable named GEMINI_ENDPOINT_URL"
            )
        if not self.llm_client:
            self.llm_client = OpenAI(base_url=base_url, api_key=api_key)
        # Use the instance temperature when configured.
        # BUGFIX: the original tested ``temperature is None``, which is never
        # true (the argument defaults to 0.0), so the configured instance
        # temperature was always ignored. This now matches the other engines.
        temp = self.temperature if self.temperature is not None else temperature
        return (
            self.llm_client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_new_tokens if max_new_tokens else 4096,
                temperature=temp,
                **kwargs,
            )
            .choices[0]
            .message.content
        )
|
||||
|
||||
|
||||
class LMMEngineOpenRouter(LMMEngine):
    """Engine that talks to OpenRouter through the OpenAI SDK."""

    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        rate_limit=-1,
        temperature=None,
        **kwargs,
    ):
        """Record connection settings; the HTTP client is built lazily."""
        assert model is not None, "model must be provided"
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        # Seconds to wait between requests given the per-minute budget.
        self.request_interval = 60.0 / rate_limit if rate_limit != -1 else 0
        self.llm_client = None
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        """Run one chat completion and return the assistant text."""
        key = self.api_key if self.api_key else os.getenv("OPENROUTER_API_KEY")
        if key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named OPENROUTER_API_KEY"
            )
        endpoint = (
            self.base_url if self.base_url else os.getenv("OPEN_ROUTER_ENDPOINT_URL")
        )
        if endpoint is None:
            raise ValueError(
                "An endpoint URL needs to be provided in either the endpoint_url parameter or as an environment variable named OPEN_ROUTER_ENDPOINT_URL"
            )
        if not self.llm_client:
            self.llm_client = OpenAI(base_url=endpoint, api_key=key)
        # The instance-level temperature, when configured, wins over the argument.
        temp = temperature if self.temperature is None else self.temperature
        completion = self.llm_client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_new_tokens if max_new_tokens else 4096,
            temperature=temp,
            **kwargs,
        )
        return completion.choices[0].message.content
|
||||
|
||||
|
||||
class LMMEngineAzureOpenAI(LMMEngine):
    """Azure-hosted OpenAI chat-completions engine with rough cost tracking."""

    def __init__(
        self,
        base_url=None,
        api_key=None,
        azure_endpoint=None,
        model=None,
        api_version=None,
        rate_limit=-1,
        temperature=None,
        **kwargs,
    ):
        """Record connection settings; the AzureOpenAI client is built lazily."""
        assert model is not None, "model must be provided"
        self.model = model
        self.api_version = api_version
        self.api_key = api_key
        self.azure_endpoint = azure_endpoint
        # Seconds between requests given the per-minute budget.
        self.request_interval = 60.0 / rate_limit if rate_limit != -1 else 0
        self.llm_client = None
        # Running dollar estimate accumulated across generate() calls.
        self.cost = 0.0
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        """Run one chat completion, update self.cost, and return the text."""
        key = self.api_key if self.api_key else os.getenv("AZURE_OPENAI_API_KEY")
        if key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named AZURE_OPENAI_API_KEY"
            )
        version = (
            self.api_version if self.api_version else os.getenv("OPENAI_API_VERSION")
        )
        if version is None:
            raise ValueError(
                "api_version must be provided either as a parameter or as an environment variable named OPENAI_API_VERSION"
            )
        endpoint = (
            self.azure_endpoint
            if self.azure_endpoint
            else os.getenv("AZURE_OPENAI_ENDPOINT")
        )
        if endpoint is None:
            raise ValueError(
                "An Azure API endpoint needs to be provided in either the azure_endpoint parameter or as an environment variable named AZURE_OPENAI_ENDPOINT"
            )
        if not self.llm_client:
            self.llm_client = AzureOpenAI(
                azure_endpoint=endpoint,
                api_key=key,
                api_version=version,
            )
        # The instance-level temperature, when configured, wins over the argument.
        temp = temperature if self.temperature is None else self.temperature
        completion = self.llm_client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_new_tokens if max_new_tokens else 4096,
            temperature=temp,
            **kwargs,
        )
        # Crude flat-rate estimate: $0.02 per (total_tokens + 500)/1000.
        total_tokens = completion.usage.total_tokens
        self.cost += 0.02 * ((total_tokens + 500) / 1000)
        return completion.choices[0].message.content
|
||||
|
||||
|
||||
class LMMEnginevLLM(LMMEngine):
    """Engine for a self-hosted vLLM server behind HTTP Basic authentication."""

    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        rate_limit=-1,
        temperature=None,
        **kwargs,
    ):
        """Store settings; the client is created lazily in generate().

        Args:
            base_url: Server URL; falls back to vLLM_ENDPOINT_URL.
            api_key: API key; falls back to vLLM_API_KEY.
            model: Required model name.
            rate_limit: Requests per minute; -1 disables throttling.
            temperature: When set, overrides the per-call temperature.
        """
        assert model is not None, "model must be provided"
        self.model = model
        self.api_key = api_key
        self.base_url = base_url
        # Minimum delay between requests derived from the per-minute budget.
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(
        self,
        messages,
        temperature=0.0,
        top_p=0.8,
        repetition_penalty=1.05,
        max_new_tokens=4096,
        **kwargs,
    ):
        """Run one chat completion.

        Returns:
            tuple: (response text, usage object reported by the server).

        Raises:
            ValueError: if no API key or endpoint URL is configured.
        """
        api_key = self.api_key or os.getenv("vLLM_API_KEY")
        if api_key is None:
            raise ValueError(
                "A vLLM API key needs to be provided in either the api_key parameter or as an environment variable named vLLM_API_KEY"
            )
        base_url = self.base_url or os.getenv("vLLM_ENDPOINT_URL")
        if base_url is None:
            raise ValueError(
                "An endpoint URL needs to be provided in either the endpoint_url parameter or as an environment variable named vLLM_ENDPOINT_URL"
            )
        if not self.llm_client:
            # SECURITY: these Basic-auth credentials were hard-coded in source
            # control. They remain only as a backward-compatible fallback; set
            # VLLM_BASIC_AUTH_USER / VLLM_BASIC_AUTH_PASSWORD instead and
            # rotate the leaked secret.
            username = os.getenv(
                "VLLM_BASIC_AUTH_USER", "5ad34100ee055a4bae66370a5e683bac"
            )
            password = os.getenv(
                "VLLM_BASIC_AUTH_PASSWORD", "607de8249657a3b3bd036dc96d4c0b2f"
            )
            auth_string = f"{username}:{password}".encode("utf-8")
            basic_auth_encoded = base64.b64encode(auth_string).decode("utf-8")
            basic_auth_header = f"Basic {basic_auth_encoded}"
            self.llm_client = OpenAI(
                base_url=base_url,
                api_key=api_key,
                default_headers={"Authorization": basic_auth_header},
            )
        # Use self.temperature if set, otherwise use the temperature argument.
        temp = self.temperature if self.temperature is not None else temperature
        completion = self.llm_client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_new_tokens if max_new_tokens else 4096,
            temperature=temp,
            top_p=top_p,
            # repetition_penalty is a vLLM extension, not part of the OpenAI
            # API, so it travels in extra_body.
            extra_body={"repetition_penalty": repetition_penalty},
        )

        usage = completion.usage
        response = completion.choices[0].message.content
        return (response, usage)
|
||||
|
||||
|
||||
class LMMEngineHuggingFace(LMMEngine):
    """Engine for a HuggingFace TGI endpoint exposed via the OpenAI-compatible API."""

    def __init__(self, base_url=None, api_key=None, rate_limit=-1, **kwargs):
        """Record connection settings; the client is created lazily."""
        self.base_url = base_url
        self.api_key = api_key
        # Seconds to wait between requests given the per-minute budget.
        self.request_interval = 60.0 / rate_limit if rate_limit != -1 else 0
        self.llm_client = None

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        """Run one chat completion against the TGI server and return its text."""
        token = self.api_key if self.api_key else os.getenv("HF_TOKEN")
        if token is None:
            raise ValueError(
                "A HuggingFace token needs to be provided in either the api_key parameter or as an environment variable named HF_TOKEN"
            )
        endpoint = self.base_url if self.base_url else os.getenv("HF_ENDPOINT_URL")
        if endpoint is None:
            raise ValueError(
                "HuggingFace endpoint must be provided as base_url parameter or as an environment variable named HF_ENDPOINT_URL."
            )
        if not self.llm_client:
            self.llm_client = OpenAI(base_url=endpoint, api_key=token)
        # TGI serves exactly one model, addressed by the fixed name "tgi".
        completion = self.llm_client.chat.completions.create(
            model="tgi",
            messages=messages,
            max_tokens=max_new_tokens if max_new_tokens else 4096,
            temperature=temperature,
            **kwargs,
        )
        return completion.choices[0].message.content
|
||||
|
||||
|
||||
class LMMEngineParasail(LMMEngine):
    """Engine for models hosted on Parasail (OpenAI-compatible API)."""

    def __init__(
        self, base_url=None, api_key=None, model=None, rate_limit=-1, **kwargs
    ):
        """Store settings; the client is created lazily in generate().

        Args:
            base_url: Endpoint URL; falls back to PARASAIL_ENDPOINT_URL.
            api_key: API key; falls back to PARASAIL_API_KEY.
            model: Required Parasail model id.
            rate_limit: Requests per minute; -1 disables throttling.
        """
        assert model is not None, "Parasail model id must be provided"
        self.base_url = base_url
        self.model = model
        self.api_key = api_key
        # Minimum delay between requests derived from the per-minute budget.
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        """Run one chat completion and return the assistant text.

        Raises:
            ValueError: if no API key or endpoint URL is configured.
        """
        api_key = self.api_key or os.getenv("PARASAIL_API_KEY")
        if api_key is None:
            raise ValueError(
                "A Parasail API key needs to be provided in either the api_key parameter or as an environment variable named PARASAIL_API_KEY"
            )
        # BUGFIX: the original error message promised a PARASAIL_ENDPOINT_URL
        # fallback but the env variable was never read, and the
        # "https://api.parasail.io/v1" default below the raise was dead code.
        # Honor the documented fallback before raising.
        base_url = self.base_url or os.getenv("PARASAIL_ENDPOINT_URL")
        if base_url is None:
            raise ValueError(
                "Parasail endpoint must be provided as base_url parameter or as an environment variable named PARASAIL_ENDPOINT_URL"
            )
        if not self.llm_client:
            self.llm_client = OpenAI(
                base_url=base_url,
                api_key=api_key,
            )
        return (
            self.llm_client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_new_tokens if max_new_tokens else 4096,
                temperature=temperature,
                **kwargs,
            )
            .choices[0]
            .message.content
        )
|
||||
308
mm_agents/os_symphony/core/mllm.py
Executable file
308
mm_agents/os_symphony/core/mllm.py
Executable file
@@ -0,0 +1,308 @@
|
||||
import base64
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mm_agents.os_symphony.core.engine import (
|
||||
LMMEngineAnthropic,
|
||||
LMMEngineAzureOpenAI,
|
||||
LMMEngineHuggingFace,
|
||||
LMMEngineOpenAI,
|
||||
LMMEngineOpenRouter,
|
||||
LMMEngineParasail,
|
||||
LMMEnginevLLM,
|
||||
LMMEngineGemini,
|
||||
)
|
||||
|
||||
|
||||
class LMMAgent:
    """Conversation manager around a single LMM engine.

    Builds the engine from ``engine_params["engine_type"]`` (or adopts a
    pre-built engine) and maintains ``self.messages`` in the wire format the
    engine family expects: OpenAI-style ``image_url`` parts with a detail
    field, Anthropic-style base64 ``image`` source blocks, or vLLM-style
    ``image_url`` parts without a detail field.
    """

    def __init__(self, engine_params: dict, system_prompt=None, engine=None):
        """Create the agent.

        Args:
            engine_params: Engine constructor kwargs; must contain
                "engine_type" when no pre-built engine is given.
            system_prompt: Optional system prompt; a generic default is used
                when omitted.
            engine: Optional pre-built engine instance, used as-is.
        """
        if engine is None:
            if engine_params is not None:
                # Dispatch on the declared engine family.
                engine_type = engine_params.get("engine_type")
                if engine_type == "openai":
                    self.engine = LMMEngineOpenAI(**engine_params)
                elif engine_type == "anthropic":
                    self.engine = LMMEngineAnthropic(**engine_params)
                elif engine_type == "azure":
                    self.engine = LMMEngineAzureOpenAI(**engine_params)
                elif engine_type == "vllm":
                    self.engine = LMMEnginevLLM(**engine_params)
                elif engine_type == "huggingface":
                    self.engine = LMMEngineHuggingFace(**engine_params)
                elif engine_type == "gemini":
                    self.engine = LMMEngineGemini(**engine_params)
                elif engine_type == "open_router":
                    self.engine = LMMEngineOpenRouter(**engine_params)
                elif engine_type == "parasail":
                    self.engine = LMMEngineParasail(**engine_params)
                else:
                    raise ValueError(f"engine_type '{engine_type}' is not supported")
            else:
                raise ValueError("engine_params must be provided")
        else:
            self.engine = engine

        self.messages = []  # Empty messages
        # NOTE(review): engine_params is read here even when a pre-built
        # engine was supplied; engine_params=None with engine set would raise
        # AttributeError — confirm callers always pass engine_params.
        self.agent_name = engine_params.get("agent_name")
        if system_prompt:
            self.add_system_prompt(system_prompt)
        else:
            self.add_system_prompt("You are a helpful assistant.")

    def encode_image(self, image_content):
        """Return the base64 string for an image given as a file path or raw bytes."""
        # if image_content is a path to an image file, check type of the image_content to verify
        if isinstance(image_content, str):
            with open(image_content, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        else:
            return base64.b64encode(image_content).decode("utf-8")

    def reset(
        self,
    ):
        """Drop the conversation history, keeping only the system prompt."""

        self.messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": self.system_prompt}],
            }
        ]

    def add_system_prompt(self, system_prompt):
        """Set (or replace) the system prompt as the first message in the list."""
        self.system_prompt = system_prompt
        if len(self.messages) > 0:
            # Overwrite the existing system message in place.
            self.messages[0] = {
                "role": "system",
                "content": [{"type": "text", "text": self.system_prompt}],
            }
        else:
            self.messages.append(
                {
                    "role": "system",
                    "content": [{"type": "text", "text": self.system_prompt}],
                }
            )

    def remove_message_at(self, index):
        """Remove a message at a given index"""
        if index < len(self.messages):
            self.messages.pop(index)

    def replace_message_at(
        self, index, text_content, image_content=None, image_detail="high"
    ):
        """Replace a message at a given index"""
        if index < len(self.messages):
            # Keep the original role but rebuild the content from scratch.
            self.messages[index] = {
                "role": self.messages[index]["role"],
                "content": [{"type": "text", "text": text_content}],
            }
            if image_content:
                base64_image = self.encode_image(image_content)
                # NOTE(review): always emits the OpenAI-style image_url shape,
                # regardless of engine family — confirm this method is only
                # used with OpenAI-compatible engines.
                self.messages[index]["content"].append(
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{base64_image}",
                            "detail": image_detail,
                        },
                    }
                )

    def add_message(
        self,
        text_content,
        image_content=None,
        role=None,
        image_detail="high",
        put_text_last=True,
    ):
        """Add a new message to the list of messages"""

        # API-style inference from OpenAI and AzureOpenAI
        if isinstance(
            self.engine,
            (
                LMMEngineOpenAI,
                LMMEngineAzureOpenAI,
                LMMEngineHuggingFace,
                LMMEngineGemini,
                LMMEngineOpenRouter,
                LMMEngineParasail,
            ),
        ):
            # infer role from previous message: alternate user/assistant turns
            if role != "user":
                if self.messages[-1]["role"] == "system":
                    role = "user"
                elif self.messages[-1]["role"] == "user":
                    role = "assistant"
                elif self.messages[-1]["role"] == "assistant":
                    role = "user"

            message = {
                "role": role,
                "content": [{"type": "text", "text": text_content}],
            }

            # np.ndarray is truth-ambiguous, so it needs an explicit isinstance check.
            if isinstance(image_content, np.ndarray) or image_content:
                # Check if image_content is a list or a single image
                if isinstance(image_content, list):
                    # If image_content is a list of images, loop through each image
                    for image in image_content:
                        base64_image = self.encode_image(image)
                        message["content"].append(
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/png;base64,{base64_image}",
                                    "detail": image_detail,
                                },
                            }
                        )
                else:
                    # If image_content is a single image, handle it directly
                    base64_image = self.encode_image(image_content)
                    message["content"].append(
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}",
                                "detail": image_detail,
                            },
                        }
                    )

            # Rotate text to be the last message if desired
            if put_text_last:
                text_content = message["content"].pop(0)
                message["content"].append(text_content)

            self.messages.append(message)

        # For API-style inference from Anthropic
        elif isinstance(self.engine, LMMEngineAnthropic):
            # infer role from previous message: alternate user/assistant turns
            if role != "user":
                if self.messages[-1]["role"] == "system":
                    role = "user"
                elif self.messages[-1]["role"] == "user":
                    role = "assistant"
                elif self.messages[-1]["role"] == "assistant":
                    role = "user"

            message = {
                "role": role,
                "content": [{"type": "text", "text": text_content}],
            }

            if image_content:
                # Check if image_content is a list or a single image
                if isinstance(image_content, list):
                    # If image_content is a list of images, loop through each image
                    for image in image_content:
                        base64_image = self.encode_image(image)
                        message["content"].append(
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": "image/png",
                                    "data": base64_image,
                                },
                            }
                        )
                else:
                    # If image_content is a single image, handle it directly
                    base64_image = self.encode_image(image_content)
                    message["content"].append(
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": base64_image,
                            },
                        }
                    )
            self.messages.append(message)

        # Locally hosted vLLM model inference
        elif isinstance(self.engine, LMMEnginevLLM):
            # infer role from previous message: alternate user/assistant turns
            if role != "user":
                if self.messages[-1]["role"] == "system":
                    role = "user"
                elif self.messages[-1]["role"] == "user":
                    role = "assistant"
                elif self.messages[-1]["role"] == "assistant":
                    role = "user"

            message = {
                "role": role,
                "content": [{"type": "text", "text": text_content}],
            }

            if image_content:
                # Check if image_content is a list or a single image
                if isinstance(image_content, list):
                    # If image_content is a list of images, loop through each image
                    for image in image_content:
                        base64_image = self.encode_image(image)
                        message["content"].append(
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image;base64,{base64_image}"
                                },
                            }
                        )
                else:
                    # If image_content is a single image, handle it directly
                    base64_image = self.encode_image(image_content)
                    message["content"].append(
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image;base64,{base64_image}"},
                        }
                    )

            # Rotate text to be the last content part if desired.
            if put_text_last:
                text_content = message["content"].pop(0)
                message["content"].append(text_content)
            self.messages.append(message)
        else:
            raise ValueError("engine_type is not supported")

    def get_response(
        self,
        user_message=None,
        messages=None,
        temperature=0.0,
        max_new_tokens=32168,
        use_thinking=False,
        **kwargs,
    ):
        """Generate the next response based on previous messages"""
        if messages is None:
            messages = self.messages
        if user_message:
            # NOTE(review): when messages is None this appends to
            # self.messages as a side effect — confirm that is intended.
            messages.append(
                {"role": "user", "content": [{"type": "text", "text": user_message}]}
            )

        # Regular generation
        # if use_thinking:
        #     return self.engine.generate_with_thinking(
        #         messages,
        #         temperature=temperature,
        #         max_new_tokens=max_new_tokens,
        #         **kwargs,
        #     )

        return self.engine.generate(
            messages,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            **kwargs,
        )
|
||||
17
mm_agents/os_symphony/core/module.py
Executable file
17
mm_agents/os_symphony/core/module.py
Executable file
@@ -0,0 +1,17 @@
|
||||
from typing import Dict, Optional
|
||||
from mm_agents.os_symphony.core.mllm import LMMAgent
|
||||
|
||||
|
||||
class BaseModule:
    """Common base for agent modules: holds engine configuration and target platform."""

    def __init__(self, engine_params: Dict = None, platform: str = "Linux"):
        # Default engine configuration used when _create_agent receives none.
        self.engine_params = engine_params
        self.platform = platform

    def _create_agent(
        self, system_prompt: str = None, engine_params: Optional[Dict] = None
    ) -> LMMAgent:
        """Build a fresh LMMAgent, optionally seeded with a system prompt."""
        params = engine_params or self.engine_params
        agent = LMMAgent(params)
        if system_prompt:
            agent.add_system_prompt(system_prompt)
        return agent
|
||||
Reference in New Issue
Block a user