# sci-gui-agent-benchmark/mm_agents/os_symphony/core/engine.py
import copy
import json
import logging
import os
import base64
import backoff
from anthropic import Anthropic
from openai import (
    APIConnectionError,
    APIError,
    AzureOpenAI,
    OpenAI,
    RateLimitError,
)
logger = logging.getLogger("desktopenv.agents.engine")
class LMMEngine:
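    """Base class shared by the multimodal LLM backend engines below."""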
pass
class LMMEngineOpenAI(LMMEngine):
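    """Engine for the OpenAI chat-completions API, or any OpenAI-compatible server reachable via base_url."""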
def __init__(
self,
base_url=None,
api_key=None,
model=None,
rate_limit=-1,
temperature=None,
organization=None,
**kwargs,
):
assert model is not None, "model must be provided"
self.model = model
self.base_url = base_url
self.api_key = api_key
self.organization = organization
self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
self.llm_client = None
        self.temperature = temperature  # Optional fixed override (e.g. o3 requires temperature=1)
@backoff.on_exception(
backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
)
def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
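        """Generate the next message from the chat history; returns a (response_text, usage) tuple."""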
api_key = self.api_key or os.getenv("OPENAI_API_KEY")
if api_key is None:
raise ValueError(
"An API Key needs to be provided in either the api_key parameter or as an environment variable named OPENAI_API_KEY"
)
organization = self.organization or os.getenv("OPENAI_ORG_ID")
        # H cluster authentication. Delete this before release!!!
        if self.model.lower().startswith(("ui", "qwen", "scale", "holo")):
            custom_headers = {
                "Authorization": "Basic NWFkMzQxMDBlZTA1NWE0YmFlNjYzNzBhNWU2ODNiYWM6NjA3ZGU4MjQ5NjU3YTNiM2JkMDM2ZGM5NmQ0YzBiMmY="
            }
        else:
            custom_headers = {}
if not self.llm_client:
if not self.base_url:
self.llm_client = OpenAI(
api_key=api_key,
organization=organization,
default_headers=custom_headers
)
else:
self.llm_client = OpenAI(
base_url=self.base_url,
api_key=api_key,
organization=organization,
default_headers=custom_headers
)
        payload_size = len(json.dumps(messages)) / 1024 / 1024
        logger.info(f"Payload size: {payload_size:.2f} MB")
        if payload_size > 30:
            logger.info("Payload size exceeds 30 MB!")
result = self.llm_client.chat.completions.create(
model=self.model,
messages=messages,
# max_completion_tokens=max_new_tokens if max_new_tokens else 4096,
temperature=(
temperature if self.temperature is None else self.temperature
),
**kwargs,
)
usage = result.usage
response = result.choices[0].message.content
return (response, usage)
class LMMEngineAnthropic(LMMEngine):
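    """Engine for the Anthropic Messages API, with optional extended-thinking support."""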
def __init__(
self,
base_url=None,
api_key=None,
model=None,
thinking=False,
temperature=None,
**kwargs,
):
assert model is not None, "model must be provided"
self.model = model
self.thinking = thinking
self.api_key = api_key
self.llm_client = None
self.temperature = temperature
@backoff.on_exception(
backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
)
def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
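        """Generate the next message; returns the response text only (use generate_with_thinking to keep thinking tokens)."""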
api_key = self.api_key or os.getenv("ANTHROPIC_API_KEY")
if api_key is None:
raise ValueError(
"An API Key needs to be provided in either the api_key parameter or as an environment variable named ANTHROPIC_API_KEY"
)
self.llm_client = Anthropic(api_key=api_key)
        # Use self.temperature if set, otherwise use the temperature argument
        temp = self.temperature if self.temperature is not None else temperature
if self.thinking:
full_response = self.llm_client.messages.create(
system=messages[0]["content"][0]["text"],
model=self.model,
messages=messages[1:],
max_tokens=8192,
thinking={"type": "enabled", "budget_tokens": 4096},
**kwargs,
)
thoughts = full_response.content[0].thinking
return full_response.content[1].text
return (
self.llm_client.messages.create(
system=messages[0]["content"][0]["text"],
model=self.model,
messages=messages[1:],
max_tokens=max_new_tokens if max_new_tokens else 4096,
temperature=temp,
**kwargs,
)
.content[0]
.text
)
@backoff.on_exception(
backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
)
# Compatible with Claude-3.7 Sonnet thinking mode
def generate_with_thinking(
self, messages, temperature=0.0, max_new_tokens=None, **kwargs
):
"""Generate the next message based on previous messages, and keeps the thinking tokens"""
api_key = self.api_key or os.getenv("ANTHROPIC_API_KEY")
if api_key is None:
raise ValueError(
"An API Key needs to be provided in either the api_key parameter or as an environment variable named ANTHROPIC_API_KEY"
)
self.llm_client = Anthropic(api_key=api_key)
full_response = self.llm_client.messages.create(
system=messages[0]["content"][0]["text"],
model=self.model,
messages=messages[1:],
max_tokens=8192,
thinking={"type": "enabled", "budget_tokens": 4096},
**kwargs,
)
thoughts = full_response.content[0].thinking
answer = full_response.content[1].text
full_response = (
f"<thoughts>\n{thoughts}\n</thoughts>\n\n<answer>\n{answer}\n</answer>\n"
)
return full_response
class LMMEngineGemini(LMMEngine):
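    """Engine for Gemini served through an OpenAI-compatible endpoint (GEMINI_ENDPOINT_URL)."""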
def __init__(
self,
base_url=None,
api_key=None,
model=None,
rate_limit=-1,
temperature=None,
**kwargs,
):
assert model is not None, "model must be provided"
self.model = model
self.base_url = base_url
self.api_key = api_key
self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
self.llm_client = None
self.temperature = temperature
@backoff.on_exception(
backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
)
def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
api_key = self.api_key or os.getenv("GEMINI_API_KEY")
if api_key is None:
raise ValueError(
"An API Key needs to be provided in either the api_key parameter or as an environment variable named GEMINI_API_KEY"
)
base_url = self.base_url or os.getenv("GEMINI_ENDPOINT_URL")
if base_url is None:
raise ValueError(
"An endpoint URL needs to be provided in either the endpoint_url parameter or as an environment variable named GEMINI_ENDPOINT_URL"
)
if not self.llm_client:
self.llm_client = OpenAI(base_url=base_url, api_key=api_key)
        # Use self.temperature if set, otherwise use the temperature argument
        temp = self.temperature if self.temperature is not None else temperature
return (
self.llm_client.chat.completions.create(
model=self.model,
messages=messages,
max_tokens=max_new_tokens if max_new_tokens else 4096,
temperature=temp,
**kwargs,
)
.choices[0]
.message.content
)
class LMMEngineOpenRouter(LMMEngine):
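    """Engine for OpenRouter's OpenAI-compatible chat-completions endpoint."""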
def __init__(
self,
base_url=None,
api_key=None,
model=None,
rate_limit=-1,
temperature=None,
**kwargs,
):
assert model is not None, "model must be provided"
self.model = model
self.base_url = base_url
self.api_key = api_key
self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
self.llm_client = None
self.temperature = temperature
@backoff.on_exception(
backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
)
def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
api_key = self.api_key or os.getenv("OPENROUTER_API_KEY")
if api_key is None:
raise ValueError(
"An API Key needs to be provided in either the api_key parameter or as an environment variable named OPENROUTER_API_KEY"
)
base_url = self.base_url or os.getenv("OPEN_ROUTER_ENDPOINT_URL")
if base_url is None:
raise ValueError(
"An endpoint URL needs to be provided in either the endpoint_url parameter or as an environment variable named OPEN_ROUTER_ENDPOINT_URL"
)
if not self.llm_client:
self.llm_client = OpenAI(base_url=base_url, api_key=api_key)
# Use self.temperature if set, otherwise use the temperature argument
temp = self.temperature if self.temperature is not None else temperature
return (
self.llm_client.chat.completions.create(
model=self.model,
messages=messages,
max_tokens=max_new_tokens if max_new_tokens else 4096,
temperature=temp,
**kwargs,
)
.choices[0]
.message.content
)
class LMMEngineAzureOpenAI(LMMEngine):
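    """Engine for Azure OpenAI deployments; keeps a rough running cost estimate in self.cost."""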
def __init__(
self,
base_url=None,
api_key=None,
azure_endpoint=None,
model=None,
api_version=None,
rate_limit=-1,
temperature=None,
**kwargs,
):
assert model is not None, "model must be provided"
self.model = model
self.api_version = api_version
self.api_key = api_key
self.azure_endpoint = azure_endpoint
self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
self.llm_client = None
self.cost = 0.0
self.temperature = temperature
@backoff.on_exception(
backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
)
def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
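        """Generate the next message; returns the response text and updates the running cost estimate."""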
api_key = self.api_key or os.getenv("AZURE_OPENAI_API_KEY")
if api_key is None:
raise ValueError(
"An API Key needs to be provided in either the api_key parameter or as an environment variable named AZURE_OPENAI_API_KEY"
)
api_version = self.api_version or os.getenv("OPENAI_API_VERSION")
if api_version is None:
raise ValueError(
"api_version must be provided either as a parameter or as an environment variable named OPENAI_API_VERSION"
)
azure_endpoint = self.azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
if azure_endpoint is None:
raise ValueError(
"An Azure API endpoint needs to be provided in either the azure_endpoint parameter or as an environment variable named AZURE_OPENAI_ENDPOINT"
)
if not self.llm_client:
self.llm_client = AzureOpenAI(
azure_endpoint=azure_endpoint,
api_key=api_key,
api_version=api_version,
)
# Use self.temperature if set, otherwise use the temperature argument
temp = self.temperature if self.temperature is not None else temperature
completion = self.llm_client.chat.completions.create(
model=self.model,
messages=messages,
max_tokens=max_new_tokens if max_new_tokens else 4096,
temperature=temp,
**kwargs,
)
total_tokens = completion.usage.total_tokens
self.cost += 0.02 * ((total_tokens + 500) / 1000)
return completion.choices[0].message.content
class LMMEnginevLLM(LMMEngine):
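    """Engine for a vLLM server exposed through an OpenAI-compatible endpoint."""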
def __init__(
self,
base_url=None,
api_key=None,
model=None,
rate_limit=-1,
temperature=None,
**kwargs,
):
assert model is not None, "model must be provided"
self.model = model
self.api_key = api_key
self.base_url = base_url
self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
self.llm_client = None
self.temperature = temperature
@backoff.on_exception(
backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
)
def generate(
self,
messages,
temperature=0.0,
top_p=0.8,
repetition_penalty=1.05,
max_new_tokens=4096,
**kwargs,
):
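        """Generate the next message; returns a (response_text, usage) tuple."""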
api_key = self.api_key or os.getenv("vLLM_API_KEY")
if api_key is None:
raise ValueError(
"A vLLM API key needs to be provided in either the api_key parameter or as an environment variable named vLLM_API_KEY"
)
base_url = self.base_url or os.getenv("vLLM_ENDPOINT_URL")
if base_url is None:
raise ValueError(
"An endpoint URL needs to be provided in either the endpoint_url parameter or as an environment variable named vLLM_ENDPOINT_URL"
)
if not self.llm_client:
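            # Hard-coded basic-auth credentials for the internal cluster endpoint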
USERNAME = "5ad34100ee055a4bae66370a5e683bac"
PASSWORD = "607de8249657a3b3bd036dc96d4c0b2f"
auth_string = f"{USERNAME}:{PASSWORD}".encode("utf-8")
basic_auth_encoded = base64.b64encode(auth_string).decode("utf-8")
basic_auth_header = f"Basic {basic_auth_encoded}"
            self.llm_client = OpenAI(
                base_url=base_url,
                api_key=api_key,
                default_headers={"Authorization": basic_auth_header},
            )
# Use self.temperature if set, otherwise use the temperature argument
temp = self.temperature if self.temperature is not None else temperature
completion = self.llm_client.chat.completions.create(
model=self.model,
messages=messages,
max_tokens=max_new_tokens if max_new_tokens else 4096,
temperature=temp,
top_p=top_p,
extra_body={"repetition_penalty": repetition_penalty},
)
usage = completion.usage
response = completion.choices[0].message.content
return (response, usage)
class LMMEngineHuggingFace(LMMEngine):
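    """Engine for a HuggingFace Inference Endpoint (TGI) that speaks the OpenAI chat-completions protocol."""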
def __init__(self, base_url=None, api_key=None, rate_limit=-1, **kwargs):
self.base_url = base_url
self.api_key = api_key
self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
self.llm_client = None
@backoff.on_exception(
backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
)
def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
api_key = self.api_key or os.getenv("HF_TOKEN")
if api_key is None:
raise ValueError(
"A HuggingFace token needs to be provided in either the api_key parameter or as an environment variable named HF_TOKEN"
)
base_url = self.base_url or os.getenv("HF_ENDPOINT_URL")
if base_url is None:
raise ValueError(
"HuggingFace endpoint must be provided as base_url parameter or as an environment variable named HF_ENDPOINT_URL."
)
if not self.llm_client:
self.llm_client = OpenAI(base_url=base_url, api_key=api_key)
return (
self.llm_client.chat.completions.create(
model="tgi",
messages=messages,
max_tokens=max_new_tokens if max_new_tokens else 4096,
temperature=temperature,
**kwargs,
)
.choices[0]
.message.content
)
class LMMEngineParasail(LMMEngine):
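    """Engine for the Parasail OpenAI-compatible API."""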
def __init__(
self, base_url=None, api_key=None, model=None, rate_limit=-1, **kwargs
):
assert model is not None, "Parasail model id must be provided"
self.base_url = base_url
self.model = model
self.api_key = api_key
self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
self.llm_client = None
@backoff.on_exception(
backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
)
def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
api_key = self.api_key or os.getenv("PARASAIL_API_KEY")
if api_key is None:
raise ValueError(
"A Parasail API key needs to be provided in either the api_key parameter or as an environment variable named PARASAIL_API_KEY"
)
        base_url = self.base_url or os.getenv("PARASAIL_ENDPOINT_URL")
if base_url is None:
raise ValueError(
"Parasail endpoint must be provided as base_url parameter or as an environment variable named PARASAIL_ENDPOINT_URL"
)
if not self.llm_client:
self.llm_client = OpenAI(
base_url=base_url if base_url else "https://api.parasail.io/v1",
api_key=api_key,
)
return (
self.llm_client.chat.completions.create(
model=self.model,
messages=messages,
max_tokens=max_new_tokens if max_new_tokens else 4096,
temperature=temperature,
**kwargs,
)
.choices[0]
.message.content
)