"""Engines wrapping LMM chat-completion backends (OpenAI, Anthropic, Gemini, OpenRouter, Azure OpenAI, vLLM, HuggingFace TGI, Parasail)."""

import base64
import copy
import json
import logging
import os

import backoff
from anthropic import Anthropic
from openai import (
    APIConnectionError,
    APIError,
    AzureOpenAI,
    OpenAI,
    RateLimitError,
)

logger = logging.getLogger("desktopenv.agents.engine")


class LMMEngine:
    pass


class LMMEngineOpenAI(LMMEngine):
    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        rate_limit=-1,
        temperature=None,
        organization=None,
        **kwargs,
    ):
        assert model is not None, "model must be provided"
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.organization = organization
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None
        # Can force the temperature for every call (e.g. o3 requires temperature to be 1)
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        api_key = self.api_key or os.getenv("OPENAI_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named OPENAI_API_KEY"
            )
        organization = self.organization or os.getenv("OPENAI_ORG_ID")

        # H cluster authentication, remove this at the very end!!!!!!
        if (
            self.model.lower().startswith("ui")
            or self.model.lower().startswith("qwen")
            or self.model.lower().startswith("scale")
            or self.model.lower().startswith("holo")
        ):
            custom_headers = {
                "Authorization": "Basic NWFkMzQxMDBlZTA1NWE0YmFlNjYzNzBhNWU2ODNiYWM6NjA3ZGU4MjQ5NjU3YTNiM2JkMDM2ZGM5NmQ0YzBiMmY="
            }
        else:
            custom_headers = {}

        if not self.llm_client:
            if not self.base_url:
                self.llm_client = OpenAI(
                    api_key=api_key,
                    organization=organization,
                    default_headers=custom_headers,
                )
            else:
                self.llm_client = OpenAI(
                    base_url=self.base_url,
                    api_key=api_key,
                    organization=organization,
                    default_headers=custom_headers,
                )

        payload_size = len(json.dumps(messages)) / 1024 / 1024
        logger.info(f"Payload size: {payload_size:.2f} MB")
        if payload_size > 30:
            logger.info("Payload size exceeds 30MB!!!")

        result = self.llm_client.chat.completions.create(
            model=self.model,
            messages=messages,
            # max_completion_tokens=max_new_tokens if max_new_tokens else 4096,
            temperature=(
                temperature if self.temperature is None else self.temperature
            ),
            **kwargs,
        )
        usage = result.usage
        response = result.choices[0].message.content
        return (response, usage)


class LMMEngineAnthropic(LMMEngine):
    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        thinking=False,
        temperature=None,
        **kwargs,
    ):
        assert model is not None, "model must be provided"
        self.model = model
        self.thinking = thinking
        self.api_key = api_key
        self.llm_client = None
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        api_key = self.api_key or os.getenv("ANTHROPIC_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named ANTHROPIC_API_KEY"
            )
        self.llm_client = Anthropic(api_key=api_key)

        # Fall back to the instance temperature when the call explicitly passes temperature=None
        temp = self.temperature if temperature is None else temperature

        if self.thinking:
            full_response = self.llm_client.messages.create(
                system=messages[0]["content"][0]["text"],
                model=self.model,
                messages=messages[1:],
                max_tokens=8192,
                thinking={"type": "enabled", "budget_tokens": 4096},
                **kwargs,
            )
            thoughts = full_response.content[0].thinking
            return full_response.content[1].text

        return (
            self.llm_client.messages.create(
                system=messages[0]["content"][0]["text"],
                model=self.model,
                messages=messages[1:],
                max_tokens=max_new_tokens if max_new_tokens else 4096,
                temperature=temp,
                **kwargs,
            )
            .content[0]
            .text
        )

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    # Compatible with Claude-3.7 Sonnet thinking mode
    def generate_with_thinking(
        self, messages, temperature=0.0, max_new_tokens=None, **kwargs
    ):
        """Generate the next message based on previous messages and keep the thinking tokens."""
        api_key = self.api_key or os.getenv("ANTHROPIC_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named ANTHROPIC_API_KEY"
            )
        self.llm_client = Anthropic(api_key=api_key)

        full_response = self.llm_client.messages.create(
            system=messages[0]["content"][0]["text"],
            model=self.model,
            messages=messages[1:],
            max_tokens=8192,
            thinking={"type": "enabled", "budget_tokens": 4096},
            **kwargs,
        )
        thoughts = full_response.content[0].thinking
        answer = full_response.content[1].text
        full_response = f"\n{thoughts}\n\n\n\n{answer}\n\n"
        return full_response


class LMMEngineGemini(LMMEngine):
    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        rate_limit=-1,
        temperature=None,
        **kwargs,
    ):
        assert model is not None, "model must be provided"
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        api_key = self.api_key or os.getenv("GEMINI_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named GEMINI_API_KEY"
            )
        base_url = self.base_url or os.getenv("GEMINI_ENDPOINT_URL")
        if base_url is None:
            raise ValueError(
                "An endpoint URL needs to be provided in either the endpoint_url parameter or as an environment variable named GEMINI_ENDPOINT_URL"
            )
        if not self.llm_client:
            self.llm_client = OpenAI(base_url=base_url, api_key=api_key)

        # Use the temperature passed to generate; fall back to the instance temperature when None is passed
        temp = self.temperature if temperature is None else temperature

        return (
            self.llm_client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_new_tokens if max_new_tokens else 4096,
                temperature=temp,
                **kwargs,
            )
            .choices[0]
            .message.content
        )


class LMMEngineOpenRouter(LMMEngine):
    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        rate_limit=-1,
        temperature=None,
        **kwargs,
    ):
        assert model is not None, "model must be provided"
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        api_key = self.api_key or os.getenv("OPENROUTER_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named OPENROUTER_API_KEY"
            )
        base_url = self.base_url or os.getenv("OPEN_ROUTER_ENDPOINT_URL")
        if base_url is None:
            raise ValueError(
                "An endpoint URL needs to be provided in either the endpoint_url parameter or as an environment variable named OPEN_ROUTER_ENDPOINT_URL"
            )
        if not self.llm_client:
            self.llm_client = OpenAI(base_url=base_url, api_key=api_key)

        # Use self.temperature if set, otherwise use the temperature argument
        temp = self.temperature if self.temperature is not None else temperature

        return (
            self.llm_client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_new_tokens if max_new_tokens else 4096,
                temperature=temp,
                **kwargs,
            )
            .choices[0]
            .message.content
        )


class LMMEngineAzureOpenAI(LMMEngine):
    def __init__(
        self,
        base_url=None,
        api_key=None,
        azure_endpoint=None,
        model=None,
        api_version=None,
        rate_limit=-1,
        temperature=None,
        **kwargs,
    ):
        assert model is not None, "model must be provided"
        self.model = model
        self.api_version = api_version
        self.api_key = api_key
        self.azure_endpoint = azure_endpoint
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None
        self.cost = 0.0
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        api_key = self.api_key or os.getenv("AZURE_OPENAI_API_KEY")
        if api_key is None:
            raise ValueError(
                "An API Key needs to be provided in either the api_key parameter or as an environment variable named AZURE_OPENAI_API_KEY"
            )
        api_version = self.api_version or os.getenv("OPENAI_API_VERSION")
        if api_version is None:
            raise ValueError(
                "api_version must be provided either as a parameter or as an environment variable named OPENAI_API_VERSION"
            )
        azure_endpoint = self.azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        if azure_endpoint is None:
            raise ValueError(
                "An Azure API endpoint needs to be provided in either the azure_endpoint parameter or as an environment variable named AZURE_OPENAI_ENDPOINT"
            )
        if not self.llm_client:
            self.llm_client = AzureOpenAI(
                azure_endpoint=azure_endpoint,
                api_key=api_key,
                api_version=api_version,
            )

        # Use self.temperature if set, otherwise use the temperature argument
        temp = self.temperature if self.temperature is not None else temperature

        completion = self.llm_client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_new_tokens if max_new_tokens else 4096,
            temperature=temp,
            **kwargs,
        )
        total_tokens = completion.usage.total_tokens
        self.cost += 0.02 * ((total_tokens + 500) / 1000)
        return completion.choices[0].message.content


class LMMEnginevLLM(LMMEngine):
    def __init__(
        self,
        base_url=None,
        api_key=None,
        model=None,
        rate_limit=-1,
        temperature=None,
        **kwargs,
    ):
        assert model is not None, "model must be provided"
        self.model = model
        self.api_key = api_key
        self.base_url = base_url
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None
        self.temperature = temperature

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(
        self,
        messages,
        temperature=0.0,
        top_p=0.8,
        repetition_penalty=1.05,
        max_new_tokens=4096,
        **kwargs,
    ):
        api_key = self.api_key or os.getenv("vLLM_API_KEY")
        if api_key is None:
            raise ValueError(
                "A vLLM API key needs to be provided in either the api_key parameter or as an environment variable named vLLM_API_KEY"
            )
        base_url = self.base_url or os.getenv("vLLM_ENDPOINT_URL")
        if base_url is None:
            raise ValueError(
                "An endpoint URL needs to be provided in either the endpoint_url parameter or as an environment variable named vLLM_ENDPOINT_URL"
            )
        if not self.llm_client:
            # Hard-coded basic-auth credentials for the cluster endpoint
            USERNAME = "5ad34100ee055a4bae66370a5e683bac"
            PASSWORD = "607de8249657a3b3bd036dc96d4c0b2f"
            auth_string = f"{USERNAME}:{PASSWORD}".encode("utf-8")
            basic_auth_encoded = base64.b64encode(auth_string).decode("utf-8")
            basic_auth_header = f"Basic {basic_auth_encoded}"
            self.llm_client = OpenAI(
                base_url=base_url,
                api_key=api_key,
                default_headers={"Authorization": basic_auth_header},
            )

        # Use self.temperature if set, otherwise use the temperature argument
        temp = self.temperature if self.temperature is not None else temperature

        completion = self.llm_client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_new_tokens if max_new_tokens else 4096,
            temperature=temp,
            top_p=top_p,
            extra_body={"repetition_penalty": repetition_penalty},
        )
        usage = completion.usage
        response = completion.choices[0].message.content
        return (response, usage)


class LMMEngineHuggingFace(LMMEngine):
    def __init__(self, base_url=None, api_key=None, rate_limit=-1, **kwargs):
        self.base_url = base_url
        self.api_key = api_key
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        api_key = self.api_key or os.getenv("HF_TOKEN")
        if api_key is None:
            raise ValueError(
                "A HuggingFace token needs to be provided in either the api_key parameter or as an environment variable named HF_TOKEN"
            )
        base_url = self.base_url or os.getenv("HF_ENDPOINT_URL")
        if base_url is None:
            raise ValueError(
                "HuggingFace endpoint must be provided as base_url parameter or as an environment variable named HF_ENDPOINT_URL."
            )
        if not self.llm_client:
            self.llm_client = OpenAI(base_url=base_url, api_key=api_key)
        return (
            self.llm_client.chat.completions.create(
                model="tgi",
                messages=messages,
                max_tokens=max_new_tokens if max_new_tokens else 4096,
                temperature=temperature,
                **kwargs,
            )
            .choices[0]
            .message.content
        )


class LMMEngineParasail(LMMEngine):
    def __init__(
        self, base_url=None, api_key=None, model=None, rate_limit=-1, **kwargs
    ):
        assert model is not None, "Parasail model id must be provided"
        self.base_url = base_url
        self.model = model
        self.api_key = api_key
        self.request_interval = 0 if rate_limit == -1 else 60.0 / rate_limit
        self.llm_client = None

    @backoff.on_exception(
        backoff.expo, (APIConnectionError, APIError, RateLimitError), max_time=60
    )
    def generate(self, messages, temperature=0.0, max_new_tokens=None, **kwargs):
        api_key = self.api_key or os.getenv("PARASAIL_API_KEY")
        if api_key is None:
            raise ValueError(
                "A Parasail API key needs to be provided in either the api_key parameter or as an environment variable named PARASAIL_API_KEY"
            )
        base_url = self.base_url or os.getenv("PARASAIL_ENDPOINT_URL")
        if base_url is None:
            raise ValueError(
                "Parasail endpoint must be provided as base_url parameter or as an environment variable named PARASAIL_ENDPOINT_URL"
            )
        if not self.llm_client:
            self.llm_client = OpenAI(
                base_url=base_url if base_url else "https://api.parasail.io/v1",
                api_key=api_key,
            )
        return (
            self.llm_client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_new_tokens if max_new_tokens else 4096,
                temperature=temperature,
                **kwargs,
            )
            .choices[0]
            .message.content
        )
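

# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal example of how one of these engines is typically driven. The model
# name, the message payload, and the assumption that OPENAI_API_KEY is set in
# the environment are illustrative choices, not part of this module's API.
if __name__ == "__main__":
    example_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one short sentence."},
    ]

    # Any chat-completion model name accepted by the backend could be used here.
    engine = LMMEngineOpenAI(model="gpt-4o")
    response, usage = engine.generate(example_messages, temperature=0.0)
    print(response)
    print(usage)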