import os
import re
import json
import logging
import backoff
import openai
from typing import Dict, List, Tuple, Optional

from io import BytesIO
from PIL import Image

from mm_agents.evocua.utils import (
    process_image,
    encode_image,
    rewrite_pyautogui_text_inputs,
    project_coordinate_to_absolute_scale,
    log_messages
)

from mm_agents.evocua.prompts import (
    S1_SYSTEM_PROMPT,
    S1_INSTRUTION_TEMPLATE,
    S1_STEP_TEMPLATE,
    S1_ACTION_HISTORY_TEMPLATE,
    S2_ACTION_DESCRIPTION,
    S2_DESCRIPTION_PROMPT_TEMPLATE,
    S2_SYSTEM_PROMPT,
    build_s2_tools_def
)

logger = logging.getLogger("desktopenv.evocua")


class EvoCUAAgent:
    """
    EvoCUA - A Native GUI agent model for desktop automation.
    """

    def __init__(
        self,
        model: str = "EvoCUA-S2",
        max_tokens: int = 32768,
        top_p: float = 0.9,
        temperature: float = 0.0,
        action_space: str = "pyautogui",
        observation_type: str = "screenshot",
        max_steps: int = 50,
        prompt_style: str = "S2",  # "S1" or "S2"
        max_history_turns: int = 4,
        screen_size: Tuple[int, int] = (1920, 1080),
        coordinate_type: str = "relative",
        password: str = "osworld-public-evaluation",
        resize_factor: int = 32,
        **kwargs
    ):
        self.model = model
        self.max_tokens = max_tokens
        self.top_p = top_p
        self.temperature = temperature
        self.action_space = action_space
        self.observation_type = observation_type
        self.max_steps = max_steps

        self.prompt_style = prompt_style
        assert self.prompt_style in ["S1", "S2"], f"Invalid prompt_style: {self.prompt_style}"

        self.max_history_turns = max_history_turns

        self.screen_size = screen_size
        self.coordinate_type = coordinate_type
        self.password = password
        self.resize_factor = resize_factor

        # Action space assertion
        assert self.action_space == "pyautogui", f"Invalid action space: {self.action_space}"
        assert self.observation_type == "screenshot", f"Invalid observation type: {self.observation_type}"

        # State
        self.thoughts = []
        self.actions = []
        self.observations = []
        self.responses = []
        self.screenshots = []  # Stores encoded string
        self.cots = []  # For S1 style history

    def reset(self, _logger=None, vm_ip=None):
        global logger
        if _logger:
            logger = _logger

        self.thoughts = []
        self.actions = []
        self.observations = []
        self.responses = []
        self.screenshots = []
        self.cots = []

    def predict(self, instruction: str, obs: Dict) -> Tuple[Optional[str], List[str]]:
        """
        Main prediction step: returns the raw model response and the list of
        pyautogui commands (or the DONE / FAIL / WAIT markers) for this step.
        """
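        # Illustrative note (not in the original source): `obs` is only consumed below via
        # obs["screenshot"], which is expected to hold the raw screenshot image bytes
        # (e.g. PNG) for the current step.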
        logger.info(f"========================== {self.model} ===================================")
        logger.info(f"Instruction: \n{instruction}")

        screenshot_bytes = obs["screenshot"]

        try:
            original_img = Image.open(BytesIO(screenshot_bytes))
            original_width, original_height = original_img.size
        except Exception as e:
            logger.warning(f"Failed to read screenshot size, falling back to screen_size: {e}")
            original_width, original_height = self.screen_size

        if self.prompt_style == "S1":
            raw_b64 = encode_image(screenshot_bytes)
            self.screenshots.append(raw_b64)
            return self._predict_s1(instruction, obs, raw_b64)
        else:
            processed_b64, p_width, p_height = process_image(screenshot_bytes, factor=self.resize_factor)
            self.screenshots.append(processed_b64)
            return self._predict_s2(
                instruction,
                obs,
                processed_b64,
                p_width,
                p_height,
                original_width,
                original_height,
            )

    def _predict_s2(self, instruction, obs, processed_b64, p_width, p_height, original_width, original_height):
        current_step = len(self.actions)
        current_history_n = self.max_history_turns

        response = None

        if self.coordinate_type == "absolute":
            resolution_info = f"* The screen's resolution is {p_width}x{p_height}."
        else:
            resolution_info = "* The screen's resolution is 1000x1000."

        description_prompt = S2_DESCRIPTION_PROMPT_TEMPLATE.format(resolution_info=resolution_info)

        tools_def = build_s2_tools_def(description_prompt)

        system_prompt = S2_SYSTEM_PROMPT.format(tools_xml=json.dumps(tools_def))

        # Retry loop for context length
        while True:
            messages = self._build_s2_messages(
                instruction,
                processed_b64,
                current_step,
                current_history_n,
                system_prompt
            )

            try:
                response = self.call_llm({
                    "model": self.model,
                    "messages": messages,
                    "max_tokens": self.max_tokens,
                    "top_p": self.top_p,
                    "temperature": self.temperature,
                })
                break
            except Exception as e:
                # Handle "context too large" errors by shrinking the history window and retrying
                if self._should_giveup_on_context_error(e) and current_history_n > 0:
                    current_history_n -= 1
                    logger.warning(f"Context too large, retrying with history_n={current_history_n}")
                else:
                    logger.error(f"Error in predict: {e}")
                    break

        self.responses.append(response)

        low_level_instruction, pyautogui_code = self._parse_response_s2(
            response, p_width, p_height, original_width, original_height
        )

        # Force termination with FAIL once the step budget is reached
        current_step = len(self.actions) + 1
        first_action = pyautogui_code[0] if pyautogui_code else ""
        if current_step >= self.max_steps and str(first_action).upper() not in ("DONE", "FAIL"):
            logger.warning(f"Reached maximum steps {self.max_steps}. Forcing termination with FAIL.")
            low_level_instruction = "Fail the task because reaching the maximum step limit."
            pyautogui_code = ["FAIL"]

        logger.info(f"Low level instruction: {low_level_instruction}")
        logger.info(f"Pyautogui code: {pyautogui_code}")

        self.actions.append(low_level_instruction)
        return response, pyautogui_code

    def _build_s2_messages(self, instruction, current_img, step, history_n, system_prompt):
        messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]

        previous_actions = []
        history_start_idx = max(0, step - history_n)
        for i in range(history_start_idx):
            if i < len(self.actions):
                previous_actions.append(f"Step {i+1}: {self.actions[i]}")
        previous_actions_str = "\n".join(previous_actions) if previous_actions else "None"

        # Add History
        history_len = min(history_n, len(self.responses))
        if history_len > 0:
            hist_responses = self.responses[-history_len:]
            hist_imgs = self.screenshots[-history_len-1:-1]

            for i in range(history_len):
                if i < len(hist_imgs):
                    screenshot_b64 = hist_imgs[i]
                    if i == 0:
                        # First history item: Inject Instruction + Previous Actions Context
                        img_url = f"data:image/png;base64,{screenshot_b64}"
                        instruction_prompt = f"""
Please generate the next move according to the UI screenshot, instruction and previous actions.

Instruction: {instruction}

Previous actions:
{previous_actions_str}"""
                        messages.append({
                            "role": "user",
                            "content": [
                                {"type": "image_url", "image_url": {"url": img_url}},
                                {"type": "text", "text": instruction_prompt}
                            ]
                        })
                    else:
                        img_url = f"data:image/png;base64,{screenshot_b64}"
                        messages.append({
                            "role": "user",
                            "content": [
                                {"type": "image_url", "image_url": {"url": img_url}},
                            ]
                        })

                messages.append({
                    "role": "assistant",
                    "content": [{"type": "text", "text": hist_responses[i]}]
                })

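        # Descriptive note (added for clarity, not in the original source): after the loop above,
        # `messages` holds the system prompt followed by alternating user (screenshot) /
        # assistant (prior response) turns for the last `history_n` steps, with the task
        # instruction attached only to the first history turn.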
        # Current Turn
        # We re-use previous_actions_str logic for the case where history_len == 0

        if history_len == 0:
            # First turn logic: Include Instruction + Previous Actions
            instruction_prompt = f"""
Please generate the next move according to the UI screenshot, instruction and previous actions.

Instruction: {instruction}

Previous actions:
{previous_actions_str}"""
            messages.append({
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{current_img}"}},
                    {"type": "text", "text": instruction_prompt}
                ]
            })
        else:
            # Subsequent turns logic (context already in first history message): Image Only
            messages.append({
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{current_img}"}}
                ]
            })

        return messages

    def _parse_response_s2(
        self,
        response: str,
        processed_width: Optional[int] = None,
        processed_height: Optional[int] = None,
        original_width: Optional[int] = None,
        original_height: Optional[int] = None,
    ) -> Tuple[str, List[str]]:
        """
        Parse the LLM response and convert it to a low-level action description and pyautogui code.
        """
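        # Expected response shape (illustrative, inferred from the parsing logic below): an
        # optional "Action: ..." line followed by one or more <tool_call> blocks whose body is
        # a JSON object such as
        #   {"name": "computer_use", "arguments": {"action": "left_click", "coordinate": [500, 300]}}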
        # Prefer the real screenshot resolution (passed from predict), fallback to configured screen_size.
        if not (original_width and original_height):
            original_width, original_height = self.screen_size
        low_level_instruction = ""
        pyautogui_code: List[str] = []

        if response is None or not response.strip():
            return low_level_instruction, pyautogui_code

        def adjust_coordinates(x: float, y: float) -> Tuple[int, int]:
            if not (original_width and original_height):
                return int(x), int(y)
            if self.coordinate_type == "absolute":
                # scale from processed pixels to original
                if processed_width and processed_height:
                    x_scale = original_width / processed_width
                    y_scale = original_height / processed_height
                    return int(x * x_scale), int(y * y_scale)
                return int(x), int(y)
            # relative: scale from 0..999 grid
            x_scale = original_width / 999
            y_scale = original_height / 999
            return int(x * x_scale), int(y * y_scale)

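        # Worked example (illustrative, not in the original source): with coordinate_type="relative"
        # and an original 1920x1080 screenshot, a model output of (500, 500) on the 0..999 grid maps
        # to (int(500 * 1920 / 999), int(500 * 1080 / 999)) == (960, 540).
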
        def process_tool_call(json_str: str) -> None:
            try:
                tool_call = json.loads(json_str)
                if tool_call.get("name") == "computer_use":
                    args = tool_call["arguments"]
                    action = args["action"]

                    def _clean_keys(raw_keys):
                        keys = raw_keys if isinstance(raw_keys, list) else [raw_keys]
                        cleaned_keys = []
                        for key in keys:
                            if isinstance(key, str):
                                if key.startswith("keys=["):
                                    key = key[6:]
                                if key.endswith("]"):
                                    key = key[:-1]
                                if key.startswith("['") or key.startswith('["'):
                                    key = key[2:] if len(key) > 2 else key
                                if key.endswith("']") or key.endswith('"]'):
                                    key = key[:-2] if len(key) > 2 else key
                                key = key.strip()
                                cleaned_keys.append(key)
                            else:
                                cleaned_keys.append(key)
                        return cleaned_keys

                    if action == "left_click" or action == "click":
                        if "coordinate" in args:
                            x, y = args["coordinate"]
                            adj_x, adj_y = adjust_coordinates(x, y)
                            pyautogui_code.append(f"pyautogui.click({adj_x}, {adj_y})")
                        else:
                            pyautogui_code.append("pyautogui.click()")

                    elif action == "right_click":
                        if "coordinate" in args:
                            x, y = args["coordinate"]
                            adj_x, adj_y = adjust_coordinates(x, y)
                            pyautogui_code.append(f"pyautogui.rightClick({adj_x}, {adj_y})")
                        else:
                            pyautogui_code.append("pyautogui.rightClick()")

                    elif action == "middle_click":
                        if "coordinate" in args:
                            x, y = args["coordinate"]
                            adj_x, adj_y = adjust_coordinates(x, y)
                            pyautogui_code.append(f"pyautogui.middleClick({adj_x}, {adj_y})")
                        else:
                            pyautogui_code.append("pyautogui.middleClick()")

                    elif action == "double_click":
                        if "coordinate" in args:
                            x, y = args["coordinate"]
                            adj_x, adj_y = adjust_coordinates(x, y)
                            pyautogui_code.append(f"pyautogui.doubleClick({adj_x}, {adj_y})")
                        else:
                            pyautogui_code.append("pyautogui.doubleClick()")

                    elif action == "triple_click":
                        if "coordinate" in args:
                            x, y = args["coordinate"]
                            adj_x, adj_y = adjust_coordinates(x, y)
                            pyautogui_code.append(f"pyautogui.tripleClick({adj_x}, {adj_y})")
                        else:
                            pyautogui_code.append("pyautogui.tripleClick()")

                    elif action == "type":
                        text = args.get("text", "")

                        try:
                            text = text.encode('latin-1', 'backslashreplace').decode('unicode_escape')
                        except Exception as e:
                            logger.error(f"Failed to unescape text: {e}")

                        logger.info(f"Pyautogui code[before rewrite]: {text}")

                        result = ""
                        for char in text:
                            if char == '\n':
                                result += "pyautogui.press('enter')\n"
                            elif char == "'":
                                result += 'pyautogui.press("\'")\n'
                            elif char == '\\':
                                result += "pyautogui.press('\\\\')\n"
                            elif char == '"':
                                result += "pyautogui.press('\"')\n"
                            else:
                                result += f"pyautogui.press('{char}')\n"

                        pyautogui_code.append(result)
                        logger.info(f"Pyautogui code[after rewrite]: {pyautogui_code}")

                    elif action == "key":
                        keys = _clean_keys(args.get("keys", []))

                        keys_str = ", ".join([f"'{key}'" for key in keys])
                        if len(keys) > 1:
                            pyautogui_code.append(f"pyautogui.hotkey({keys_str})")
                        else:
                            pyautogui_code.append(f"pyautogui.press({keys_str})")

                    elif action == "key_down":
                        keys = _clean_keys(args.get("keys", []))
                        for k in keys:
                            pyautogui_code.append(f"pyautogui.keyDown('{k}')")

                    elif action == "key_up":
                        keys = _clean_keys(args.get("keys", []))
                        for k in reversed(keys):
                            pyautogui_code.append(f"pyautogui.keyUp('{k}')")

                    elif action == "scroll":
                        pixels = args.get("pixels", 0)
                        pyautogui_code.append(f"pyautogui.scroll({pixels})")

                    elif action == "wait":
                        pyautogui_code.append("WAIT")

                    elif action == "terminate":
                        # Termination should respect status:
                        # - success -> DONE
                        # - failure -> FAIL
                        # Backward compatible: missing status defaults to success.
                        status = args.get("status", "success")
                        if str(status).lower() == "failure":
                            pyautogui_code.append("FAIL")
                        else:
                            pyautogui_code.append("DONE")

                    elif action == "mouse_move":
                        if "coordinate" in args:
                            x, y = args["coordinate"]
                            adj_x, adj_y = adjust_coordinates(x, y)
                            pyautogui_code.append(f"pyautogui.moveTo({adj_x}, {adj_y})")
                        else:
                            pyautogui_code.append("pyautogui.moveTo(0, 0)")

                    elif action == "left_click_drag":
                        if "coordinate" in args:
                            x, y = args["coordinate"]
                            adj_x, adj_y = adjust_coordinates(x, y)
                            duration = args.get("duration", 0.5)
                            pyautogui_code.append(f"pyautogui.dragTo({adj_x}, {adj_y}, duration={duration})")
                        else:
                            pyautogui_code.append("pyautogui.dragTo(0, 0)")
            except (json.JSONDecodeError, KeyError) as e:
                logger.error(f"Failed to parse tool call: {e}")

        lines = response.split("\n")
        inside_tool_call = False
        current_tool_call: List[str] = []

        for line in lines:
            line = line.strip()
            if not line:
                continue

            if line.lower().startswith("action:"):
                if not low_level_instruction:
                    # Strip the "Action:" prefix case-insensitively
                    low_level_instruction = line[len("action:"):].strip()
                continue

            if line.startswith("<tool_call>"):
                inside_tool_call = True
                continue
            elif line.startswith("</tool_call>"):
                if current_tool_call:
                    process_tool_call("\n".join(current_tool_call))
                    current_tool_call = []
                inside_tool_call = False
                continue

            if inside_tool_call:
                current_tool_call.append(line)
                continue

            if line.startswith("{") and line.endswith("}"):
                try:
                    json_obj = json.loads(line)
                    if "name" in json_obj and "arguments" in json_obj:
                        process_tool_call(line)
                except json.JSONDecodeError:
                    pass

        if current_tool_call:
            process_tool_call("\n".join(current_tool_call))

        if not low_level_instruction and len(pyautogui_code) > 0:
            first_action = pyautogui_code[0]
            if "." in first_action:
                action_type = first_action.split(".", 1)[1].split("(", 1)[0]
            else:
                action_type = first_action.lower()
            low_level_instruction = f"Performing {action_type} action"

        return low_level_instruction, pyautogui_code

    def _predict_s1(self, instruction, obs, processed_b64):
        messages = [{"role": "system", "content": S1_SYSTEM_PROMPT.format(password=self.password)}]

        # Reconstruct History Logic for S1 mode
        history_step_texts = []

        for i in range(len(self.actions)):
            cot = self.cots[i] if i < len(self.cots) else {}

            # Step Content string
            step_content = S1_STEP_TEMPLATE.format(step_num=i+1) + S1_ACTION_HISTORY_TEMPLATE.format(action=cot.get('action', ''))

            if i > len(self.actions) - self.max_history_turns:
                # Recent history: Add User(Image) and Assistant(Text)
                if i < len(self.screenshots) - 1:  # Screenshot exists for this step
                    img = self.screenshots[i]
                    messages.append({
                        "role": "user",
                        "content": [
                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img}"}}
                        ]
                    })
                messages.append({"role": "assistant", "content": step_content})
            else:
                # Old history: Collect text
                history_step_texts.append(step_content)
                # If this is the last step before the recent window, flush collected texts
                if i == len(self.actions) - self.max_history_turns:
                    messages.append({
                        "role": "assistant",
                        "content": "\n".join(history_step_texts)
                    })

        # Current
        messages.append({
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{processed_b64}"}},
                {"type": "text", "text": S1_INSTRUTION_TEMPLATE.format(instruction=instruction)}
            ]
        })

        response = self.call_llm({
            "model": self.model,
            "messages": messages,
            "max_tokens": self.max_tokens
        })

        low_level, codes, cot_data = self._parse_response_s1(response)

        self.observations.append(obs)
        self.cots.append(cot_data)
        self.actions.append(low_level)
        self.responses.append(response)

        return response, codes

    def _parse_response_s1(self, response):
        sections = {}
        # Simple Regex Parsing
        for key, pattern in [
            ('observation', r'#{1,2}\s*Observation\s*:?[\n\r]+(.*?)(?=^#{1,2}\s|$)'),
            ('thought', r'#{1,2}\s*Thought\s*:?[\n\r]+(.*?)(?=^#{1,2}\s|$)'),
            ('action', r'#{1,2}\s*Action\s*:?[\n\r]+(.*?)(?=^#{1,2}\s|$)')
        ]:
            m = re.search(pattern, response, re.DOTALL | re.MULTILINE)
            if m:
                sections[key] = m.group(1).strip()

        code_blocks = re.findall(r'```(?:code|python)?\s*(.*?)\s*```', response, re.DOTALL | re.IGNORECASE)
        code = code_blocks[-1].strip() if code_blocks else "FAIL"

        sections['code'] = code

        # Post-process code
        if "computer.terminate" in code:
            final_code = ["DONE"] if "success" in code.lower() else ["FAIL"]
        elif "computer.wait" in code:
            final_code = ["WAIT"]
        else:
            # Project coordinates
            code = project_coordinate_to_absolute_scale(
                code,
                self.screen_size[0],
                self.screen_size[1],
                self.coordinate_type,
                self.resize_factor
            )
            logger.info(f"[rewrite before]: {code}")
            final_code = [rewrite_pyautogui_text_inputs(code)]
            logger.info(f"[rewrite after]: {final_code}")

        return sections.get('action', 'Acting'), final_code, sections

    @staticmethod
    def _should_giveup_on_context_error(e):
        """For context-length related errors, give up retrying immediately and let the caller handle it."""
        error_str = str(e)
        return "Too Large" in error_str or "context_length_exceeded" in error_str or "413" in error_str

    @backoff.on_exception(backoff.constant, Exception, interval=30, max_tries=10, giveup=_should_giveup_on_context_error.__func__)
    def call_llm(self, payload):
        """Unified OpenAI-compatible API call."""
        # Get env vars
        base_url = os.environ.get("OPENAI_BASE_URL", "url-xxx")
        api_key = os.environ.get("OPENAI_API_KEY", "sk-xxx")

        client = openai.OpenAI(base_url=base_url, api_key=api_key)

        messages = payload["messages"]
        log_messages(messages, "LLM Request")

        params = {
            "model": payload["model"],
            "messages": messages,
            "max_tokens": payload["max_tokens"],
            "temperature": self.temperature,
            "top_p": self.top_p
        }

        try:
            resp = client.chat.completions.create(**params)
            content = resp.choices[0].message.content
            logger.info(f"LLM Response:\n{content}")
            return content
        except Exception as e:
            logger.error(f"LLM Call failed: {e}")
            raise e
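

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only; not part of the original module).
# Assumes an OSWorld-style observation dict carrying raw screenshot bytes and
# that OPENAI_BASE_URL / OPENAI_API_KEY point at an OpenAI-compatible endpoint
# serving the EvoCUA model. The screenshot path below is hypothetical.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    agent = EvoCUAAgent(model="EvoCUA-S2", prompt_style="S2", screen_size=(1920, 1080))
    agent.reset()

    with open("screenshot.png", "rb") as f:  # hypothetical screenshot file
        obs = {"screenshot": f.read()}

    raw_response, pyautogui_code = agent.predict("Open the file manager", obs)
    print(raw_response)
    print(pyautogui_code)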