351 lines
14 KiB
Python
Executable File
351 lines
14 KiB
Python
Executable File
import logging
|
|
from typing import Dict, List, Tuple, Optional
|
|
|
|
from mm_agents.os_symphony.memory.procedural_memory import PROCEDURAL_MEMORY
|
|
from mm_agents.os_symphony.utils.common_utils import call_llm_safe, parse_code_from_string
|
|
from mm_agents.os_symphony.core.mllm import LMMAgent
|
|
|
|
logger = logging.getLogger("desktopenv.coder_agent")
|
|
|
|
|
|
def extract_code_block(action: str) -> Tuple[Optional[str], Optional[str]]:
    """Extract a fenced code block from ``action`` and classify its language.

    Fences are looked up in priority order: a python-tagged fence first, then a
    bash-tagged fence, then any other fence. The language tag is matched
    case-insensitively and as a whole tag, so ```` ```Python ```` and
    ```` ```python3 ```` are recognised as python without leaking tag
    characters into the extracted code. A fence that is never closed yields
    everything up to the end of the string.

    Args:
        action: Text that may contain a markdown code fence.

    Returns:
        A ``(code_type, code)`` tuple. ``code_type`` is ``"python"``,
        ``"bash"``, or ``None`` for an untagged/unrecognised fence; ``code`` is
        the stripped fence body, or ``None`` when ``action`` has no fence. For
        an unrecognised tag the tag text remains part of ``code``.
    """
    # Local import: the module's import block does not provide `re`.
    import re

    code_type: Optional[str] = None
    code: Optional[str] = None

    # (result label, tag pattern) in priority order.
    tagged_fences = (("python", r"python3?"), ("bash", r"bash"))
    for language, tag in tagged_fences:
        # The negative lookahead rejects longer tags such as "pythonic" or
        # "bashrc"; `\Z` lets an unterminated fence run to the end of input.
        found = re.search(
            rf"```{tag}(?![\w+-])(.*?)(?:```|\Z)",
            action,
            re.DOTALL | re.IGNORECASE,
        )
        if found:
            code_type = language
            code = found.group(1).strip()
            break
    else:
        if "```" in action:
            # Untyped fence: everything between the first two fence markers.
            code = action.split("```")[1].split("```")[0].strip()

    logger.debug(
        f"Extracted code block: type={code_type}, length={len(code) if code else 0}"
    )
    return code_type, code
|
|
|
|
|
|
def execute_code(
    code_type: Optional[str], code: str, env_controller, timeout: int = 30
) -> Dict:
    """Run ``code`` through ``env_controller`` according to ``code_type``.

    Args:
        code_type: ``"bash"`` or ``"python"``. Any other value (including
            ``None``) is reported as an error result instead of being run.
        code: Source text to execute.
        env_controller: Object exposing ``run_bash_script(code, timeout=...)``
            and ``run_python_script(code)``.
        timeout: Forwarded to ``run_bash_script`` only; ``run_python_script``
            is called without a timeout. Defaults to the previous hard-coded 30.

    Returns:
        The controller's result dict, or ``{"status": "error", "error": ...}``
        when the code type is unknown, the controller raises, or the
        controller returns ``None``. This function never raises for
        controller failures.
    """
    # Log the full code being executed (untruncated)
    logger.info(f"CODING_AGENT_CODE_EXECUTION - Type: {code_type}\nCode:\n{code}")

    try:
        if code_type == "bash":
            result = env_controller.run_bash_script(code, timeout=timeout)
        elif code_type == "python":
            result = env_controller.run_python_script(code)
        else:
            return {"status": "error", "error": f"Unknown code type: {code_type}"}
    except Exception as e:
        # Best-effort boundary: the failure is reported back to the caller as
        # data so the agent loop can show it to the model and continue.
        logger.exception(f"Error executing {code_type} code: {e}")
        return {"status": "error", "error": str(e)}

    if result is None:
        # Honour the declared Dict return type; callers call `.get` on it.
        logger.warning(f"No result returned from {code_type} execution")
        return {"status": "error", "error": "No result returned from execution"}

    return result
|
|
|
|
|
|
def format_result(result: Dict, step_count: int) -> str:
    """Format an execution result as the text fed back to the model.

    Args:
        result: Result dict from code execution. The keys read are ``status``,
            ``returncode`` (or ``return_code``), ``output``, ``error`` and
            ``message``. A falsy value (``None`` or ``{}``) is reported as
            "no result".
        step_count: Zero-based step index; it is displayed as ``step_count + 1``.

    Returns:
        A multi-line string. ``Output`` and ``Error`` sections appear only when
        non-empty. ``Message`` appears only when there is neither output nor
        error, which is the same rule ``CoderAgent.execute`` uses when printing
        a result, so a skipped step still tells the model why it was skipped.
    """
    step_no = step_count + 1

    if not result:
        logger.warning(f"Step {step_no}: No result returned from execution")
        return f"\nStep {step_no} Error:\nError: No result returned from execution\n"

    status = result.get("status", "unknown")
    return_code = result.get("returncode", result.get("return_code", -1))

    # Bash and python results are read through the same keys. Per the original
    # comments, bash merges stdout and stderr into "output" and leaves "error"
    # empty, while python reports stdout in "output" and stderr in "error".
    output = result.get("output", "")
    error = result.get("error", "")
    message = result.get("message", "")

    logger.debug(f"Step {step_no}: Status={status}, Return Code={return_code}")

    sections = [
        f"Step {step_no} Result:",
        f"Status: {status}",
        f"Return Code: {return_code}",
    ]
    if output:
        sections.append(f"Output:\n{output}")
    if error:
        sections.append(f"Error:\n{error}")
    if message and not output and not error:
        sections.append(f"Message:\n{message}")

    # Every section, including the last, is newline-terminated.
    return "\n".join(sections) + "\n"
|
|
|
|
|
|
class CoderAgent:
    """A dedicated agent for executing code with a budget of steps.

    Each call to :meth:`execute` starts a fresh LLM conversation, repeatedly
    asks the model for a response, runs the fenced code it contains through the
    environment controller, and feeds the formatted result back, until the
    model signals completion or the step budget is used up.
    """

    # Whole-action completion signals, mapped to
    # (terminal banner, terminal detail line, log message suffix).
    _TERMINAL_SIGNALS = {
        "DONE": (
            "✅ TASK COMPLETED",
            "Agent signaled task completion",
            "Task completed successfully",
        ),
        "FAIL": (
            "❌ TASK FAILED",
            "Agent signaled task failure",
            "Task failed by agent request",
        ),
        "INFEASIBLE": (
            "❌ TASK INFEASIBLE",
            "Agent signaled task infeasible",
            "Task infeasible by agent request",
        ),
    }

    def __init__(self, engine_params: Dict, client_password: str, platform: str = "linux"):
        """Initialize the CodeAgent.

        Args:
            engine_params: LLM engine configuration. ``budget`` (default 20)
                and ``temperature`` (default 0.1) are read from it; the whole
                dict is also passed to ``LMMAgent``.
            client_password: Passed into the coder system prompt.
            platform: Passed into the coder system prompt.

        Raises:
            ValueError: If ``engine_params`` is ``None`` or empty.
        """
        if not engine_params:
            raise ValueError("engine_params cannot be None or empty")

        self.engine_params = engine_params
        self.budget = engine_params.get("budget", 20)
        self.temperature = engine_params.get("temperature", 0.1)
        self.agent = None
        self.platform = platform
        self.client_password = client_password

        logger.info(f"CodeAgent initialized with budget={self.budget} and platform={self.platform}")
        self.reset()

    def reset(self):
        """Reset the code agent state by building a fresh ``LMMAgent``."""
        logger.debug("Resetting CodeAgent state")
        self.agent = LMMAgent(
            engine_params=self.engine_params,
            system_prompt=PROCEDURAL_MEMORY.construct_coder_procedural_memory(
                platform=self.platform, client_password=self.client_password
            ),
        )

    def execute(self, task_instruction: str, screenshot: str, env_controller) -> Dict:
        """Execute code for the given task with a budget of steps.

        Args:
            task_instruction: Natural-language task given to the model.
            screenshot: Passed as ``image_content`` of the first user message.
            env_controller: Controller handed to ``execute_code``.

        Returns:
            A dict with keys ``task_instruction``, ``completion_reason``
            (``"DONE"``, ``"FAIL"``, ``"INFEASIBLE"`` or
            ``"BUDGET_EXHAUSTED_AFTER_<n>_STEPS"``), ``summary``,
            ``execution_history``, ``execution_result_history``,
            ``steps_executed`` and ``budget``.

        Raises:
            ValueError: If ``env_controller`` is ``None``.
            RuntimeError: If the LLM returns an empty response.
        """
        if env_controller is None:
            raise ValueError("env_controller is required for code execution")

        print("\n🚀 STARTING CODE EXECUTION")
        print("=" * 60)
        print(f"Task: {task_instruction}")
        print(f"Budget: {self.budget} steps")
        print("=" * 60)

        logger.info(f"Starting code execution for task: {task_instruction}")
        logger.info(f"Budget: {self.budget} steps")

        self.reset()

        # Add initial task instruction and screenshot context as user message
        context = (
            f"Task: {task_instruction}\n\nCurrent screenshot is provided for context."
        )
        self.agent.add_message(context, image_content=screenshot, role="user")

        step_count = 0
        execution_history = []
        execution_result_history = []
        # Stays None unless the model emits a terminal signal; an explicit
        # sentinel replaces probing `locals()` for the name.
        completion_reason = None

        while step_count < self.budget:
            step_no = step_count + 1
            logger.info(f"Step {step_no}/{self.budget}")

            # Get assistant response (thoughts and code)
            response = call_llm_safe(self.agent, temperature=self.temperature)

            # Log the latest message from the coding agent (untruncated)
            logger.info(
                f"CODING_AGENT_LATEST_MESSAGE - Step {step_no}:\n{response}"
            )

            if not response or response.strip() == "":
                error_msg = f"Step {step_no}: LLM returned empty response"
                logger.error(error_msg)
                raise RuntimeError(error_msg)

            # Parse the response to extract action
            action = parse_code_from_string(response)
            execution_history.append(
                {"step": step_no, "action": action, "thoughts": response}
            )

            # Check for completion signals. `action or ""` keeps a parser that
            # returns None from crashing the loop.
            signal = (action or "").upper().strip()
            if signal in self._TERMINAL_SIGNALS:
                self._announce_completion(signal, step_no)
                completion_reason = signal
                break

            # Extract and execute code from the part after the last "(Answer)"
            code_type, code = extract_code_block(response.split("(Answer)")[-1])

            if code:
                result = execute_code(code_type, code, env_controller)
                execution_result_history.append({"step": step_no, "result": result})
                # A missing result is reported as empty fields here and as
                # "no result" by format_result below.
                self._report_execution_result(result or {}, step_no)
            else:
                result = self._report_missing_code(step_no)

            # Add assistant's thoughts and code to message history
            self.agent.add_message(response, role="assistant")

            # Process result and add formatted environment results as user message
            result_context = format_result(result, step_count)
            self.agent.add_message(result_context, role="user")

            step_count += 1

        # Handle budget exhaustion
        if completion_reason is None:
            print(f"\n⏰ BUDGET EXHAUSTED - {step_count} steps completed")
            print("=" * 60)
            print(f"Maximum budget of {self.budget} steps reached")
            print("=" * 60)
            logger.info(f"Budget exhausted after {step_count} steps")
            completion_reason = f"BUDGET_EXHAUSTED_AFTER_{step_count}_STEPS"

        # Generate final summary
        logger.info("Generating execution summary")
        summary = self._generate_summary(execution_history, task_instruction)

        result = {
            "task_instruction": task_instruction,
            "completion_reason": completion_reason,
            "summary": summary,
            "execution_history": execution_history,
            "execution_result_history": execution_result_history,
            "steps_executed": step_count,
            "budget": self.budget,
        }

        logger.info(f"Code execution completed: steps={step_count}")
        return result

    def _announce_completion(self, signal: str, step_no: int) -> None:
        """Print and log the banner for a terminal signal emitted by the model."""
        banner, detail, log_suffix = self._TERMINAL_SIGNALS[signal]
        print(f"\n{banner} - Step {step_no}")
        print("=" * 60)
        print(detail)
        print("=" * 60)
        logger.info(f"Step {step_no}: {log_suffix}")

    def _report_execution_result(self, result: Dict, step_no: int) -> None:
        """Print an execution result to the terminal and log it in full."""
        output = result.get("output", "")
        error = result.get("error", "")
        message = result.get("message", "")
        status = result.get("status", "")
        # The message is only shown when there is nothing more specific.
        show_message = bool(message) and not output and not error

        # Print execution result to terminal for immediate visibility
        print(f"\n⚡ CODE EXECUTION RESULT - Step {step_no}")
        print("-" * 50)
        print(f"Status: {status}")
        if output:
            print(f"Output:\n{output}")
        if error:
            print(f"Error:\n{error}")
        if show_message:
            print(f"Message:\n{message}")
        print("-" * 50)

        log_lines = [f"CODING_AGENT_EXECUTION_RESULT - Step {step_no}:"]
        if status:
            log_lines.append(f"Status: {status}")
        if output:
            log_lines.append("Output:\n" + ("-" * 40) + f"\n{output}\n" + ("-" * 40))
        if error:
            log_lines.append("Error:\n" + ("!" * 40) + f"\n{error}\n" + ("!" * 40))
        if show_message:
            log_lines.append("Message:\n" + ("-" * 40) + f"\n{message}\n" + ("-" * 40))
        logger.info("\n".join(log_lines))

    def _report_missing_code(self, step_no: int) -> Dict:
        """Report a response without a code block and return the skipped result."""
        print(f"\n⚠️ NO CODE BLOCK FOUND - Step {step_no}")
        print("-" * 50)
        print("Action did not contain executable code")
        print("-" * 50)

        logger.warning(f"Step {step_no}: No code block found in action")
        result = {"status": "skipped", "message": "No code block found"}
        logger.info(
            f"CODING_AGENT_EXECUTION_RESULT - Step {step_no}:\n"
            f"Status: skipped\n"
            f"Message:\n{'-' * 40}\n{result['message']}\n{'-' * 40}"
        )
        return result

    def _generate_summary(
        self, execution_history: List[Dict], task_instruction: str
    ) -> str:
        """Generate summary of code execution session.

        Args:
            execution_history: Per-step dicts with ``step``, ``thoughts`` and
                ``action`` as recorded by :meth:`execute`.
            task_instruction: The task the session was working on.

        Returns:
            The LLM-written summary, ``"No actions were executed."`` for an
            empty history, or a "Summary generation failed..." string when the
            LLM call raises or returns nothing. Never raises for LLM failures.
        """
        if not execution_history:
            logger.info("No execution history to summarize")
            return "No actions were executed."

        logger.info(f"Generated summary for {len(execution_history)} steps")

        # Build detailed execution context for summary agent.
        # NOTE(review): only thoughts and code are included here, although the
        # prompt below also asks about execution outputs — confirm whether the
        # execution results should be passed in as well.
        sections = [f"Task: {task_instruction}\n\nExecution Steps:\n"]
        for step in execution_history:
            step_num = step["step"]
            thoughts = step.get("thoughts", "")
            action = step.get("action", "")

            sections.append(f"\nStep {step_num}:\n")
            if thoughts:
                sections.append(f"Thoughts: {thoughts}\n")
            sections.append(f"Code: {action}\n")
        execution_context = "".join(sections)

        # Create summary prompt with same context as coding agent. The literal
        # is kept flush-left so no indentation ends up in the prompt text.
        summary_prompt = f"""
{execution_context}

Please provide a concise summary of the code execution session. Focus on:

1. The code logic implemented at each step
2. The outputs and results produced by each code execution
3. The progression of the solution approach

Do not make judgments about success or failure. Simply describe what was attempted and what resulted.

Keep the summary under 150 words and use clear, factual language.
"""

        # Generate summary using LLM with dedicated summary system prompt.
        # Summarisation is best-effort: a failure becomes the summary text
        # rather than discarding the whole execution result.
        try:
            summary_agent = LMMAgent(
                engine_params=self.engine_params,
                system_prompt=PROCEDURAL_MEMORY.CODE_SUMMARY_AGENT_PROMPT,
            )
            summary_agent.add_message(summary_prompt, role="user")
            summary = call_llm_safe(summary_agent, temperature=self.temperature)

            if not summary or summary.strip() == "":
                summary = "Summary generation failed - no response from LLM"
                logger.warning("Summary generation failed - empty response from LLM")

        except Exception as e:
            summary = f"Summary generation failed: {str(e)}"
            logger.error(f"Error generating summary: {e}")

        return summary
|