Feat/claude cua support (#253)
* feat: add claude support * feat: add script for end-to-end evaluation with logging and task distribution * feat&fix: add tool result handling and update model default in evaluation script * chore: remove run_test_env.py script * feat&fix: implement action parsing for tool calls and update default action space * fix: update text formatting in action parsing and replace logger import * feat&fix: implement action parsing for tool calls and add screen size handling * feat: add setup instructions for Anthropic API integration * feat: add notice about image size limitations for Anthropic API * Delete test_env/logger.py * Delete test_env/utils.py
This commit is contained in:
144
mm_agents/anthropic/tools/bash.py
Normal file
144
mm_agents/anthropic/tools/bash.py
Normal file
@@ -0,0 +1,144 @@
|
||||
import asyncio
|
||||
import os
|
||||
from typing import ClassVar, Literal, Optional
|
||||
|
||||
from anthropic.types.beta import BetaToolBash20241022Param
|
||||
|
||||
from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult
|
||||
|
||||
|
||||
class _BashSession:
|
||||
"""A session of a bash shell."""
|
||||
|
||||
_started: bool
|
||||
_process: asyncio.subprocess.Process
|
||||
|
||||
command: str = "/bin/bash"
|
||||
_output_delay: float = 0.2 # seconds
|
||||
_timeout: float = 120.0 # seconds
|
||||
_sentinel: str = "<<exit>>"
|
||||
|
||||
def __init__(self):
|
||||
self._started = False
|
||||
self._timed_out = False
|
||||
|
||||
async def start(self):
|
||||
if self._started:
|
||||
return
|
||||
|
||||
self._process = await asyncio.create_subprocess_shell(
|
||||
self.command,
|
||||
preexec_fn=os.setsid,
|
||||
shell=True,
|
||||
bufsize=0,
|
||||
stdin=asyncio.subprocess.PIPE,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
|
||||
self._started = True
|
||||
|
||||
def stop(self):
|
||||
"""Terminate the bash shell."""
|
||||
if not self._started:
|
||||
raise ToolError("Session has not started.")
|
||||
if self._process.returncode is not None:
|
||||
return
|
||||
self._process.terminate()
|
||||
|
||||
async def run(self, command: str):
|
||||
"""Execute a command in the bash shell."""
|
||||
if not self._started:
|
||||
raise ToolError("Session has not started.")
|
||||
if self._process.returncode is not None:
|
||||
return ToolResult(
|
||||
system="tool must be restarted",
|
||||
error=f"bash has exited with returncode {self._process.returncode}",
|
||||
)
|
||||
if self._timed_out:
|
||||
raise ToolError(
|
||||
f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
|
||||
)
|
||||
|
||||
# we know these are not None because we created the process with PIPEs
|
||||
assert self._process.stdin
|
||||
assert self._process.stdout
|
||||
assert self._process.stderr
|
||||
|
||||
# send command to the process
|
||||
self._process.stdin.write(
|
||||
command.encode() + f"; echo '{self._sentinel}'\n".encode()
|
||||
)
|
||||
await self._process.stdin.drain()
|
||||
|
||||
# read output from the process, until the sentinel is found
|
||||
try:
|
||||
async with asyncio.timeout(self._timeout):
|
||||
while True:
|
||||
await asyncio.sleep(self._output_delay)
|
||||
# if we read directly from stdout/stderr, it will wait forever for
|
||||
# EOF. use the StreamReader buffer directly instead.
|
||||
output = self._process.stdout._buffer.decode() # pyright: ignore[reportAttributeAccessIssue]
|
||||
if self._sentinel in output:
|
||||
# strip the sentinel and break
|
||||
output = output[: output.index(self._sentinel)]
|
||||
break
|
||||
except asyncio.TimeoutError:
|
||||
self._timed_out = True
|
||||
raise ToolError(
|
||||
f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
|
||||
) from None
|
||||
|
||||
if output.endswith("\n"):
|
||||
output = output[:-1]
|
||||
|
||||
error = self._process.stderr._buffer.decode() # pyright: ignore[reportAttributeAccessIssue]
|
||||
if error.endswith("\n"):
|
||||
error = error[:-1]
|
||||
|
||||
# clear the buffers so that the next output can be read correctly
|
||||
self._process.stdout._buffer.clear() # pyright: ignore[reportAttributeAccessIssue]
|
||||
self._process.stderr._buffer.clear() # pyright: ignore[reportAttributeAccessIssue]
|
||||
|
||||
return CLIResult(output=output, error=error)
|
||||
|
||||
|
||||
class BashTool(BaseAnthropicTool):
|
||||
"""
|
||||
A tool that allows the agent to run bash commands.
|
||||
The tool parameters are defined by Anthropic and are not editable.
|
||||
"""
|
||||
|
||||
_session: Optional[_BashSession]
|
||||
name: ClassVar[Literal["bash"]] = "bash"
|
||||
api_type: ClassVar[Literal["bash_20241022"]] = "bash_20241022"
|
||||
|
||||
def __init__(self):
|
||||
self._session = None
|
||||
super().__init__()
|
||||
|
||||
async def __call__(
|
||||
self, command: Optional[str] = None, restart: bool = False, **kwargs
|
||||
):
|
||||
if restart:
|
||||
if self._session:
|
||||
self._session.stop()
|
||||
self._session = _BashSession()
|
||||
await self._session.start()
|
||||
|
||||
return ToolResult(system="tool has been restarted.")
|
||||
|
||||
if self._session is None:
|
||||
self._session = _BashSession()
|
||||
await self._session.start()
|
||||
|
||||
if command is not None:
|
||||
return await self._session.run(command)
|
||||
|
||||
raise ToolError("no command provided.")
|
||||
|
||||
def to_params(self) -> BetaToolBash20241022Param:
|
||||
return {
|
||||
"type": self.api_type,
|
||||
"name": self.name,
|
||||
}
|
||||
Reference in New Issue
Block a user