Files
sci-gui-agent-benchmark/mm_agents/uipath/grounder_client.py
alexandruilie7 f59cf00cae Add ui agent (#343)
* add uipath agent

* readme update
2025-09-24 19:42:46 +08:00

44 lines
1.4 KiB
Python

import httpx
import mm_agents.uipath.utils as utils
import os
class GrounderClient:
    """Async HTTP client for a remote grounding service.

    The service receives a base64-encoded image together with a textual
    action description and answers with a description and a position,
    which are returned as a ``utils.GroundingOutput``.
    """

    def __init__(self, url: str = ""):
        """Create a client.

        Args:
            url: Endpoint that ``predict`` POSTs to. Defaults to the empty
                string (the original hard-coded value); it must be set to a
                real endpoint, here or via ``self.url``, before ``predict``
                can succeed.
        """
        # Proxy for hosting UI-TARS + UiElementPredictor
        # Could be replaced with a VLLM server and grounder (UI-TARS) specific processing
        # Or any other grounder
        self.url = url

    async def predict(
        self, image_base64: str, action_description: str, action: str | None = None
    ) -> utils.GroundingOutput:
        """Send one grounding request and return the parsed result.

        Args:
            image_base64: Image payload, forwarded as ``image_base64``.
            action_description: Text forwarded as ``action_description``.
            action: Optional value forwarded as ``action``.

        Returns:
            A ``utils.GroundingOutput`` built from the ``description`` and
            ``position`` fields of the JSON response; ``position`` is
            converted to a tuple.

        Raises:
            ValueError: If the service answers with a status code other
                than 200. The response body is included in the message.
        """
        request = utils.GroundingRequest(
            description=action_description,
            image_base64=image_base64,
            action_type=action,
        )

        # Only attach the key header when SERVICE_KEY is actually set:
        # httpx rejects a None header value before the request is sent, so an
        # unset variable used to fail with an opaque error from inside httpx.
        # Without the header the service itself decides whether to accept
        # the call (e.g. a local grounder that needs no key).
        api_key = os.getenv("SERVICE_KEY")
        headers = {"X-API-KEY": api_key} if api_key is not None else {}

        async with httpx.AsyncClient() as client:
            response = await client.post(
                self.url,
                json={
                    "image_base64": request.image_base64,
                    "action_description": request.description,
                    "action": request.action_type,
                },
                headers=headers,
                timeout=100.0,
            )

            if response.status_code != 200:
                raise ValueError(f"Prediction failed: {response.text}")

            data = response.json()
            return utils.GroundingOutput(
                description=data["description"],
                position=tuple(data["position"]),
            )