43
mm_agents/uipath/grounder_client.py
Normal file
43
mm_agents/uipath/grounder_client.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import httpx
|
||||
import mm_agents.uipath.utils as utils
|
||||
import os
|
||||
|
||||
class GrounderClient:
    """Async client that sends grounding requests to a remote HTTP endpoint.

    The endpoint is authenticated with the ``X-API-KEY`` header, whose value
    is read from the ``SERVICE_KEY`` environment variable on every call.
    """

    def __init__(self, url: str = "", timeout: float = 100.0):
        """Store the endpoint configuration.

        Args:
            url: Address the prediction request is POSTed to. Defaults to an
                empty string, matching the previous hard-coded value.
            timeout: Timeout passed to the HTTP request (httpx interprets
                this in seconds).
        """
        # Proxy for hosting UI-TARS + UiElementPredictor
        # Could be replaced with a VLLM server and grounder (UI-TARS) specific processing
        # Or any other grounder
        self.url = url
        self.timeout = timeout

    async def predict(
        self, image_base64: str, action_description: str, action: str | None = None
    ) -> utils.GroundingOutput:
        """POST a grounding request and return the parsed prediction.

        Args:
            image_base64: Image payload, forwarded as ``image_base64``.
            action_description: Text forwarded as ``action_description``.
            action: Optional value forwarded as ``action``.

        Returns:
            A ``utils.GroundingOutput`` built from the ``description`` and
            ``position`` fields of the JSON response; ``position`` is
            converted to a tuple.

        Raises:
            ValueError: If ``SERVICE_KEY`` is not set, or if the service
                answers with a status code other than 200.
        """
        request = utils.GroundingRequest(
            description=action_description,
            image_base64=image_base64,
            action_type=action,
        )

        api_key = os.getenv("SERVICE_KEY")
        if api_key is None:
            # httpx rejects a None header value with an opaque error raised
            # from inside the library; fail early with an actionable message.
            raise ValueError("SERVICE_KEY environment variable is not set")

        payload = {
            "image_base64": request.image_base64,
            "action_description": request.description,
            "action": request.action_type,
        }

        async with httpx.AsyncClient() as client:
            response = await client.post(
                self.url,
                json=payload,
                headers={"X-API-KEY": api_key},
                timeout=self.timeout,
            )

        if response.status_code != 200:
            raise ValueError(f"Prediction failed: {response.text}")

        data = response.json()
        return utils.GroundingOutput(
            description=data["description"],
            position=tuple(data["position"]),
        )
|
||||
Reference in New Issue
Block a user