Fix one multi_app example; remove some broken examples; Support downsampling
This commit is contained in:
@@ -80,9 +80,11 @@ def filter_nodes(root: ET, platform="ubuntu", check_image=False):
|
||||
return filtered_nodes
|
||||
|
||||
|
||||
def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
|
||||
def draw_bounding_boxes(nodes, image_file_path, output_image_file_path, down_sampling_ratio=1.0):
|
||||
# Load the screenshot image
|
||||
image = Image.open(image_file_path)
|
||||
if float(down_sampling_ratio) != 1.0:
|
||||
image = image.resize((int(image.size[0] * down_sampling_ratio), int(image.size[1] * down_sampling_ratio)))
|
||||
draw = ImageDraw.Draw(image)
|
||||
marks = []
|
||||
drew_nodes = []
|
||||
@@ -108,6 +110,11 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
|
||||
coords = tuple(map(int, coords_str.strip('()').split(', ')))
|
||||
size = tuple(map(int, size_str.strip('()').split(', ')))
|
||||
|
||||
if float(down_sampling_ratio) != 1.0:
|
||||
# Downsample the coordinates and size
|
||||
coords = tuple(int(coord * down_sampling_ratio) for coord in coords)
|
||||
size = tuple(int(s * down_sampling_ratio) for s in size)
|
||||
|
||||
# Check for negative sizes
|
||||
if size[0] <= 0 or size[1] <= 0:
|
||||
raise ValueError(f"Size must be positive, got: {size}")
|
||||
|
||||
@@ -6,17 +6,14 @@ import re
|
||||
import time
|
||||
import uuid
|
||||
import xml.etree.ElementTree as ET
|
||||
import numpy as np
|
||||
from http import HTTPStatus
|
||||
from io import BytesIO
|
||||
from typing import Dict, List, Tuple, Union
|
||||
|
||||
import backoff
|
||||
import dashscope
|
||||
import google.generativeai as genai
|
||||
import openai
|
||||
import requests
|
||||
import cv2
|
||||
from PIL import Image
|
||||
from google.api_core.exceptions import InvalidArgument
|
||||
|
||||
@@ -28,14 +25,6 @@ from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_S
|
||||
|
||||
logger = logging.getLogger("desktopenv.agent")
|
||||
|
||||
def downsample_image(img: Union[str, np.ndarray], ratio: Tuple[float, float]) -> np.ndarray:
    """Resize an image by independent horizontal and vertical scale factors.

    Args:
        img: Either a path to an image file (loaded with ``cv2.imread``) or
            an already-decoded image array.
        ratio: ``(fx, fy)`` scale factors forwarded to ``cv2.resize``.

    Returns:
        The resized image produced by ``cv2.resize`` using
        ``cv2.INTER_AREA`` interpolation.

    Raises:
        ValueError: If ``img`` is a path that ``cv2.imread`` could not load.
    """
    fx, fy = ratio
    if isinstance(img, str):
        path = img
        img = cv2.imread(path)
        # cv2.imread reports failure by returning None rather than raising;
        # surface that here instead of as an opaque error inside cv2.resize.
        if img is None:
            raise ValueError(f"Could not read image from path: {path!r}")

    # dsize=None lets OpenCV derive the output size from fx / fy.
    resized = cv2.resize(img, None, fx=fx, fy=fy, interpolation=cv2.INTER_AREA)
    return resized
|
||||
|
||||
|
||||
# Function to encode the image
|
||||
def encode_image(image_path):
|
||||
|
||||
Reference in New Issue
Block a user