ver Jan29th
Updated the position of the SoM (Set-of-Mark) index labels drawn on the bounding boxes.
This commit is contained in:
@@ -1 +1 @@
|
|||||||
baseline
|
baseline_som
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import ctypes
|
#import ctypes
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@@ -43,9 +43,11 @@ logger.addHandler(sdebug_handler)
|
|||||||
|
|
||||||
logger = logging.getLogger("desktopenv.experiment")
|
logger = logging.getLogger("desktopenv.experiment")
|
||||||
|
|
||||||
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
|
#PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
|
||||||
|
|
||||||
|
|
||||||
|
PATH_TO_VM = "../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx"
|
||||||
|
|
||||||
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
|
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
|
||||||
trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
|
trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
|
||||||
env = DesktopEnv(
|
env = DesktopEnv(
|
||||||
@@ -125,14 +127,16 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
|
|||||||
def main(example_class, example_id):
|
def main(example_class, example_id):
|
||||||
action_space = "pyautogui"
|
action_space = "pyautogui"
|
||||||
gpt4_model = "gpt-4-vision-preview"
|
gpt4_model = "gpt-4-vision-preview"
|
||||||
gemini_model = "gemini-pro-vision"
|
#gemini_model = "gemini-pro-vision"
|
||||||
|
|
||||||
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
|
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
|
||||||
example = json.load(f)
|
example = json.load(f)
|
||||||
example["snapshot"] = "exp_v1"
|
#example["snapshot"] = "exp_v1"
|
||||||
|
# example["snapshot"] = "exp_setup4"
|
||||||
|
example["snapshot"] = "Snapshot 30"
|
||||||
|
|
||||||
api_key = os.environ.get("OPENAI_API_KEY")
|
api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
|
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, max_tokens=1000, instruction=example['instruction'],
|
||||||
action_space=action_space, exp="som")
|
action_space=action_space, exp="som")
|
||||||
|
|
||||||
# api_key = os.environ.get("GENAI_API_KEY")
|
# api_key = os.environ.get("GENAI_API_KEY")
|
||||||
@@ -149,7 +153,7 @@ def main(example_class, example_id):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
xx_list = [
|
xx_list = [ "01b269ae-2111-4a07-81fd-3fcd711993b0"
|
||||||
]
|
]
|
||||||
for example_id in xx_list:
|
for example_id in xx_list:
|
||||||
main("xx", example_id)
|
main("libreoffice_calc", example_id)
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import xml.etree.ElementTree as ET
|
|||||||
|
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
def find_leaf_nodes(xlm_file_str):
|
def find_leaf_nodes(xlm_file_str):
|
||||||
if not xlm_file_str:
|
if not xlm_file_str:
|
||||||
@@ -105,15 +106,20 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
|
|||||||
|
|
||||||
# Draw index number at the bottom left of the bounding box with black background
|
# Draw index number at the bottom left of the bounding box with black background
|
||||||
text_position = (coords[0], bottom_right[1]) # Adjust Y to be above the bottom right
|
text_position = (coords[0], bottom_right[1]) # Adjust Y to be above the bottom right
|
||||||
draw.rectangle([text_position, (text_position[0] + 25, text_position[1] + 18)], fill='black')
|
text_bbox: Tuple[int, int ,int ,int] = draw.textbbox(text_position, str(index), font=font, anchor="lb")
|
||||||
draw.text(text_position, str(index), font=font, fill="white")
|
#offset: int = bottom_right[1]-text_bbox[3]
|
||||||
|
#text_bbox = (text_bbox[0], text_bbox[1]+offset, text_bbox[2], text_bbox[3]+offset)
|
||||||
|
|
||||||
|
#draw.rectangle([text_position, (text_position[0] + 25, text_position[1] + 18)], fill='black')
|
||||||
|
draw.rectangle(text_bbox, fill='black')
|
||||||
|
draw.text(text_position, str(index), font=font, anchor="lb", fill="white")
|
||||||
index += 1
|
index += 1
|
||||||
|
|
||||||
# each mark is an x, y, w, h tuple
|
# each mark is an x, y, w, h tuple
|
||||||
marks.append([coords[0], coords[1], size[0], size[1]])
|
marks.append([coords[0], coords[1], size[0], size[1]])
|
||||||
drew_nodes.append(_node)
|
drew_nodes.append(_node)
|
||||||
|
|
||||||
except ValueError as e:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Save the result
|
# Save the result
|
||||||
|
|||||||
@@ -38,3 +38,4 @@ pydrive
|
|||||||
fastdtw
|
fastdtw
|
||||||
|
|
||||||
openai
|
openai
|
||||||
|
func-timeout
|
||||||
|
|||||||
Reference in New Issue
Block a user