ver Jan29th

Updated the position of the SoM marks.
This commit is contained in:
David Chang
2024-01-29 21:49:53 +08:00
parent e37f0037c4
commit d8a497a417
4 changed files with 23 additions and 12 deletions

View File

@@ -1 +1 @@
baseline baseline_som

View File

@@ -1,4 +1,4 @@
import ctypes #import ctypes
import datetime import datetime
import json import json
import logging import logging
@@ -43,9 +43,11 @@ logger.addHandler(sdebug_handler)
logger = logging.getLogger("desktopenv.experiment") logger = logging.getLogger("desktopenv.experiment")
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx" #PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
PATH_TO_VM = "../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True): def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json") trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
env = DesktopEnv( env = DesktopEnv(
@@ -125,14 +127,16 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
def main(example_class, example_id): def main(example_class, example_id):
action_space = "pyautogui" action_space = "pyautogui"
gpt4_model = "gpt-4-vision-preview" gpt4_model = "gpt-4-vision-preview"
gemini_model = "gemini-pro-vision" #gemini_model = "gemini-pro-vision"
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f: with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
example = json.load(f) example = json.load(f)
example["snapshot"] = "exp_v1" #example["snapshot"] = "exp_v1"
# example["snapshot"] = "exp_setup4"
example["snapshot"] = "Snapshot 30"
api_key = os.environ.get("OPENAI_API_KEY") api_key = os.environ.get("OPENAI_API_KEY")
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, max_tokens=1000, instruction=example['instruction'],
action_space=action_space, exp="som") action_space=action_space, exp="som")
# api_key = os.environ.get("GENAI_API_KEY") # api_key = os.environ.get("GENAI_API_KEY")
@@ -149,7 +153,7 @@ def main(example_class, example_id):
if __name__ == '__main__': if __name__ == '__main__':
xx_list = [ xx_list = [ "01b269ae-2111-4a07-81fd-3fcd711993b0"
] ]
for example_id in xx_list: for example_id in xx_list:
main("xx", example_id) main("libreoffice_calc", example_id)

View File

@@ -2,6 +2,7 @@ import xml.etree.ElementTree as ET
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
from typing import Tuple
def find_leaf_nodes(xlm_file_str): def find_leaf_nodes(xlm_file_str):
if not xlm_file_str: if not xlm_file_str:
@@ -105,15 +106,20 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
# Draw index number at the bottom left of the bounding box with black background # Draw index number at the bottom left of the bounding box with black background
text_position = (coords[0], bottom_right[1]) # Adjust Y to be above the bottom right text_position = (coords[0], bottom_right[1]) # Adjust Y to be above the bottom right
draw.rectangle([text_position, (text_position[0] + 25, text_position[1] + 18)], fill='black') text_bbox: Tuple[int, int ,int ,int] = draw.textbbox(text_position, str(index), font=font, anchor="lb")
draw.text(text_position, str(index), font=font, fill="white") #offset: int = bottom_right[1]-text_bbox[3]
#text_bbox = (text_bbox[0], text_bbox[1]+offset, text_bbox[2], text_bbox[3]+offset)
#draw.rectangle([text_position, (text_position[0] + 25, text_position[1] + 18)], fill='black')
draw.rectangle(text_bbox, fill='black')
draw.text(text_position, str(index), font=font, anchor="lb", fill="white")
index += 1 index += 1
# each mark is an x, y, w, h tuple # each mark is an x, y, w, h tuple
marks.append([coords[0], coords[1], size[0], size[1]]) marks.append([coords[0], coords[1], size[0], size[1]])
drew_nodes.append(_node) drew_nodes.append(_node)
except ValueError as e: except ValueError:
pass pass
# Save the result # Save the result

View File

@@ -38,3 +38,4 @@ pydrive
fastdtw fastdtw
openai openai
func-timeout