update all ids in experiment_screenshot.py

2024-03-13 21:06:55 +08:00
parent 670e20a248 a7782338d8
commit cee3b93009
24 changed files with 945 additions and 384 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -173,6 +173,8 @@ branch_flag
 branch-config
 *.syncthing.*.tmp
 cache
+version.folder
+at_processing

 test.xlsx
 test2.xlsx
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -167,7 +167,7 @@ class DesktopEnv(gym.Env):
        return screenshot_image_path

    def _set_task_info(self, task_config: Dict[str, Any]):
-        self.snapshot_path = task_config["snapshot"]
+        self.snapshot_path = task_config["snapshot"] # todo: save the snapshot when first start the environment, and then revert to it when reset
        self.task_id: str = task_config["id"]
        self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id)
        os.makedirs(self.cache_dir, exist_ok=True)
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -178,6 +178,9 @@ import typing


 def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
+    if not pdf1_path or not pdf2_path:
+        return 0.
+
    def extract_images_from_pdf(pdf_path):
        pdf_document = fitz.open(pdf_path)
        images = []
@@ -219,7 +222,10 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
    """
    Compare two archives. Note that the files in the archives should be of the same type.
    """
-    if not pred_path: return 0.
+    file_path = kwargs.pop('file_path', '')
+
+    if not pred_path:
+        return 0.
    pred_folder = os.path.splitext(pred_path)[0] + '_pred'
    gold_folder = os.path.splitext(gold_path)[0] + '_gold'

@@ -227,13 +233,16 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
        shutil.rmtree(pred_folder, ignore_errors=True)
    os.makedirs(pred_folder)
    shutil.unpack_archive(pred_path, pred_folder)
+
    if not os.path.exists(gold_folder):  # use cache if exists
        os.makedirs(gold_folder)
        shutil.unpack_archive(gold_path, gold_folder)

-    pred_files = sorted(os.listdir(pred_folder))
-    gold_files = sorted(os.listdir(gold_folder))
-    if pred_files != gold_files: return 0.
+    pred_files = sorted(os.listdir(os.path.join(pred_folder, file_path)))
+    gold_files = sorted(os.listdir(os.path.join(gold_folder, file_path)))
+
+    if pred_files != gold_files:
+        return 0.

    def get_compare_function():
        file_type = kwargs.pop('file_type', 'text')
@@ -269,8 +278,8 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
    score = 0
    compare_function = get_compare_function()
    for f1, f2 in zip(pred_files, gold_files):
-        fp1 = os.path.join(pred_folder, f1)
-        fp2 = os.path.join(gold_folder, f2)
+        fp1 = os.path.join(pred_folder, file_path, f1)
+        fp2 = os.path.join(gold_folder, file_path, f2)
        score += compare_function(fp1, fp2, **kwargs)
    return score / len(pred_files)

--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -50,7 +50,11 @@ def contains_page_break(docx_file):
    if not docx_file:
        return 0

-    doc = Document(docx_file)
+    try:
+        doc = Document(docx_file)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}

@@ -91,16 +95,24 @@ def compare_docx_files(file1, file2, **options):

    # Determine file types and load documents
    if file1.endswith('.docx') and file2.endswith('.docx'):
-        doc1 = Document(file1)
-        doc2 = Document(file2)
+        try:
+            doc1 = Document(file1)
+            doc2 = Document(file2)
+        except Exception as e:
+            logger.error(f"Error: {e}")
+            return 0
        doc1_paragraphs = [p.text for p in doc1.paragraphs]
        doc2_paragraphs = [p.text for p in doc2.paragraphs]
        if ignore_order:
            doc1_paragraphs = sorted(doc1_paragraphs)
            doc2_paragraphs = sorted(doc2_paragraphs)
    elif file1.endswith('.odt') and file2.endswith('.odt'):
-        doc1 = load(file1)
-        doc2 = load(file2)
+        try:
+            doc1 = load(file1)
+            doc2 = load(file2)
+        except Exception as e:
+            logger.error(f"Error: {e}")
+            return 0
        doc1_paragraphs = get_paragraph_texts_odt(doc1)
        doc2_paragraphs = get_paragraph_texts_odt(doc2)
        if ignore_order:
@@ -153,8 +165,12 @@ def compare_init_lines(file1, file2):
    if not file1 or not file2:
        return 0

-    doc1 = Document(file1)
-    doc2 = Document(file2)
+    try:
+        doc1 = Document(file1)
+        doc2 = Document(file2)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    doc1_paragraphs = [p.text for p in doc1.paragraphs]
    doc2_paragraphs = [p.text for p in doc2.paragraphs]
@@ -173,8 +189,12 @@ def compare_docx_tables(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0

-    doc1 = Document(docx_file1)
-    doc2 = Document(docx_file2)
+    try:
+        doc1 = Document(docx_file1)
+        doc2 = Document(docx_file2)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    # get list of tables in docx
    tables1 = doc1.tables
@@ -202,8 +222,12 @@ def compare_docx_images(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0

-    doc1 = Document(docx_file1)
-    doc2 = Document(docx_file2)
+    try:
+        doc1 = Document(docx_file1)
+        doc2 = Document(docx_file2)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    def extract_images(doc):
        images = []
@@ -240,8 +264,13 @@ def compare_line_spacing(docx_file1, docx_file2):

    if not compare_docx_files(docx_file1, docx_file2):
        return 0
-    doc1 = Document(docx_file1)
-    doc2 = Document(docx_file2)
+
+    try:
+        doc1 = Document(docx_file1)
+        doc2 = Document(docx_file2)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    if len(doc1.paragraphs) != len(doc2.paragraphs):
        return 0
@@ -265,8 +294,12 @@ def compare_insert_equation(docx_file1, docx_file2):
    if not compare_docx_files(docx_file1, docx_file2):
        return 0

-    doc1 = Document(docx_file1)
-    doc2 = Document(docx_file2)
+    try:
+        doc1 = Document(docx_file1)
+        doc2 = Document(docx_file2)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    # Compare each paragraph if it contains equation
    for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
@@ -280,7 +313,12 @@ def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
    if not docx_file:
        return 0

-    doc = Document(docx_file)
+    try:
+        doc = Document(docx_file)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0
+
    expected_font = rules["font_name"]

    for paragraph in doc.paragraphs:
@@ -295,8 +333,12 @@ def compare_subscript_contains(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0

-    doc1 = Document(docx_file1)
-    doc2 = Document(docx_file2)
+    try:
+        doc1 = Document(docx_file1)
+        doc2 = Document(docx_file2)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
        for run1, run2 in zip(para1.runs, para2.runs):
@@ -310,7 +352,11 @@ def has_page_numbers_in_footers(docx_file):
    if not docx_file:
        return 0

-    doc = Document(docx_file)
+    try:
+        doc = Document(docx_file)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    for section in doc.sections:
        footer = section.footer
@@ -327,7 +373,12 @@ def is_first_line_centered(docx_file):
    if not docx_file:
        return 0

-    doc = Document(docx_file)
+    try:
+        doc = Document(docx_file)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0
+
    first_paragraph = doc.paragraphs[0]

    # check if the first line is center justified
@@ -345,8 +396,13 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
    if not docx_file1 or not docx_file2:
        return .0

-    doc1: Document = Document(docx_file1)
-    doc2: Document = Document(docx_file2)
+    try:
+        doc1: Document = Document(docx_file1)
+        doc2: Document = Document(docx_file2)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return .0
+
    para1 = [p for p in doc1.paragraphs if p.text.strip()]
    para2 = [p for p in doc2.paragraphs if p.text.strip()]
    if len(para1) != len(para2): return .0
@@ -383,8 +439,12 @@ def compare_contains_image(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0

-    doc1 = Document(docx_file1)
-    doc2 = Document(docx_file2)
+    try:
+        doc1 = Document(docx_file1)
+        doc2 = Document(docx_file2)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
        for run1, run2 in zip(para1.runs, para2.runs):
@@ -400,7 +460,13 @@ def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):

    if not compare_docx_files(file_path1, file_path2):
        return 0
-    document = Document(file_path1)
+
+    try:
+        document = Document(file_path1)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0
+
    threshold = kwargs.get('threshold', 3.5)

    def _calculate_color_difference(rgb1, rgb2):
@@ -462,7 +528,12 @@ def evaluate_strike_through_last_paragraph(file_path1, file_path2):

    if not compare_docx_files(file_path1, file_path2):
        return 0
-    document = Document(file_path1)
+
+    try:
+        document = Document(file_path1)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    # Get the last paragraph
    last_paragraph = document.paragraphs[-1]
@@ -479,7 +550,11 @@ def evaluate_conversion(file_path):
    if not file_path:
        return 0

-    document = Document(file_path)
+    try:
+        document = Document(file_path)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    for table in document.tables:
        for row in table.rows:
@@ -501,7 +576,11 @@ def evaluate_spacing(file_path):
    if not file_path:
        return 0

-    document = Document(file_path)
+    try:
+        document = Document(file_path)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    # Check line spacing for introduction, body, and conclusion
    introduction_spacing = document.paragraphs[0].paragraph_format.line_spacing
@@ -519,7 +598,13 @@ def check_italic_font_size_14(path1, path2):

    if not compare_docx_files(path1, path2):
        return 0
-    document = Document(path1)
+
+    try:
+        document = Document(path1)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0
+
    for paragraph in document.paragraphs:
        for run in paragraph.runs:
            if run.italic:
@@ -534,7 +619,11 @@ def evaluate_alignment(docx_path):
        return 0

    # Load the document
-    doc = Document(docx_path)
+    try:
+        doc = Document(docx_path)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

    # Iterate through each paragraph in the document
    for para in doc.paragraphs:
@@ -565,7 +654,12 @@ def get_unique_train_ids(initial_file):  # fixed standard
    if not initial_file:
        return set(), 0

-    doc = Document(initial_file)
+    try:
+        doc = Document(initial_file)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return set(), 0
+
    train_ids = set()
    processed_lines = 0

@@ -586,7 +680,13 @@ def check_no_duplicates(initial_file, processed_file):

    # Open the document
    train_ids_ini, ini_lines = get_unique_train_ids(initial_file)
-    doc_processed = Document(processed_file)
+
+    try:
+        doc_processed = Document(processed_file)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0
+
    train_ids_pro = set()
    processed_lines = 0  # Counter for valid lines processed

@@ -615,10 +715,14 @@ def compare_docx_lines(file1, file2):
        return 0

    # Read the text of the document, line by line
-    doc1 = Document(file1)
-    doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
+    try:
+        doc1 = Document(file1)
+        doc2 = Document(file2)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        return 0

-    doc2 = Document(file2)
+    doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
    doc2_lines = [p.text.strip() for p in doc2.paragraphs if p.text.strip()]
    # print(doc1_lines)
    # print(doc2_lines)
@@ -638,8 +742,13 @@ def compare_docx_files_and_ignore_new_lines(file1, file2, **options):

    # Determine file types and load documents
    if file1.endswith('.docx') and file2.endswith('.docx'):
-        doc1 = Document(file1)
-        doc2 = Document(file2)
+        try:
+            doc1 = Document(file1)
+            doc2 = Document(file2)
+        except Exception as e:
+            logger.error(f"Error: {e}")
+            return 0
+
        # First, delete all the blank in paragraphs
        doc1 = [p for p in doc1.paragraphs if p.text != '']
        doc2 = [p for p in doc2.paragraphs if p.text != '']
@@ -716,8 +825,13 @@ def compare_references(file1, file2, **options):

    # Determine file types and load documents
    if file1.endswith('.docx') and file2.endswith('.docx'):
-        doc1 = Document(file1)
-        doc2 = Document(file2)
+        try:
+            doc1 = Document(file1)
+            doc2 = Document(file2)
+        except Exception as e:
+            logger.error(f"Error: {e}")
+            return 0
+
        doc1_paragraphs = [p.text for p in doc1.paragraphs]
        doc2_paragraphs = [p.text for p in doc2.paragraphs]
    else:
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -1,11 +1,11 @@
 import csv
-import os
 import datetime
 import difflib
 import functools
 import json
 import logging
 import operator
+import os
 import re
 import sqlite3
 from numbers import Number
@@ -13,7 +13,6 @@ from typing import Callable, Any, Union
 from typing import Dict, List, Pattern

 import lxml.etree
-import pandas as pd
 import pdfplumber
 import yaml
 from docx import Document
@@ -104,13 +103,14 @@ def fuzzy_place_math(result_file_path, rules) -> float:
    for word in words_list:
        max_score = 0
        for ans in expect:
-            score = fuzz.ratio(word, ans)/100
+            score = fuzz.ratio(word, ans) / 100
            max_score = max(max_score, score)
        fuzzy_score_list.append(max_score)
    if len(fuzzy_score_list) != 3:
        return 0.
    return sum(fuzzy_score_list) / 3

+
 def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
    """
    Args:
@@ -341,27 +341,30 @@ def check_direct_json_object(result, rules) -> float:
        logger.debug("check_direct_json_object: result is not a valid json object")
        return 0.

+
 def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
    if not speedtest_result_path:
        return 0

    # open the speedtest results file(csv)
    date_col = None
-    with open(speedtest_result_path, 'r') as f:
-        reader = pd.read_csv(f)
-        for column in reader.columns:
-            if column.startswith('TEST_DATE'):
-                date_col = column
-                break
-        now_date_time = datetime.datetime.now().strftime('%H:%M')
-        for date in reader[date_col]:
+    try:
+        with open(speedtest_result_path, 'r') as f:
+            for i, line in enumerate(f):
+                if i == 1:
+                    date = line.split(',')[1]
+                    break
+            now_date_time = datetime.datetime.now().strftime('%H:%M')
            date_time = date[-5:]
            # compare the date time with the current date time, if time diff less than time_diff para, then return true
            if not abs((datetime.datetime.strptime(date_time, '%H:%M') - datetime.datetime.strptime(now_date_time,
                                                                                                    '%H:%M')).total_seconds()) / 60 < int(
-                    time_diff):
+                time_diff):
                return 0
        return 1
+    except:
+        logger.debug("compare_time_in_speedtest_results: file not found or not readable")
+        return 0


 def is_included_all_json_objects(gold_file_path, result_file_path):
@@ -384,6 +387,9 @@ def is_included_all_json_objects(gold_file_path, result_file_path):


 def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
+    if not gold_text_path or not pdf_file_path:
+        return 0
+
    print("gold_text_path: ")
    print(gold_text_path)
    print("pdf_file_path: ")
@@ -444,6 +450,9 @@ def check_line_number(file_path, line_number):


 def compare_terminal_and_txt(txt_file_path, terminal_output):
+    if not txt_file_path or not terminal_output:
+        return 0
+
    # read txt file content
    with open(txt_file_path, 'r') as f:
        txt_file_content = f.read()
@@ -452,6 +461,9 @@ def compare_terminal_and_txt(txt_file_path, terminal_output):


 def compare_python_pure_text(py_file_path, gold_file_path):
+    if not py_file_path or not gold_file_path:
+        return 0
+
    # first, change the suffix of gold_file from .txt to .py
    print("py_file_path: ")
    print(py_file_path)
--- a/desktop_env/evaluators/metrics/gimp.py
+++ b/desktop_env/evaluators/metrics/gimp.py
@@ -199,7 +199,7 @@ def structure_check_by_mse(img1, img2, threshold=0.03):

 def structure_check_by_ssim(img1, img2, threshold=0.9):
    """Check if two images are approximately the same by SSIM"""
-    similarity = ssim(np.array(img1), np.array(img2), multichannel=True)
+    similarity = ssim(np.array(img1), np.array(img2), multichannel=True, channel_axis=-1)
    print("SSIM: ", similarity)
    return similarity >= threshold

@@ -571,40 +571,44 @@ def check_image_file_size(src_path, rule):


 if __name__ == "__main__":
-    actual_config_path = "../../../cache/sessionrc_test"
-    rule = {
-        "key": "hide-docks",
-        "value": "no"
-    }
-    print(check_config_status(actual_config_path, rule))
+    # actual_config_path = "../../../cache/sessionrc_test"
+    # rule = {
+    #     "key": "hide-docks",
+    #     "value": "no"
+    # }
+    # print(check_config_status(actual_config_path, rule))
+    #
+    # actual_config_path = "../../../cache/action-history_test"
+    # rule = {
+    #     "key": ["history-item", "\"filters-vignette\""],
+    #     "value": "1"
+    # }
+    # print(check_config_status(actual_config_path, rule))
+    #
+    # actual_config_path = "../../../cache/gimprc_test"
+    # rule = {
+    #     "key": "undo-levels",
+    #     "value": "100"
+    # }
+    # print(check_config_status(actual_config_path, rule))
+    #
+    # src_path = "../../../cache/734d6579-c07d-47a8-9ae2-13339795476b/green_background_with_object.png"
+    # tgt_path = "../../../cache/734d6579-c07d-47a8-9ae2-13339795476b/white_background_with_object.png"
+    # print(check_green_background(src_path, tgt_path))
+    #
+    # tgt_path = "../../../cache/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce/Triangle_In_The_Middle.png"
+    # print(check_triangle_position(tgt_path))
+    #
+    # src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi_sharper.png"
+    # tgt_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi.png"
+    # print(check_sharper(src_path, tgt_path))
+    #
+    # src_path = "../../../cache/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f/compressed.jpeg"
+    # rule = {
+    #     "max_size": 500000
+    # }
+    # print(check_image_file_size(src_path, rule))

-    actual_config_path = "../../../cache/action-history_test"
-    rule = {
-        "key": ["history-item", "\"filters-vignette\""],
-        "value": "1"
-    }
-    print(check_config_status(actual_config_path, rule))
-
-    actual_config_path = "../../../cache/gimprc_test"
-    rule = {
-        "key": "undo-levels",
-        "value": "100"
-    }
-    print(check_config_status(actual_config_path, rule))
-
-    src_path = "../../../cache/734d6579-c07d-47a8-9ae2-13339795476b/green_background_with_object.png"
-    tgt_path = "../../../cache/734d6579-c07d-47a8-9ae2-13339795476b/white_background_with_object.png"
-    print(check_green_background(src_path, tgt_path))
-
-    tgt_path = "../../../cache/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce/Triangle_In_The_Middle.png"
-    print(check_triangle_position(tgt_path))
-
-    src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi_sharper.png"
-    tgt_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi.png"
-    print(check_sharper(src_path, tgt_path))
-
-    src_path = "../../../cache/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f/compressed.jpeg"
-    rule = {
-        "max_size": 500000
-    }
-    print(check_image_file_size(src_path, rule))
+    src_path = "../../../cache/d68204bf-11c1-4b13-b48b-d303c73d4bf6/12ad623d-7f35-550e-9c44-6830386b20a0_rearranged_gold.png"
+    tgt_path = "../../../cache/d68204bf-11c1-4b13-b48b-d303c73d4bf6/d1c7c561-6e76-5d7b-9c10-4af0332dfa50_tilearray.png"
+    print(check_structure_sim(src_path, tgt_path))
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -117,7 +117,7 @@ def launch_app():
 def capture_screen_with_cursor():
    # fixme: when running on virtual machines, the cursor is not captured, don't know why

-    file_path = os.path.join("screenshots", "screenshot.png")
+    file_path = os.path.join(os.path.dirname(__file__), "screenshots", "screenshot.png")
    user_platform = platform.system()

    # Ensure the screenshots directory exists
@@ -276,12 +276,12 @@ def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = N
        # only text shown on current screen is available
        # attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount)
        text: str = text_obj.getText(0, text_obj.characterCount)
-        if flag=="thunderbird":
-            # appeard in thunderbird (uFFFC), "Object Replacement Character" in
-            # Unicode, "used as placeholder in text for an otherwise
-            # unspecified object; uFFFD is another "Replacement Character",
-            # just in case
-            text = text.replace("\ufffc", "").replace("\ufffd", "")
+        #if flag=="thunderbird":
+        # U+FFFC appears in Thunderbird (and not only there): it is the "Object
+        # Replacement Character" in Unicode, "used as placeholder in text for
+        # an otherwise unspecified object"; U+FFFD is another "Replacement
+        # Character", stripped as well just in case
+        text = text.replace("\ufffc", "").replace("\ufffd", "")
    #  }}} Text # 

    #  Selection {{{ # 
--- a/evaluation_examples/examples/chrome/9656a811-9b5b-4ddf-99c7-5117bcef0626.json
+++ b/evaluation_examples/examples/chrome/9656a811-9b5b-4ddf-99c7-5117bcef0626.json
@@ -29,6 +29,15 @@
    "chrome"
  ],
  "evaluator": {
+    "postconfig": [
+      {
+        "type": "execute",
+        "parameters": {
+          "command": "pkill chrome",
+          "shell": "true"
+        }
+      }
+    ],
    "func": "exact_match",
    "result": {
      "type": "enable_enhanced_safety_browsing"
--- a/evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json
+++ b/evaluation_examples/examples/multi_apps/0c825995-5b70-4526-b663-113f4c999dd2.json
@@ -1,94 +1,109 @@
 {
-	"id": "0c825995-5b70-4526-b663-113f4c999dd2",
-	"snapshot": "libreoffice_calc",
-	"instruction": "I'm working on a comprehensive report for our environmental policy review meeting next week. I need to integrate key insights from an important document, which is a guidebook on the Green Economy, where I'm particularly interested in the 'Introduction' section. Could you extract this section and compile them into a new Google Doc named 'environment_policy_report (draft)' under /environment_policy folder? This will significantly aid in our discussion on aligning our environmental policies with sustainable and green economic practices. Thanks!",
-	"source": "authors",
-	"config": [
-		{
-            "type": "googledrive",
-            "parameters": {
-                "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
-                "operation": ["delete"],
-                "args": [
-                    {
-                        "query": "title = 'environment_policy_report (draft).doc' or title = 'environment_policy_report (draft).docx' or title = 'environment_policy_report (draft)'",
-                        "trash": false
-                    }
-                ]
-            }
-        },
-        {
-            "type": "launch",
-            "parameters": {
-                "command": [
-                    "google-chrome",
-                    "--remote-debugging-port=1337"
-                ]
-            }
-        },
-        {
-            "type": "launch",
-            "parameters": {
-                "command": [
-                    "socat",
-                    "tcp-listen:9222,fork",
-                    "tcp:localhost:1337"
-                ]
-            }
-        },
-        {
-            "type": "login",
-            "parameters": {
-                "settings_file": "evaluation_examples/settings/google/settings.json",
-                "platform": "googledrive"
-            }
-        },
-		{
-			"type": "command",
-			"parameters": {
-				"command": ["mkdir", "-p", "/home/user/Desktop/wwf"]
-			}
-		},
-		{
-			"type": "download",
-			"parameters": {
-				"files": [
-					{"path": "/home/user/Desktop/wwf/lpr_living_planet_report_2016.pdf", "url": "https://drive.google.com/uc?id=19NCdw_MVP6nH5nC6okYYe8U1mJABfTRK&export=download"},
-					{"path": "/home/user/Desktop/wwf/279c656a32_ENGLISH_FULL.pdf", "url": "https://drive.google.com/uc?id=1ckH1NetfImQ9EyONTO-ZFWA8m8VIUFvD&export=download"},
-					{"path": "/home/user/Desktop/wwf/7g37j96psg_WWF_AR2021_spreads.pdf", "url": "https://drive.google.com/uc?id=1cxLTzmqDKMomOyvho29lvFvhRnb0Y8__&export=download"},
-					{"path": "/home/user/Desktop/GE Guidebook.pdf", "url": "https://drive.google.com/uc?id=1KzC_R3eI3Rmgwz5bkcI8Ohv7ebOrU-Is&export=download"},
-					{"path": "/home/user/Desktop/assessing_and_reporting_water_quality(q&a).pdf", "url": "https://drive.google.com/uc?id=1LFojf3Weflv3fVdrZrgTY1iUaRdbT9kG&export=download"}
-				]
-			}
-		}
-	],
-	"trajectory": "trajectories/0c825995-5b70-4526-b663-113f4c999dd2",
-	"related_apps": [
-		"libreoffice_calc",
-		"chrome",
-		"os"
-    ],
-	"evaluator": {
-		"func": "compare_docx_files",
-        "result": {
-            "type": "googledrive_file",
-            "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
-            "path_list": [
-                    [
-                        "environment_policy_report (draft).docx"
-                    ]
-                ],
-          "dest": [
-                    "environment_policy_report (draft).docx"
-                ]
-        },
-        "expected": {
-            "type": "cloud_file",
-            "path": "https://drive.google.com/uc?id=1A2ti9JncAfIa6ks7FTJWHtYlZo-68FtM&export=download",
-            "dest": "environment_policy_report (draft)_gold.docx"
-        },
-        "options": {
-          "content_only": true
-        }
-	}
+  "id": "0c825995-5b70-4526-b663-113f4c999dd2",
+  "snapshot": "libreoffice_calc",
+  "instruction": "I'm working on a comprehensive report for our environmental policy review meeting next week. I need to integrate key insights from an important document, which is a guidebook on the Green Economy, where I'm particularly interested in the 'Introduction' section. Could you extract this section and compile them into a new Google Doc named 'environment_policy_report (draft)' under /environment_policy folder? This will significantly aid in our discussion on aligning our environmental policies with sustainable and green economic practices. Thanks!",
+  "source": "authors",
+  "config": [
+    {
+      "type": "googledrive",
+      "parameters": {
+        "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
+        "operation": [
+          "delete"
+        ],
+        "args": [
+          {
+            "query": "title = 'environment_policy_report (draft).doc' or title = 'environment_policy_report (draft).docx' or title = 'environment_policy_report (draft)'",
+            "trash": false
+          }
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "google-chrome",
+          "--remote-debugging-port=1337"
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "socat",
+          "tcp-listen:9222,fork",
+          "tcp:localhost:1337"
+        ]
+      }
+    },
+    {
+      "type": "login",
+      "parameters": {
+        "settings_file": "evaluation_examples/settings/google/settings.json",
+        "platform": "googledrive"
+      }
+    },
+    {
+      "type": "command",
+      "parameters": {
+        "command": [
+          "mkdir",
+          "-p",
+          "/home/user/Desktop/wwf"
+        ]
+      }
+    },
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "path": "/home/user/Desktop/wwf/lpr_living_planet_report_2016.pdf",
+            "url": "https://drive.google.com/uc?id=19NCdw_MVP6nH5nC6okYYe8U1mJABfTRK&export=download"
+          },
+          {
+            "path": "/home/user/Desktop/wwf/279c656a32_ENGLISH_FULL.pdf",
+            "url": "https://drive.google.com/uc?id=1ckH1NetfImQ9EyONTO-ZFWA8m8VIUFvD&export=download"
+          },
+          {
+            "path": "/home/user/Desktop/wwf/7g37j96psg_WWF_AR2021_spreads.pdf",
+            "url": "https://drive.google.com/uc?id=1cxLTzmqDKMomOyvho29lvFvhRnb0Y8__&export=download"
+          },
+          {
+            "path": "/home/user/Desktop/GE Guidebook.pdf",
+            "url": "https://drive.google.com/uc?id=1KzC_R3eI3Rmgwz5bkcI8Ohv7ebOrU-Is&export=download"
+          },
+          {
+            "path": "/home/user/Desktop/assessing_and_reporting_water_quality(q&a).pdf",
+            "url": "https://drive.google.com/uc?id=1LFojf3Weflv3fVdrZrgTY1iUaRdbT9kG&export=download"
+          }
+        ]
+      }
+    }
+  ],
+  "trajectory": "trajectories/0c825995-5b70-4526-b663-113f4c999dd2",
+  "related_apps": [
+    "libreoffice_calc",
+    "chrome",
+    "os"
+  ],
+  "evaluator": {
+    "func": "compare_docx_files",
+    "result": {
+      "type": "googledrive_file",
+      "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
+      "path": ["environment_policy", "environment_policy_report (draft)"],
+      "dest": "environment_policy_report (draft).docx"
+    },
+    "expected": {
+      "type": "cloud_file",
+      "path": "https://drive.google.com/uc?id=1A2ti9JncAfIa6ks7FTJWHtYlZo-68FtM&export=download",
+      "dest": "environment_policy_report (draft)_gold.docx"
+    },
+    "options": {
+      "content_only": true
+    }
+  }
 }
--- a/evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json
+++ b/evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json
@@ -90,6 +90,10 @@
            "type": "cloud_file",
            "path": "https://drive.usercontent.google.com/download?id=1Ej2iHG8p-QJe7FZQpPIIS82BHOlFAUQM&export=download&authuser=0&confirm=t",
            "dest": "gold_lecture_slides.zip"
+        },
+        "options": {
+            "file_path": "lecture_slides",
+            "file_type": "pdf"
        }
    }
 }
--- a/evaluation_examples/examples/multi_apps/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f.json
+++ b/evaluation_examples/examples/multi_apps/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f.json
@@ -1,7 +1,7 @@
 {
  "id": "3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
  "snapshot": "gimp",
-  "instruction": "Use `gdown` to download the image from \"https://drive.google.com/uc?export=download&id=1i8j5dGS57sA07jEuPNAlQW-sn5uqUnuK\", and then use GIMP to compress it to under 600KB. Resize if needed.",
+  "instruction": "Download the image from \"https://drive.google.com/uc?export=download&id=1i8j5dGS57sA07jEuPNAlQW-sn5uqUnuK\", and then use GIMP to compress it to under 600KB. Resize if needed.",
  "source": "",
  "config": [
    {
--- a/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json
+++ b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json
@@ -57,7 +57,7 @@
    {
      "type": "launch",
      "parameters": {
-        "command": ["nautilus"]
+        "command": ["nautilus", "/home/user/Documents/Finance"]
      }
    }
  ],
--- a/evaluation_examples/examples/multi_apps/42f4d1c7-4521-4161-b646-0a8934e36081.json
+++ b/evaluation_examples/examples/multi_apps/42f4d1c7-4521-4161-b646-0a8934e36081.json
@@ -1,7 +1,7 @@
 {
  "id": "42f4d1c7-4521-4161-b646-0a8934e36081",
  "snapshot": "gimp",
-  "instruction": "Configure VS Code to edit GIMP script-fu scripts effectively by installing lisp extension. Test by writing code to resizing the image as 128 * 128 as \"resized.png\"",
+  "instruction": "Configure VS Code to edit GIMP script-fu scripts effectively by installing lisp extension. Test by writing code to resize the image \"character.png\" to 128 * 128 as \"resized.png\".",
  "source": "",
  "config": [
    {
--- a/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json
+++ b/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json
@@ -37,7 +37,7 @@
        },
        "result": {
            "type": "cloud_file",
-            "path": "https://drive.usercontent.google.com/download?id=1lpRSXEoZq3ENOG5ekaAsBQSNv5ig0mDr&export=download&authuser=0&confirm=t&uuid=4cb10a33-81b3-4814-a969-f469832e33e5&at=APZUnTWN3pyiVpS003vLOgCcq2gu:1709710047375",
+            "path": "https://drive.usercontent.google.com/download?id=13if1UwZ5ay6ADAVW2jp3rcyvAEBse6MJ&export=download&authuser=0&confirm=t&uuid=2ea03068-1874-4240-baa1-f8bb2f917a99&at=APZUnTXq6dVlASg819jCaI1A-rm2:1710136385956",
            "dest": "image_original.png"
        }
    }
--- a/evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json
+++ b/evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json
@@ -45,7 +45,7 @@
      "rules": {
        "include": [],
        "exclude": [
-          "ModuleNotFoundError: No module named"
+          "Error:"
        ]
      }
    }
--- a/evaluation_examples/examples/multi_apps/91190194-f406-4cd6-b3f9-c43fac942b22.json
+++ b/evaluation_examples/examples/multi_apps/91190194-f406-4cd6-b3f9-c43fac942b22.json
@@ -11,10 +11,6 @@
          {
            "url": "https://drive.google.com/uc?export=download&id=1bmSRNNh4JkF6izrKrmynUHarf0pFES50",
            "path": "/home/user/Desktop/cola.png"
-          },
-          {
-            "url": "https://drive.google.com/uc?export=download&id=1MayrIPJWRK7cMEVe3TxYmgkAbVMrYcQA",
-            "path": "/home/user/Desktop/cropped_gold.png"
          }
        ]
      }
@@ -43,8 +39,8 @@
      "dest": "cropped.png"
    },
    "expected": {
-      "type": "vm_file",
-      "path": "/home/user/Desktop/cropped_gold.png",
+      "type": "cloud_file",
+      "path": "https://drive.google.com/uc?export=download&id=1MayrIPJWRK7cMEVe3TxYmgkAbVMrYcQA",
      "dest": "cropped_gold.png"
    }
  }
--- a/evaluation_examples/examples/multi_apps/98e8e339-5f91-4ed2-b2b2-12647cb134f4.json
+++ b/evaluation_examples/examples/multi_apps/98e8e339-5f91-4ed2-b2b2-12647cb134f4.json
@@ -1,7 +1,7 @@
 {
  "id": "98e8e339-5f91-4ed2-b2b2-12647cb134f4",
  "snapshot": "vs_code",
-  "instruction": "Merge the contents of all .txt files from your vscode project into a single document in Writer. No merging separator is needed. Ensure to set the overall font size of the document to 10.",
+  "instruction": "Merge the contents of all .txt files from your vscode project into a single document \"concat.docx\" on the Desktop with LibreOffice Writer. No merging separator is needed. Ensure to set the overall font size of the document to 10.",
  "source": "",
  "config": [
    {
--- a/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json
+++ b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json
@@ -1,7 +1,7 @@
 {
  "id": "b337d106-053f-4d37-8da0-7f9c4043a66b",
  "snapshot": "os",
-  "instruction": "Recently, I've been exploring the use of the Vim editor for code editing. However, the default settings don't display line numbers in Vim editor. Please search the internet for a tutorial on adding line numbers in Vim and setting it as default for my local Vim.",
+  "instruction": "Recently, I've been exploring the use of the Vim editor for code editing. However, the default settings don't display line numbers in Vim editor. Please search the Internet for a tutorial on adding absolute line numbers in Vim and setting it as default for my local Vim.",
  "source": "authors",
  "config": [
    {
@@ -52,7 +52,7 @@
        "parameters": {
          "files": [
            {
-              "url": "https://drive.usercontent.google.com/download?id=1CyhWjUS2oov4Fzc0VRwTh6LiS2Qu-T_8&export=download&authuser=0&confirm=t&uuid=9d0e2c62-895c-4bb3-a057-30cae60329ed&at=APZUnTVngSwARjYsWSmhSyHAqwID:1709647023362",
+              "url": "https://drive.usercontent.google.com/download?id=1CyhWjUS2oov4Fzc0VRwTh6LiS2Qu-T_8&export=download&authuser=0&confirm=t&uuid=384ea31d-c9ae-4e81-be19-42035c563014&at=APZUnTU9lsYwMLfWb7RIizGr1D7H:1710085473758",
              "path": "eval.sh"
            }
          ]
--- a/evaluation_examples/examples/multi_apps/d68204bf-11c1-4b13-b48b-d303c73d4bf6.json
+++ b/evaluation_examples/examples/multi_apps/d68204bf-11c1-4b13-b48b-d303c73d4bf6.json
@@ -11,10 +11,6 @@
          {
            "url": "https://drive.google.com/uc?export=download&id=1CPGW_OZsfSWDdTU7CFrTjpzSAASyLy4w",
            "path": "/home/user/Desktop/tilearray.png"
-          },
-          {
-            "url": "https://drive.google.com/uc?export=download&id=1aHwmnxL2CKEh_FhVpevY452-BQH2t5rG",
-            "path": "/home/user/Desktop/rearranged_gold.png"
          }
        ]
      }
@@ -43,8 +39,8 @@
      "dest": "rearranged.png"
    },
    "expected": {
-      "type": "vm_file",
-      "path": "/home/user/Desktop/rearranged_gold.png",
+      "type": "cloud_file",
+      "path": "https://drive.google.com/uc?export=download&id=1aHwmnxL2CKEh_FhVpevY452-BQH2t5rG",
      "dest": "rearranged_gold.png"
    }
  }
--- a/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json
+++ b/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json
@@ -1,7 +1,7 @@
 {
    "id": "da922383-bfa4-4cd3-bbad-6bebab3d7742",
    "snapshot": "multiapps",
-    "instruction": "I browsed a lot of interesting blog articles today. I hope to store these articles in my local designated folder just like zotero stores papers. Please download the blogs opening now in pdf format and save them in their tile to /home/user/Documents/Blogs.",
+    "instruction": "I browsed a lot of interesting blog articles today. I hope to store these articles in my local designated folder just like zotero stores papers. Please download the blogs opening now in pdf format and save them in their title to /home/user/Documents/Blog.",
    "source": "authors",
    "config": [
        {
--- a/evaluation_examples/settings/googledrive/credentials.json
+++ b/evaluation_examples/settings/googledrive/credentials.json
@@ -1 +1 @@
-{"access_token": "ya29.a0Ad52N39ZcfpNY4Sf7ENoFll1YKdduqt6LPdlPFARhaxx90vWPlXvhHfTaEFDEMAbkOBa-08KkZ1huROAlXDIcVRTvfBOuodzWus5ewKSo3E-1Co9R17O3d6UkIDrSyYYET5j9CBnP4x-bgNTDCadJXqVTTCvXGRAyFHXTwaCgYKASgSARISFQHGX2MiQ7ND_iA14Ai6cSVeNKeqdg0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0eVpYfdSAjvbCCgYIARAAGA4SNwF-L9IrAgL6KVceiEVTjtQdmPki2I3m8ejP3lzTLL2Wa3-rdrYfU7eYeKDVCS5KRxa_xCE_pPY", "token_expiry": "2024-03-10T08:19:52Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0Ad52N39ZcfpNY4Sf7ENoFll1YKdduqt6LPdlPFARhaxx90vWPlXvhHfTaEFDEMAbkOBa-08KkZ1huROAlXDIcVRTvfBOuodzWus5ewKSo3E-1Co9R17O3d6UkIDrSyYYET5j9CBnP4x-bgNTDCadJXqVTTCvXGRAyFHXTwaCgYKASgSARISFQHGX2MiQ7ND_iA14Ai6cSVeNKeqdg0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
+{"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0eVpYfdSAjvbCCgYIARAAGA4SNwF-L9IrAgL6KVceiEVTjtQdmPki2I3m8ejP3lzTLL2Wa3-rdrYfU7eYeKDVCS5KRxa_xCE_pPY", "token_expiry": "2024-03-13T10:09:01Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
--- a/experiment_screenshot.py
+++ b/experiment_screenshot.py
@@ -3,11 +3,16 @@ import json
 import logging
 import os
 import sys
-import time
+
+# import eventlet
 import func_timeout
+from func_timeout import FunctionTimedOut
+
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent

+# eventlet.monkey_patch()
+
 # from mm_agents.gemini_pro_agent import GeminiPro_Agent

 #  Logger Configs {{{ # 
@@ -47,12 +52,14 @@ logger = logging.getLogger("desktopenv.experiment")
 PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"


-def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
+def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True,
+                    max_time=600):
    trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
    env = DesktopEnv(
        path_to_vm=PATH_TO_VM,
        action_space=agent.action_space,
-        task_config=example
+        task_config=example,
+        headless=True
    )
    # reset the environment to certain snapshot
    observation = env.reset()
@@ -106,7 +113,7 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
            print(f"An error occurred while stopping the recording: {e}")

    try:
-        func_timeout.func_timeout(30, stop_recording)
+        func_timeout.func_timeout(120, stop_recording)
    except func_timeout.exceptions.FunctionTimedOut:
        logger.info("Recording timed out.")

@@ -123,7 +130,7 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
    logger.info("Environment closed.")


-def main(example_class, example_id, gpt4_model = "gpt-4-vision-preview"):
+def main(example_class, example_id, gpt4_model="gpt-4-vision-preview"):
    action_space = "pyautogui"
    gemini_model = "gemini-pro-vision"

@@ -136,7 +143,10 @@ def main(example_class, example_id, gpt4_model = "gpt-4-vision-preview"):
    example["snapshot"] = "exp_v5"

    api_key = os.environ.get("OPENAI_API_KEY")
-    agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], action_space=action_space,
+    agent = GPT4v_Agent(api_key=api_key,
+                        model=gpt4_model,
+                        instruction=example['instruction'],
+                        action_space=action_space,
                        exp="screenshot")
    #
    # api_key = os.environ.get("GENAI_API_KEY")
@@ -149,71 +159,185 @@ def main(example_class, example_id, gpt4_model = "gpt-4-vision-preview"):

    os.makedirs(example_trajectory_dir, exist_ok=True)

-    run_one_example(example, agent, 15, example_trajectory_dir)
+    if os.path.exists(os.path.join(example_trajectory_dir, "trajectory.json")):
+        with open(os.path.join(example_trajectory_dir, "trajectory.json"), "r") as f:
+            lines = f.readlines()
+            # strip the last line if it is empty
+            lines = [line.strip() for line in lines if line.strip() != ""]
+            if len(lines) > 0:
+                last_line = json.loads(lines[-1])
+                if "result" in last_line:
+                    logger.info(f"evaluation_examples/examples/{example_class}/{example_id}.json" + "has been evaluated. Skip.")
+                    return
+
+    try:
+        func_timeout.func_timeout(1200, run_one_example, args=(example, agent, 15, example_trajectory_dir))
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        with open(os.path.join(example_trajectory_dir, "trajectory.json"), "a") as f:
+            f.write(json.dumps({
+                "error": str(e)
+            }))


 if __name__ == '__main__':
-    chrome_list = [
-        # "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
-        # "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
-        # "06fe7178-4491-4589-810f-2e2bc9502122",
-        # "e1e75309-3ddb-4d09-92ec-de869c928143",
-        # "35253b65-1c19-4304-8aa4-6884b8218fc0",
-        # "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
-        # "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
-        # "44ee5668-ecd5-4366-a6ce-c1c9b8d4e938",
-        # "2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3",
-        "480bcfea-d68f-4aaa-a0a9-2589ef319381",
-        "af630914-714e-4a24-a7bb-f9af687d3b91"
+    ####### The complete version of the list of examples #######
+
+    os_list = [
+        '94d95f96-9699-4208-98ba-3c3119edf9c2',
+        'bedcedc4-4d72-425e-ad62-21960b11fe0d',
+        '43c2d64c-bab5-4dcb-a30c-b888321c319a',
+        '7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82',
+        'ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3',
+        'a462a795-fdc7-4b23-b689-e8b6df786b78',
+        'f9be0997-4b7c-45c5-b05c-4612b44a6118',
+        '28cc3b7e-b194-4bc9-8353-d04c0f4d56d2',
+        '5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57',
+        'e0df059f-28a6-4169-924f-b9623e7184cc',
+        'ddc75b62-7311-4af8-bfb3-859558542b36',
+        'b6781586-6346-41cd-935a-a6b1487918fc',
+        'b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa',
+        '3ce045a0-877b-42aa-8d2c-b4a863336ab8',
+        'fe41f596-a71b-4c2f-9b2f-9dcd40b568c3',
+        'a4d98375-215b-4a4d-aee9-3d4370fccc41',
+        '13584542-872b-42d8-b299-866967b5c3ef',
+        '23393935-50c7-4a86-aeea-2b78fd089c5c',
+        '5812b315-e7bd-4265-b51f-863c02174c28',
+        'c288e301-e626-4b98-a1ab-159dcb162af5',
+        'cc9d4f34-1ca0-4a1b-8ff2-09302696acb9',
+        'c56de254-a3ec-414e-81a6-83d2ce8c41fa',
+        '4783cc41-c03c-4e1b-89b4-50658f642bd5',
+        '5c1075ca-bb34-46a3-a7a0-029bd7463e79',
+        '5ced85fc-fa1a-4217-95fd-0fb530545ce2',
+        '37887e8c-da15-4192-923c-08fa390a176d',
+        '4127319a-8b79-4410-b58a-7a151e15f3d7',
+        '4d117223-a354-47fb-8b45-62ab1390a95f',
+        '6f56bf42-85b8-4fbb-8e06-6c44960184ba'
    ]
+
    calc_list = [
-    "a9f325aa-8c05-4e4f-8341-9e4358565f4f",
-    "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
-    "7efeb4b1-3d19-4762-b163-63328d66303b",
-    "4e6fcf72-daf3-439f-a232-c434ce416af6",
-    "6054afcb-5bab-4702-90a0-b259b5d3217c",
-    "abed40dc-063f-4598-8ba5-9fe749c0615d",
-    "01b269ae-2111-4a07-81fd-3fcd711993b0",
-    "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
-    "af2b02f7-acee-4be4-8b66-499fab394915",
-    "da1d63b8-fa12-417b-ba18-f748e5f770f3",
-    "636380ea-d5f6-4474-b6ca-b2ed578a20f1",
-    "5ba77536-05c5-4aae-a9ff-6e298d094c3e",
-    "4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b",
-    "672a1b02-c62f-4ae2-acf0-37f5fb3052b0",
-    "648fe544-16ba-44af-a587-12ccbe280ea6",
-    "8985d1e4-5b99-4711-add4-88949ebb2308",
-    "9e606842-2e27-43bf-b1d1-b43289c9589b",
-    "fcb6e45b-25c4-4087-9483-03d714f473a9",
-    "68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2",
-    "fff629ea-046e-4793-8eec-1a5a15c3eb35",
-    "5c9a206c-bb00-4fb6-bb46-ee675c187df5",
-    "e975ae74-79bd-4672-8d1c-dc841a85781d",
-    "34a6938a-58da-4897-8639-9b90d6db5391",
-    "b5a22759-b4eb-4bf2-aeed-ad14e8615f19",
-    "2f9913a1-51ed-4db6-bfe0-7e1c95b3139e",
-    "2558031e-401d-4579-8e00-3ecf540fb492",
-    "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
-    "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
-    "51b11269-2ca8-4b2a-9163-f21758420e78",
-    "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
-    "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
-    "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
-    "3aaa4e37-dc91-482e-99af-132a612d40f3",
-    "37608790-6147-45d0-9f20-1137bb35703d",
-    "f9584479-3d0d-4c79-affa-9ad7afdd8850",
-    "d681960f-7bc3-4286-9913-a8812ba3261a",
-    "21df9241-f8d7-4509-b7f1-37e501a823f7",
-    "1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
-    "357ef137-7eeb-4c80-a3bb-0951f26a8aff",
-    "aa3a8974-2e85-438b-b29e-a64df44deb4b",
-    "a01fbce3-2793-461f-ab86-43680ccbae25",
-    "4f07fbe9-70de-4927-a4d5-bb28bc12c52c"
-]
-    # for example_id in calc_list:
-    #     main("libreoffice_calc", example_id)
+        'eb03d19a-b88d-4de4-8a64-ca0ac66f426b',
+        '0bf05a7d-b28b-44d2-955a-50b41e24012a',
+        '7b802dad-6e0f-4204-9815-d4e3f57627d8',
+        '7a4e4bc8-922c-4c84-865c-25ba34136be1',
+        '2bd59342-0664-4ccb-ba87-79379096cc08',
+        'a9f325aa-8c05-4e4f-8341-9e4358565f4f',
+        'ecb0df7a-4e8d-4a03-b162-053391d3afaf',
+        '7efeb4b1-3d19-4762-b163-63328d66303b',
+        '4e6fcf72-daf3-439f-a232-c434ce416af6',
+        '6054afcb-5bab-4702-90a0-b259b5d3217c',
+        'abed40dc-063f-4598-8ba5-9fe749c0615d',
+        '01b269ae-2111-4a07-81fd-3fcd711993b0',
+        '8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14',
+        '0cecd4f3-74de-457b-ba94-29ad6b5dafb6',
+        '4188d3a4-077d-46b7-9c86-23e1a036f6c1',
+        '51b11269-2ca8-4b2a-9163-f21758420e78',
+        '7e429b8d-a3f0-4ed0-9b58-08957d00b127',
+        '347ef137-7eeb-4c80-a3bb-0951f26a8aff',
+        '6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5',
+        '3aaa4e37-dc91-482e-99af-132a612d40f3',
+        '37608790-6147-45d0-9f20-1137bb35703d',
+        'f9584479-3d0d-4c79-affa-9ad7afdd8850',
+        'd681960f-7bc3-4286-9913-a8812ba3261a',
+        '21df9241-f8d7-4509-b7f1-37e501a823f7',
+        '1334ca3e-f9e3-4db8-9ca7-b4c653be7d17',
+        '357ef137-7eeb-4c80-a3bb-0951f26a8aff',
+        'aa3a8974-2e85-438b-b29e-a64df44deb4b',
+        'a01fbce3-2793-461f-ab86-43680ccbae25',
+        '4f07fbe9-70de-4927-a4d5-bb28bc12c52c',
+        '1e8df695-bd1b-45b3-b557-e7d599cf7597',
+        '1273e544-688f-496b-8d89-3e0f40aa0606',
+        '4172ea6e-6b77-4edb-a9cc-c0014bd1603b',
+        '0326d92d-d218-48a8-9ca1-981cd6d064c7',
+        '26a8440e-c166-4c50-aef4-bfb77314b46b',
+        '1954cced-e748-45c4-9c26-9855b97fbc5e',
+        '535364ea-05bd-46ea-9937-9f55c68507e8',
+        '4de54231-e4b5-49e3-b2ba-61a0bec721c0',
+        '1de60575-bb6e-4c3d-9e6a-2fa699f9f197',
+        '0a2e43bf-b26c-4631-a966-af9dfa12c9e5',
+        '3a7c8185-25c1-4941-bd7b-96e823c9f21f',
+        '04d9aeaf-7bed-4024-bedb-e10e6f00eb7f',
+        '42e0a640-4f19-4b28-973d-729602b5a4a7',
+        '1d17d234-e39d-4ed7-b46f-4417922a4e7c',
+        '21ab7b40-77c2-4ae6-8321-e00d3a086c73',
+        '30e3e107-1cfb-46ee-a755-2cd080d7ba6a',
+        '12382c62-0cd1-4bf2-bdc8-1d20bf9b2371',
+        '035f41ba-6653-43ab-aa63-c86d449d62e5',
+        '51719eea-10bc-4246-a428-ac7c433dd4b3'
+    ]
+
+    sheetcopilot_list = [
+        # "1e8df695-bd1b-45b3-b557-e7d599cf7597",
+        # "1273e544-688f-496b-8d89-3e0f40aa0606",
+        # "4172ea6e-6b77-4edb-a9cc-c0014bd1603b",
+        # "0326d92d-d218-48a8-9ca1-981cd6d064c7",
+        # "26a8440e-c166-4c50-aef4-bfb77314b46b",
+        # "1954cced-e748-45c4-9c26-9855b97fbc5e",
+        # "535364ea-05bd-46ea-9937-9f55c68507e8",
+        # "4de54231-e4b5-49e3-b2ba-61a0bec721c0",
+        # "1de60575-bb6e-4c3d-9e6a-2fa699f9f197",
+        # "0a2e43bf-b26c-4631-a966-af9dfa12c9e5",
+        # "3a7c8185-25c1-4941-bd7b-96e823c9f21f",
+        # "04d9aeaf-7bed-4024-bedb-e10e6f00eb7f",
+        # "42e0a640-4f19-4b28-973d-729602b5a4a7",
+        # "1d17d234-e39d-4ed7-b46f-4417922a4e7c",
+        "21ab7b40-77c2-4ae6-8321-e00d3a086c73",
+        "30e3e107-1cfb-46ee-a755-2cd080d7ba6a",
+        "12382c62-0cd1-4bf2-bdc8-1d20bf9b2371",
+        "51719eea-10bc-4246-a428-ac7c433dd4b3"
+    ]

    impress_list = [
+        '5d901039-a89c-4bfb-967b-bf66f4df075e',
+        '550ce7e7-747b-495f-b122-acdc4d0b8e54',
+        '455d3c66-7dc6-4537-a39a-36d3e9119df7',
+        'af23762e-2bfd-4a1d-aada-20fa8de9ce07',
+        'c59742c0-4323-4b9d-8a02-723c251deaa0',
+        'ef9d12bd-bcee-4ba0-a40e-918400f43ddf',
+        '9ec204e4-f0a3-42f8-8458-b772a6797cab',
+        '0f84bef9-9790-432e-92b7-eece357603fb',
+        'ce88f674-ab7a-43da-9201-468d38539e4a',
+        '3b27600c-3668-4abd-8f84-7bcdebbccbdb',
+        'a097acff-6266-4291-9fbd-137af7ecd439',
+        'bf4e9888-f10f-47af-8dba-76413038b73c',
+        '21760ecb-8f62-40d2-8d85-0cee5725cb72',
+        'ac9bb6cb-1888-43ab-81e4-a98a547918cd',
+        '2cd43775-7085-45d8-89fa-9e35c0a915cf',
+        '358aa0a7-6677-453f-ae35-e440f004c31e',
+        'a669ef01-ded5-4099-9ea9-25e99b569840',
+        '73c99fb9-f828-43ce-b87a-01dc07faa224',
+        '15aece23-a215-4579-91b4-69eec72e18da',
+        '986fc832-6af2-417c-8845-9272b3a1528b',
+        'a434992a-89df-4577-925c-0c58b747f0f4',
+        '7dbc52a6-11e0-4c9a-a2cb-1e36cfda80d8',
+        '841b50aa-df53-47bd-a73a-22d3a9f73160',
+        '8979838c-54a5-4454-a2b8-3d135a1a5c8f',
+        'b8adbc24-cef2-4b15-99d5-ecbe7ff445eb',
+        '2b94c692-6abb-48ae-ab0b-b3e8a19cb340',
+        '9cf05d24-6bd9-4dae-8967-f67d88f5d38a',
+        '08aced46-45a2-48d7-993b-ed3fb5b32302',
+        'edb61b14-a854-4bf5-a075-c8075c11293a',
+        'c82632a4-56b6-4db4-9dd1-3820ee3388e4',
+        '39be0d19-634d-4475-8768-09c130f5425d',
+        'ac1b39ff-ee4d-4483-abce-c117e98942f0',
+        'f23acfd2-c485-4b7c-a1e7-d4303ddfe864',
+        '70bca0cc-c117-427e-b0be-4df7299ebeb6',
+        'af2d657a-e6b3-4c6a-9f67-9e3ed015974c',
+        '57667013-ea97-417c-9dce-2713091e6e2a',
+        '0a211154-fda0-48d0-9274-eaac4ce5486d',
+        'a53f80cd-4a90-4490-8310-097b011433f6',
+        '7ae48c60-f143-4119-b659-15b8f485eb9a',
+        '5cfb9197-e72b-454b-900e-c06b0c802b40',
+        '05dd4c1d-c489-4c85-8389-a7836c4f0567',
+        '5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1',
+        '4ed5abd0-8b5d-47bd-839f-cacfa15ca37a',
+        'e4ef0baf-4b52-4590-a47e-d4d464cca2d7',
+        'ed43c15f-00cb-4054-9c95-62c880865d68',
+        '3161d64e-3120-47b4-aaad-6a764a92493b',
+        '04578141-1d42-4146-b9cf-6fab4ce5fd74'
+    ]
+
+    impress_gpt4_list = [
        # "5d901039-a89c-4bfb-967b-bf66f4df075e",
        # "550ce7e7-747b-495f-b122-acdc4d0b8e54",
        # "455d3c66-7dc6-4537-a39a-36d3e9119df7",
@@ -226,81 +350,325 @@ if __name__ == '__main__':
        # "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
        # "a097acff-6266-4291-9fbd-137af7ecd439",
        # "bf4e9888-f10f-47af-8dba-76413038b73c",
-        "21760ecb-8f62-40d2-8d85-0cee5725cb72"
+        # "21760ecb-8f62-40d2-8d85-0cee5725cb72",
+        "ac9bb6cb-1888-43ab-81e4-a98a547918cd",
+        "2cd43775-7085-45d8-89fa-9e35c0a915cf",
+        "358aa0a7-6677-453f-ae35-e440f004c31e",
+        "a669ef01-ded5-4099-9ea9-25e99b569840",
+        # The following examples are from PPTC
+        "73c99fb9-f828-43ce-b87a-01dc07faa224",
+        "15aece23-a215-4579-91b4-69eec72e18da",
+        "986fc832-6af2-417c-8845-9272b3a1528b",
+        "a434992a-89df-4577-925c-0c58b747f0f4",
+        "7dbc52a6-11e0-4c9a-a2cb-1e36cfda80d8",
+        "841b50aa-df53-47bd-a73a-22d3a9f73160",
+        "8979838c-54a5-4454-a2b8-3d135a1a5c8f",
+        "b8adbc24-cef2-4b15-99d5-ecbe7ff445eb",
+        "2b94c692-6abb-48ae-ab0b-b3e8a19cb340",
+        "9cf05d24-6bd9-4dae-8967-f67d88f5d38a",
+        "08aced46-45a2-48d7-993b-ed3fb5b32302",
+        "edb61b14-a854-4bf5-a075-c8075c11293a",
+        "c82632a4-56b6-4db4-9dd1-3820ee3388e4",
+        "39be0d19-634d-4475-8768-09c130f5425d",
+        "ac1b39ff-ee4d-4483-abce-c117e98942f0",
+        "f23acfd2-c485-4b7c-a1e7-d4303ddfe864",
+        "70bca0cc-c117-427e-b0be-4df7299ebeb6",
+        "af2d657a-e6b3-4c6a-9f67-9e3ed015974c",
+        "57667013-ea97-417c-9dce-2713091e6e2a",
+        "0a211154-fda0-48d0-9274-eaac4ce5486d",
+        "a53f80cd-4a90-4490-8310-097b011433f6",
+        "7ae48c60-f143-4119-b659-15b8f485eb9a",
+        "5cfb9197-e72b-454b-900e-c06b0c802b40",
+        "05dd4c1d-c489-4c85-8389-a7836c4f0567",
+        "5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1",
+        "4ed5abd0-8b5d-47bd-839f-cacfa15ca37a",
+        "e4ef0baf-4b52-4590-a47e-d4d464cca2d7",
+        "ed43c15f-00cb-4054-9c95-62c880865d68",
+        "3161d64e-3120-47b4-aaad-6a764a92493b",
+        "04578141-1d42-4146-b9cf-6fab4ce5fd74"
    ]
-    # for example_id in impress_list:
-    #     main("libreoffice_impress", example_id)

-    # gimp_list = [
-    #     "7a4deb26-d57d-4ea9-9a73-630f66a7b568",
-    #     "554785e9-4523-4e7a-b8e1-8016f565f56a",
-    #     "77b8ab4d-994f-43ac-8930-8ca087d7c4b4",
-    #     "f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce",
-    #     "d52d6308-ec58-42b7-a2c9-de80e4837b2b",
-    #     "2a729ded-3296-423d-aec4-7dd55ed5fbb3",
-    #     "b148e375-fe0b-4bec-90e7-38632b0d73c2",
-    #     "a746add2-cab0-4740-ac36-c3769d9bfb46",
-    #     "7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
-    #     "d16c99dc-2a1e-46f2-b350-d97c86c85c15",
-    #     "06ca5602-62ca-47f6-ad4f-da151cde54cc",
-    #     "e2dd0213-26db-4349-abe5-d5667bfd725c",
-    #     "f723c744-e62c-4ae6-98d1-750d3cd7d79d",
-    #     "72f83cdc-bf76-4531-9a1b-eb893a13f8aa",
-    #     "7767eef2-56a3-4cea-8c9f-48c070c7d65b",
-    #     "734d6579-c07d-47a8-9ae2-13339795476b"
-    # ]
-    #
-    # for example_id in gimp_list:
-    #     try:
-    #         main("gimp", example_id)
-    #     except Exception as e:
-    #         logger.error("An error occurred while running the example: %s", e)
-    #         continue
-    #
+    writer_list = [
+        '0810415c-bde4-4443-9047-d5f70165a697',
+        '0a0faba3-5580-44df-965d-f562a99b291c',
+        '0b17a146-2934-46c7-8727-73ff6b6483e8',
+        '0e47de2a-32e0-456c-a366-8c607ef7a9d2',
+        '0e763496-b6bb-4508-a427-fad0b6c3e195',
+        '3ef2b351-8a84-4ff2-8724-d86eae9b842e',
+        '4bcb1253-a636-4df4-8cb0-a35c04dfef31',
+        '66399b0d-8fda-4618-95c4-bfc6191617e9',
+        '6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2',
+        '6ada715d-3aae-4a32-a6a7-429b2e43fb93',
+        '6f81754e-285d-4ce0-b59e-af7edb02d108',
+        '72b810ef-4156-4d09-8f08-a0cf57e7cefe',
+        '8472fece-c7dd-4241-8d65-9b3cd1a0b568',
+        '88fe4b2d-3040-4c70-9a70-546a47764b48',
+        '936321ce-5236-426a-9a20-e0e3c5dc536f',
+        'adf5e2c3-64c7-4644-b7b6-d2f0167927e7',
+        'b21acd93-60fd-4127-8a43-2f5178f4a830',
+        'd53ff5ee-3b1a-431e-b2be-30ed2673079b',
+        'e246f6d8-78d7-44ac-b668-fcf47946cb50',
+        'e528b65e-1107-4b8c-8988-490e4fece599',
+        'ecc2413d-8a48-416e-a3a2-d30106ca36cb',
+        'f178a4a9-d090-4b56-bc4c-4b72a61a035d',
+        'bb8ccc78-479f-4a2f-a71e-d565e439436b'
+    ]
+
+    vlc_list = [
+        '59f21cfb-0120-4326-b255-a5b827b38967',
+        '8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89',
+        '8f080098-ddb1-424c-b438-4e96e5e4786e',
+        'bba3381f-b5eb-4439-bd9e-80c22218d5a7',
+        'fba2c100-79e8-42df-ae74-b592418d54f4',
+        'efcf0d81-0835-4880-b2fd-d866e8bc2294',
+        '8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f',
+        'aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6',
+        '386dbd0e-0241-4a0a-b6a2-6704fba26b1c',
+        '9195653c-f4aa-453d-aa95-787f6ccfaae9',
+        'd06f0d4d-2cd5-4ede-8de9-598629438c6e',
+        'a5bbbcd5-b398-4c91-83d4-55e1e31bbb81',
+        '5ac2891a-eacd-4954-b339-98abba077adb',
+        'f3977615-2b45-4ac5-8bba-80c17dbe2a37',
+        '215dfd39-f493-4bc3-a027-8a97d72c61bf',
+        'cb130f0d-d36f-4302-9838-b3baf46139b6',
+        '7882ed6e-bece-4bf0-bada-c32dc1ddae72'
+    ]
+
+    chrome_list = [
+        'bb5e4c0d-f964-439c-97b6-bdb9747de3f4',
+        '7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3',
+        '06fe7178-4491-4589-810f-2e2bc9502122',
+        'e1e75309-3ddb-4d09-92ec-de869c928143',
+        '35253b65-1c19-4304-8aa4-6884b8218fc0',
+        '2ad9387a-65d8-4e33-ad5b-7580065a27ca',
+        '7a5a7856-f1b6-42a4-ade9-1ca81ca0f263',
+        '44ee5668-ecd5-4366-a6ce-c1c9b8d4e938',
+        '2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3',
+        '480bcfea-d68f-4aaa-a0a9-2589ef319381',
+        'af630914-714e-4a24-a7bb-f9af687d3b91',
+        '3720f614-37fd-4d04-8a6b-76f54f8c222d',
+        '99146c54-4f37-4ab8-9327-5f3291665e1e',
+        '12086550-11c0-466b-b367-1d9e75b3910e',
+        '6766f2b8-8a72-417f-a9e5-56fcaa735837',
+        '93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9',
+        'ae78f875-5b98-4907-bbb5-9c737fc68c03',
+        '3299584d-8f11-4457-bf4c-ce98f7600250',
+        '030eeff7-b492-4218-b312-701ec99ee0cc',
+        '9656a811-9b5b-4ddf-99c7-5117bcef0626',
+        'fc6d8143-9452-4171-9459-7f515143419a',
+        'a96b564e-dbe9-42c3-9ccf-b4498073438a',
+        '1704f00f-79e6-43a7-961b-cedd3724d5fd',
+        'f3b19d1e-2d48-44e9-b4e1-defcae1a0197',
+        '82bc8d6a-36eb-4d2d-8801-ef714fb1e55a',
+        '47543840-672a-467d-80df-8f7c3b9788c9',
+        'c1fa57f3-c3db-4596-8f09-020701085416',
+        'da46d875-6b82-4681-9284-653b0c7ae241',
+        '6c4c23a1-42a4-43cc-9db1-2f86ff3738cc',
+        'f79439ad-3ee8-4f99-a518-0eb60e5652b0',
+        'b7895e80-f4d1-4648-bee0-4eb45a6f1fa8',
+        '9f3f70fc-5afc-4958-a7b7-3bb4fcb01805',
+        '7f52cab9-535c-4835-ac8c-391ee64dc930',
+        '82279c77-8fc6-46f6-9622-3ba96f61b477',
+        '2888b4e6-5b47-4b57-8bf5-c73827890774',
+        'b4f95342-463e-4179-8c3f-193cd7241fb2',
+        'f5d96daf-83a8-4c86-9686-bada31fc66ab',
+        '121ba48f-9e17-48ce-9bc6-a4fb17a7ebba',
+        '368d9ba4-203c-40c1-9fa3-da2f1430ce63',
+        '59155008-fe71-45ec-8a8f-dc35497b6aa8',
+        'a728a36e-8bf1-4bb6-9a03-ef039a5233f0',
+        'b070486d-e161-459b-aa2b-ef442d973b92',
+        '0d8b7de3-e8de-4d86-b9fd-dd2dce58a217',
+        '9f935cce-0a9f-435f-8007-817732bfc0a5',
+        'f0b971a1-6831-4b9b-a50e-22a6e47f45ba',
+        'cabb3bae-cccb-41bd-9f5d-0f3a9fecd825'
+    ]

    vs_code_list = [
-        # "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
-        # "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
-        # "eabc805a-bfcf-4460-b250-ac92135819f6",
-        # "982d12a5-beab-424f-8d38-d2a48429e511",
-        # "4e60007a-f5be-4bfc-9723-c39affa0a6d3",
-        # "e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
-        # "9439a27b-18ae-42d8-9778-5f68f891805e",
-        "ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
-        "930fdb3b-11a8-46fe-9bac-577332e2640e",
-        "276cc624-87ea-4f08-ab93-f770e3790175",
-        "9d425400-e9b2-4424-9a4b-d4c7abac4140"
+        '0ed39f63-6049-43d4-ba4d-5fa2fe04a951',
+        '53ad5833-3455-407b-bbc6-45b4c79ab8fb',
+        'eabc805a-bfcf-4460-b250-ac92135819f6',
+        '982d12a5-beab-424f-8d38-d2a48429e511',
+        '4e60007a-f5be-4bfc-9723-c39affa0a6d3',
+        'e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2',
+        '9439a27b-18ae-42d8-9778-5f68f891805e',
+        'ae506c68-352c-4094-9caa-ee9d42052317',
+        'ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae',
+        'c714dcee-cad3-4e12-8f3c-12bdcfcdb048',
+        '930fdb3b-11a8-46fe-9bac-577332e2640e',
+        '276cc624-87ea-4f08-ab93-f770e3790175',
+        '9d425400-e9b2-4424-9a4b-d4c7abac4140',
+        '5e2d93d8-8ad0-4435-b150-1692aacaa994',
+        '6ed0a554-cbee-4b44-84ea-fd6c042f4fe1',
+        'ec71221e-ac43-46f9-89b8-ee7d80f7e1c5',
+        '70745df8-f2f5-42bd-8074-fbc10334fcc5',
+        '57242fad-77ca-454f-b71b-f187181a9f23',
+        'c6bf789c-ba3a-4209-971d-b63abf0ab733',
+        '0512bb38-d531-4acf-9e7e-0add90816068',
+        '847a96b6-df94-4927-97e6-8cc9ea66ced7',
+        '7aeae0e2-70ee-4705-821d-1bba5d5b2ddd',
+        'dcbe20e8-647f-4f1d-8696-f1c5bbb570e3',
+        '7c4cc09e-7a92-40dd-8338-b2286535c4ed',
+        '971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6'
    ]

+    gimp_list = [
+        '7a4deb26-d57d-4ea9-9a73-630f66a7b568',
+        '554785e9-4523-4e7a-b8e1-8016f565f56a',
+        '77b8ab4d-994f-43ac-8930-8ca087d7c4b4',
+        'f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce',
+        'd52d6308-ec58-42b7-a2c9-de80e4837b2b',
+        '2a729ded-3296-423d-aec4-7dd55ed5fbb3',
+        'b148e375-fe0b-4bec-90e7-38632b0d73c2',
+        'a746add2-cab0-4740-ac36-c3769d9bfb46',
+        '7b7617bd-57cc-468e-9c91-40c4ec2bcb3d',
+        'd16c99dc-2a1e-46f2-b350-d97c86c85c15',
+        '06ca5602-62ca-47f6-ad4f-da151cde54cc',
+        'e2dd0213-26db-4349-abe5-d5667bfd725c',
+        'f723c744-e62c-4ae6-98d1-750d3cd7d79d',
+        '72f83cdc-bf76-4531-9a1b-eb893a13f8aa',
+        '7767eef2-56a3-4cea-8c9f-48c070c7d65b',
+        '734d6579-c07d-47a8-9ae2-13339795476b'
+    ]
+
+    thunderbird_list = [
+        'bb5e4c0d-f964-439c-97b6-bdb9747de3f4', 
+        '7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3', 
+        '12086550-11c0-466b-b367-1d9e75b3910e', 
+        '06fe7178-4491-4589-810f-2e2bc9502122', 
+        '6766f2b8-8a72-417f-a9e5-56fcaa735837', 
+        'e1e75309-3ddb-4d09-92ec-de869c928143', 
+        '3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5', 
+        '35253b65-1c19-4304-8aa4-6884b8218fc0', 
+        'd088f539-cab4-4f9a-ac92-9999fc3a656e', 
+        '2ad9387a-65d8-4e33-ad5b-7580065a27ca', 
+        '480bcfea-d68f-4aaa-a0a9-2589ef319381', 
+        '030eeff7-b492-4218-b312-701ec99ee0cc', 
+        '94760984-3ff5-41ee-8347-cf1af709fea0', 
+        '99146c54-4f37-4ab8-9327-5f3291665e1e', 
+        'c9e7eaf2-b1a1-4efc-a982-721972fa9f02']
+
+    multiple_list = [
+        '2b9493d7-49b8-493a-a71b-56cd1f4d6908',
+        '2c9fc0de-3ee7-45e1-a5df-c86206ad78b5',
+        '2fe4b718-3bd7-46ec-bdce-b184f5653624',
+        '3680a5ee-6870-426a-a997-eba929a0d25c',
+        '46407397-a7d5-4c6b-92c6-dbe038b1457b',
+        '4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc',
+        '510f64c8-9bcc-4be1-8d30-638705850618',
+        '51f5801c-18b3-4f25-b0c3-02f85507a078',
+        '58565672-7bfe-48ab-b828-db349231de6b',
+        '78aed49a-a710-4321-a793-b611a7c5b56b',
+        '897e3b53-5d4d-444b-85cb-2cdc8a97d903',
+        '937087b6-f668-4ba6-9110-60682ee33441',
+        'a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb',
+        'b52b40a5-ad70-4c53-b5b0-5650a8387052',
+        'c867c42d-a52d-4a24-8ae3-f75d256b5618',
+        'd9b7c649-c975-4f53-88f5-940b29c47247',
+        'e135df7c-7687-4ac0-a5f0-76b74438b53e',
+        'ee9a3c83-f437-4879-8918-be5efbb9fac7',
+        'f7dfbef3-7697-431c-883a-db8583a4e4f9',
+        'f8cfa149-d1c1-4215-8dac-4a0932bad3c2',
+        '6d72aad6-187a-4392-a4c4-ed87269c51cf',
+        'f918266a-b3e0-4914-865d-4faa564f1aef',
+        'da52d699-e8d2-4dc5-9191-a2199e0b6a9b',
+        'bc2b57f3-686d-4ec9-87ce-edf850b7e442',
+        '74d5859f-ed66-4d3e-aa0e-93d7a592ce41',
+        'b5062e3e-641c-4e3a-907b-ac864d2e7652',
+        '00fa164e-2612-4439-992e-157d019a8436',
+        'acb0f96b-e27c-44d8-b55f-7cb76609dfcd',
+        '69acbb55-d945-4927-a87b-8480e1a5bb7e',
+        '48d05431-6cd5-4e76-82eb-12b60d823f7d',
+        '68a25bd4-59c7-4f4d-975e-da0c8509c848',
+        'eb303e01-261e-4972-8c07-c9b4e7a4922a',
+        '0c825995-5b70-4526-b663-113f4c999dd2',
+        'c7c1e4c3-9e92-4eba-a4b8-689953975ea4',
+        'd1acdb87-bb67-4f30-84aa-990e56a09c92',
+        'deec51c9-3b1e-4b9e-993c-4776f20e8bb2',
+        '8e116af7-7db7-4e35-a68b-b0939c066c78',
+        '337d318b-aa07-4f4f-b763-89d9a2dd013f',
+        '82e3c869-49f6-4305-a7ce-f3e64a0618e7',
+        '185f29bd-5da0-40a6-b69c-ba7f4e0324ef',
+        '869de13e-bef9-4b91-ba51-f6708c40b096',
+        '2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e',
+        '3a93cae4-ad3e-403e-8c12-65303b271818',
+        '1f18aa87-af6f-41ef-9853-cdb8f32ebdea',
+        '26150609-0da3-4a7d-8868-0faf9c5f01bb',
+        '9219480b-3aed-47fc-8bac-d2cffc5849f7',
+        '881deb30-9549-4583-a841-8270c65f2a17',
+        '7e287123-70ca-47b9-8521-47db09b69b14',
+        'e2392362-125e-4f76-a2ee-524b183a3412',
+        '5bc63fb9-276a-4439-a7c1-9dc76401737f',
+        '26660ad1-6ebb-4f59-8cba-a8432dfe8d38',
+        'a82b78bb-7fde-4cb3-94a4-035baf10bcf0',
+        '36037439-2044-4b50-b9d1-875b5a332143',
+        '716a6079-22da-47f1-ba73-c9d58f986a38',
+        '873cafdd-a581-47f6-8b33-b9696ddb7b05',
+        'a74b607e-6bb5-4ea8-8a7c-5d97c7bbcd2a',
+        '6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a',
+        'da922383-bfa4-4cd3-bbad-6bebab3d7742',
+        '2373b66a-092d-44cb-bfd7-82e86e7a3b4d',
+        '81c425f5-78f3-4771-afd6-3d2973825947',
+        'bb83cab4-e5c7-42c7-a67b-e46068032b86',
+        '227d2f97-562b-4ccb-ae47-a5ec9e142fbb',
+        'b337d106-053f-4d37-8da0-7f9c4043a66b',
+        '20236825-b5df-46e7-89bf-62e1d640a897',
+        '8df7e444-8e06-4f93-8a1a-c5c974269d82',
+        'aad10cd7-9337-4b62-b704-a857848cedf2',
+        '02ce9a50-7af2-47ed-8596-af0c230501f8',
+        '4c26e3f3-3a14-4d86-b44a-d3cedebbb487',
+        'a503b07f-9119-456b-b75d-f5146737d24f',
+        '09a37c51-e625-49f4-a514-20a773797a8a',
+        '3e3fc409-bff3-4905-bf16-c968eee3f807',
+        'f5c13cdd-205c-4719-a562-348ae5cd1d91',
+        '5990457f-2adb-467b-a4af-5c857c92d762',
+        '415ef462-bed3-493a-ac36-ca8c6d23bf1b',
+        '7ff48d5b-2df2-49da-b500-a5150ffc7f18',
+        '9f3bb592-209d-43bc-bb47-d77d9df56504',
+        'dd60633f-2c72-42ba-8547-6f2c8cb0fdb0',
+        'ce2b64a2-ddc1-4f91-8c7d-a88be7121aac',
+        '3f05f3b9-29ba-4b6b-95aa-2204697ffc06',
+        'e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56',
+        'f8369178-fafe-40c2-adc4-b9b08a125456',
+        '778efd0a-153f-4842-9214-f05fc176b877',
+        '47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5',
+        'c2751594-0cd5-4088-be1b-b5f2f9ec97c4',
+        '788b3701-3ec9-4b67-b679-418bfa726c22',
+        '48c46dc7-fe04-4505-ade7-723cba1aa6f6',
+        '42d25c08-fb87-4927-8b65-93631280a26f',
+        'bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108',
+        'e8172110-ec08-421b-a6f5-842e6451911f',
+        '42f4d1c7-4521-4161-b646-0a8934e36081',
+        '3c8f201a-009d-4bbe-8b65-a6f8b35bb57f',
+        'd68204bf-11c1-4b13-b48b-d303c73d4bf6',
+        '91190194-f406-4cd6-b3f9-c43fac942b22',
+        '7f35355e-02a6-45b5-b140-f0be698bcf85',
+        '98e8e339-5f91-4ed2-b2b2-12647cb134f4',
+        '0e5303d4-8820-42f6-b18d-daf7e633de21',
+        'df67aebb-fb3a-44fd-b75b-51b6012df509',
+        '5df7b33a-9f77-4101-823e-02f863e1c1ae',
+        'aceb0368-56b8-4073-b70e-3dc9aee184e0',
+        '22a4636f-8179-4357-8e87-d1743ece1f81',
+        '236833a3-5704-47fc-888c-4f298f09f799',
+        '67890eb6-6ce5-4c00-9e3d-fb4972699b06',
+    ]
+    
+
+    # for example_id in calc_list:
+    #     try:
+    #         with eventlet.Timeout(600, False):
+    #             main("libreoffice_calc", example_id, gpt4_model="gemini-pro-vision")
+    #     except Exception as e:
+    #         logger.error("An error occurred while running the example: %s", e)
+    #         continue
+
    # for example_id in vs_code_list:
-    #     try:
-    #         main("vs_code", example_id)
-    #     except Exception as e:
-    #         logger.error("An error occurred while running the example: %s", e)
-    #         continue
-
-    # multiple_list = [
-    #     "f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
-    #     "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
-    #     "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
-    #     "b52b40a5-ad70-4c53-b5b0-5650a8387052",
-    #     "46407397-a7d5-4c6b-92c6-dbe038b1457b",
-    #     "2b9493d7-49b8-493a-a71b-56cd1f4d6908",
-    #     "51f5801c-18b3-4f25-b0c3-02f85507a078",
-    #     "2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
-    #     "510f64c8-9bcc-4be1-8d30-638705850618",
-    #     "937087b6-f668-4ba6-9110-60682ee33441",
-    #     "ee9a3c83-f437-4879-8918-be5efbb9fac7",
-    #     "3680a5ee-6870-426a-a997-eba929a0d25c",
-    #     "e135df7c-7687-4ac0-a5f0-76b74438b53e",
-    #     "58565672-7bfe-48ab-b828-db349231de6b",
-    #     "2fe4b718-3bd7-46ec-bdce-b184f5653624"
-    # ]
+    #     main("vs_code", example_id, gpt4_model="gemini-pro-vision")
    #
-    # for example_id in multiple_list:
-    #     try:
-    #         main("multi_apps", example_id)
-    #     except Exception as e:
-    #         logger.error("An error occurred while running the example: %s", e)
-    #         continue
+    # for example_id in gimp_list:
+    #     main("gimp", example_id, gpt4_model="gemini-pro-vision")

+    for example_id in chrome_list:
+        main("chrome", example_id, "gemini-pro-vision")
+
+    for example_id in chrome_list:
+        main("chrome", example_id)
--- a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py
+++ b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py
@@ -24,36 +24,56 @@ def find_leaf_nodes(xlm_file_str):
    collect_leaf_nodes(root, leaf_nodes)
    return leaf_nodes

+state_ns = "uri:deskat:state.at-spi.gnome.org"
+component_ns = "uri:deskat:component.at-spi.gnome.org"
+def judge_node(node: ET, platform="ubuntu") -> bool:
+    keeps: bool = node.tag.startswith("document")\
+               or node.tag.endswith("item")\
+               or node.tag.endswith("button")\
+               or node.tag.endswith("heading")\
+               or node.tag.endswith("label")\
+               or node.tag.endswith("scrollbar")\
+               or node.tag.endswith("searchbox")\
+               or node.tag.endswith("textbox")\
+               or node.tag.endswith("link")\
+               or node.tag.endswith("tabelement")\
+               or node.tag.endswith("textfield")\
+               or node.tag.endswith("textarea")\
+               or node.tag.endswith("menu")\
+               or node.tag in [ "alert", "canvas", "check-box"
+                              , "combo-box", "entry", "icon"
+                              , "image", "paragraph", "scroll-bar"
+                              , "section", "slider", "static"
+                              , "table-cell", "terminal", "text"
+                              , "netuiribbontab", "start", "trayclockwclass"
+                              , "traydummysearchcontrol", "uiimage", "uiproperty"
+                              , "uiribboncommandbar"
+                              ]
+    keeps = keeps and ( platform=="ubuntu"\
+                        and node.get("{{{:}}}showing".format(state_ns), "false")=="true"\
+                        and node.get("{{{:}}}visible".format(state_ns), "false")=="true"\
+                     or platform=="windows"\
+                        and node.get("{{{:}}}visible".format(state_ns), "false")=="true"\
+                      )\
+                    and ( node.get("{{{:}}}enabled".format(state_ns), "false")=="true"\
+                       or node.get("{{{:}}}editable".format(state_ns), "false")=="true"\
+                       or node.get("{{{:}}}expandable".format(state_ns), "false")=="true"\
+                       or node.get("{{{:}}}checkable".format(state_ns), "false")=="true"
+                        )\
+                    and (node.get("name", "") != "" or node.text is not None and len(node.text)>0)

-def filter_nodes(nodes):
+    coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format(component_ns), "(-1, -1)"))
+    sizes: Tuple[int, int] = eval(node.get("{{{:}}}size".format(component_ns), "(-1, -1)"))
+    keeps = keeps and coordinates[0]>0 and coordinates[1]>0 and sizes[0]>0 and sizes[1]>0
+    return keeps
+
+def filter_nodes(root: ET, platform="ubuntu"):
    filtered_nodes = []

-    for node in nodes:
-        if not node.get('{uri:deskat:state.at-spi.gnome.org}visible', None) == 'true':
-            # Not visible
-            continue
-        # Check if the node is a 'panel'
-        if node.tag == 'panel':
-            # Check if the 'panel' represents an interactive element
-            # or if it has certain attributes that are of interest.
-            # Add your conditions here...
-            if node.get('{uri:deskat:state.at-spi.gnome.org}focusable', 'false') == 'true':
-                filtered_nodes.append(node)
-        elif node.tag == 'text':
-            continue
-        elif node.get("name") == "" and node.text is None:
-            continue
-        else:
-            coords = tuple(
-                map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', ')))
-            if coords[0] < 0 or coords[1] < 0:
-                continue
-            size = tuple(
-                map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', ')))
-            if size[0] <= 0 or size[1] <= 0:
-                continue
-            # Node is not a 'panel', add to the list.
+    for node in root.iter():
+        if judge_node(node, platform):
            filtered_nodes.append(node)
+            #print(ET.tostring(node, encoding="unicode"))

    return filtered_nodes

@@ -134,12 +154,14 @@ def print_nodes_with_indent(nodes, indent=0):


 if __name__ == '__main__':
-    with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as f:
-        xml_file_str = f.read()
-    filtered_nodes = filter_nodes(find_leaf_nodes(xml_file_str))
+    import json
+    with open('4.json', 'r', encoding='utf-8') as f:
+        xml_file_str = json.load(f)["AT"]
+    filtered_nodes = filter_nodes(ET.fromstring(xml_file_str))
    print(len(filtered_nodes))
-    masks = draw_bounding_boxes(filtered_nodes, 'screenshot.png',
-                                'chrome_desktop_example_1_tagged_remove.png', )
+    masks = draw_bounding_boxes( filtered_nodes, '4.png'
+                               , '4.a.png'
+                               )

    # print(masks)
    print(len(masks))
--- a/mm_agents/gpt_4v_agent.py
+++ b/mm_agents/gpt_4v_agent.py
@@ -8,6 +8,7 @@ import uuid
 from http import HTTPStatus
 from io import BytesIO
 from typing import Dict, List
+import xml.etree.ElementTree as ET

 import backoff
 import dashscope
@@ -40,8 +41,8 @@ def encode_image(image_path):


 def linearize_accessibility_tree(accessibility_tree):
-    leaf_nodes = find_leaf_nodes(accessibility_tree)
-    filtered_nodes = filter_nodes(leaf_nodes)
+    #leaf_nodes = find_leaf_nodes(accessibility_tree)
+    filtered_nodes = filter_nodes(ET.fromstring(accessibility_tree))

    linearized_accessibility_tree = "tag\tname\ttext\tposition\tsize\n"
    # Linearize the accessibility tree nodes into a table format
@@ -524,11 +525,12 @@ class GPT4v_Agent:

    @backoff.on_exception(
        backoff.expo,
-        (APIError, RateLimitError, APIConnectionError),
+        (Exception),
        max_tries=10
    )
    def call_llm(self, payload):
        if self.model.startswith("gpt"):
+            logger.info("Generating content with GPT model: %s", self.model)
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=self.headers,
@@ -590,7 +592,7 @@ class GPT4v_Agent:
            client = OpenAI(api_key=TOGETHER_API_KEY,
                            base_url='https://api.together.xyz',
                            )
-
+            logger.info("Generating content with Mistral model: %s", self.model)
            response = client.chat.completions.create(
                messages=misrtal_messages,
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -644,10 +646,18 @@ class GPT4v_Agent:
                gemini_messages[1]['parts'][0] = gemini_messages[0]['parts'][0] + "\n" + gemini_messages[1]['parts'][0]
                gemini_messages.pop(0)

+            # gemini-pro-vision does not support multi-turn messages, so flatten the history into a single user turn
+            if self.model == "gemini-pro-vision":
+                message_history_str = ""
+                for message in gemini_messages:
+                    message_history_str += "<|" + message['role'] + "|>\n" + message['parts'][0] + "\n"
+                gemini_messages = [{"role": "user", "parts": [message_history_str, gemini_messages[-1]['parts'][1]]}]
+
            print(gemini_messages)
            api_key = os.environ.get("GENAI_API_KEY")
            assert api_key is not None, "Please set the GENAI_API_KEY environment variable"
            genai.configure(api_key=api_key)
+            logger.info("Generating content with Gemini model: %s", self.model)
            response = genai.GenerativeModel(self.model).generate_content(
                gemini_messages,
                generation_config={