fix conflict

tsuky_chen committed 2024-03-21 16:01:31 +08:00
53 changed files with 3072 additions and 298 deletions

.gitignore vendored

@@ -2,6 +2,9 @@
*.pth
*.pt
# Credential files
evaluation_examples/settings/googledrive/credentials.json
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]


@@ -23,8 +23,11 @@ Please refer to [guidance](https://docs.google.com/document/d/1KBdeZwmZs2Vi_Wsnn
2. Install the environment package, download the examples and the virtual machine image.
For x86_64 Linux or Windows, run the following commands to install the environment and download the examples and the virtual machine image:
```bash
pip install desktop-env
gdown xxxx
git clone https://github.com/xlang-ai/DesktopEnv
cd DesktopEnv
pip install -r requirements.txt
gdown https://drive.google.com/drive/folders/1HX5gcf7UeyR-2UmiA15Q9U-Wr6E6Gio8 -O Ubuntu --folder
vmrun -T ws start "Ubuntu/Ubuntu.vmx" nogui
vmrun -T ws snapshot "Ubuntu/Ubuntu.vmx" "init_state"
```
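Once the snapshot is taken, the environment can be driven from Python. A minimal usage sketch, assuming the gym-style `DesktopEnv` interface added in the desktop_env diff later in this commit; the import path, the `path_to_vm` argument, and the action format are assumptions, not the confirmed API:
```python
from desktop_env.envs.desktop_env import DesktopEnv  # assumed import path

# headless and require_a11y_tree match the constructor signature in this
# commit's diff; path_to_vm is an assumed argument naming the VM started above.
env = DesktopEnv(
    path_to_vm="Ubuntu/Ubuntu.vmx",
    headless=True,               # no visible VM window
    require_a11y_tree=False,     # skip the accessibility tree for faster observations
)

obs = env.reset()                # {"screenshot": ..., "accessibility_tree": ...}
obs, reward, done, info = env.step("pyautogui.click(960, 540)")  # illustrative action
env.close()
```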
@@ -89,4 +92,4 @@ If you find this environment useful, please consider citing our work:
journal={arXiv preprint arXiv:xxxx.xxxx},
year={2024}
}
```


@@ -64,14 +64,14 @@ class PythonController:
It can be used to execute pyautogui commands, or any other Python command.
"""
# command_list = ["python", "-c", self.pkgs_prefix.format(command=command)]
command_list = ["python3", "-c", self.pkgs_prefix.format(command=command)]
command_list = ["python", "-c", self.pkgs_prefix.format(command=command)]
payload = json.dumps({"command": command_list, "shell": False})
headers = {
'Content-Type': 'application/json'
}
try:
response = requests.post(self.http_server + "/execute", headers=headers, data=payload)
response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=90)
if response.status_code == 200:
logger.info("Command executed successfully: %s", response.text)
else:
@@ -263,16 +263,19 @@ class PythonController:
"""
Ends recording the screen.
"""
response = requests.post(self.http_server + "/end_recording")
if response.status_code == 200:
logger.info("Recording stopped successfully")
with open(dest, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
else:
logger.error("Failed to stop recording. Status code: %d", response.status_code)
return None
try:
response = requests.post(self.http_server + "/end_recording")
if response.status_code == 200:
logger.info("Recording stopped successfully")
with open(dest, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
else:
logger.error("Failed to stop recording. Status code: %d", response.status_code)
return None
except Exception as e:
logger.error("An error occurred while trying to download the recording: %s", e)
# Additional info
def get_vm_platform(self):
@@ -341,4 +344,4 @@ class PythonController:
return response.json()["directory_tree"]
else:
logger.error("Failed to get directory tree. Status code: %d", response.status_code)
return None


@@ -58,7 +58,8 @@ class DesktopEnv(gym.Env):
tmp_dir: str = "tmp",
cache_dir: str = "cache",
screen_size: Tuple[int] = (1920, 1080),
headless: bool = False
headless: bool = False,
require_a11y_tree: bool = True,
):
"""
Args:
@@ -77,6 +78,7 @@ class DesktopEnv(gym.Env):
self.cache_dir_base: str = cache_dir
self.vm_screen_size = screen_size # todo: add the logic to get the screen size from the VM
self.headless = headless
self.require_a11y_tree = require_a11y_tree
os.makedirs(self.tmp_dir_base, exist_ok=True)
@@ -248,7 +250,7 @@ class DesktopEnv(gym.Env):
observation = {
"screenshot": self._get_obs(),
"accessibility_tree": self.controller.get_accessibility_tree(),
"accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None,
}
return observation
@@ -284,8 +286,8 @@ class DesktopEnv(gym.Env):
observation = {
"screenshot": self._get_obs(),
"accessibility_tree": self.controller.get_accessibility_tree(),
"terminal": self.controller.get_terminal_output(),
"accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None,
# "terminal": self.controller.get_terminal_output(),
"instruction": self.instruction
}
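Because the accessibility tree is now `None` whenever `require_a11y_tree` is off, downstream consumers should guard for the missing field. A small defensive sketch (both helper names are hypothetical):
```python
def render_observation(obs):
    """Build an agent input whether or not the a11y tree was requested."""
    screenshot = obs["screenshot"]
    tree = obs.get("accessibility_tree")   # None when require_a11y_tree=False
    if tree is None:
        return screenshot_only_prompt(screenshot)   # hypothetical helper
    return multimodal_prompt(screenshot, tree)      # hypothetical helper
```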


@@ -77,6 +77,7 @@ from .general import (
literal_match
)
from .gimp import (
check_structure_sim_resized,
check_brightness_decrease_and_structure_sim,
check_contrast_increase_and_structure_sim,
check_saturation_increase_and_structure_sim,


@@ -430,11 +430,11 @@ def check_image_size(src_path, rule):
img = Image.open(src_path)
# Check the size
if rule["height"] is not None:
if rule.get("height", None) is not None:
height_same = img.size[1] == rule["height"]
else:
height_same = True
if rule["width"] is not None:
if rule.get("width", None) is not None:
width_same = img.size[0] == rule["width"]
else:
width_same = True
@@ -568,3 +568,51 @@ def check_image_file_size(src_path, rule):
return 1.0
else:
return 0.0
if __name__ == "__main__":
actual_config_path = "../../../cache/sessionrc_test"
rule = {
"key": "hide-docks",
"value": "no"
}
print(check_config_status(actual_config_path, rule))
actual_config_path = "../../../cache/action-history_test"
rule = {
"key": ["history-item", "\"filters-vignette\""],
"value": "1"
}
print(check_config_status(actual_config_path, rule))
actual_config_path = "../../../cache/gimprc_test"
rule = {
"key": "undo-levels",
"value": "100"
}
print(check_config_status(actual_config_path, rule))
src_path = "../../../cache/734d6579-c07d-47a8-9ae2-13339795476b/green_background_with_object.png"
tgt_path = "../../../cache/734d6579-c07d-47a8-9ae2-13339795476b/white_background_with_object.png"
print(check_green_background(src_path, tgt_path))
tgt_path = "../../../cache/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce/Triangle_In_The_Middle.png"
print(check_triangle_position(tgt_path))
src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi_sharper.png"
tgt_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi.png"
print(check_sharper(src_path, tgt_path))
src_path = "../../../cache/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f/compressed.jpeg"
rule = {
"max_size": 500000
}
print(check_image_file_size(src_path, rule))
src_path = "../../../cache/d16c99dc-2a1e-46f2-b350-d97c86c85c15/resized.png"
tgt_path = "../../../cache/d16c99dc-2a1e-46f2-b350-d97c86c85c15/dog_with_background.png"
rule = {
"height": 512
}
print(check_image_size(src_path, rule))
print(check_structure_sim_resized(src_path, tgt_path))


@@ -236,6 +236,9 @@ def check_html_background_image(src_path: str, rule: Dict = None) -> float:
Check if the background image is correctly set.
multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
"""
if not src_path:
return 0.0
from bs4 import BeautifulSoup
with open(src_path, 'r') as f:
html_content = f.read()
@@ -252,6 +255,9 @@ def compare_result_files(src_path, tgt_path):
Compare whether the content of two files are the same.
multi-app:7f35355e-02a6-45b5-b140-f0be698bcf85
"""
if not src_path or not tgt_path:
return 0.0
with open(src_path, 'r') as f:
src_content = f.read().strip()
with open(tgt_path, 'r') as f:


@@ -63,7 +63,7 @@ def execute_command():
# Execute the command without any safety checks.
try:
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True)
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, timeout=120)
return jsonify({
'status': 'success',
'output': result.stdout,
@@ -414,9 +414,18 @@ def _create_pywinauto_node(node: BaseWrapper, depth: int = 0, flag: Optional[str
attribute_dict: Dict[str, Any] = {"name": node.element_info.name}
# States {{{ #
attribute_dict["{{{:}}}enabled".format(_accessibility_ns_map["st"])] = str(node.is_enabled()).lower()
attribute_dict["{{{:}}}visible".format(_accessibility_ns_map["st"])] = str(node.is_visible()).lower()
attribute_dict["{{{:}}}active".format(_accessibility_ns_map["st"])] = str(node.is_active()).lower()
try:
attribute_dict["{{{:}}}enabled".format(_accessibility_ns_map["st"])] = str(node.is_enabled()).lower()
except:
pass
try:
attribute_dict["{{{:}}}visible".format(_accessibility_ns_map["st"])] = str(node.is_visible()).lower()
except:
pass
try:
attribute_dict["{{{:}}}active".format(_accessibility_ns_map["st"])] = str(node.is_active()).lower()
except:
pass
if hasattr(node, "is_minimized"):
try:
@@ -545,12 +554,14 @@ def _create_pywinauto_node(node: BaseWrapper, depth: int = 0, flag: Optional[str
node_role_name = "".join( map( lambda ch: ch if ch.isidentifier()\
or ch in {"-"}\
or ch.isalnum()
else "-"
else "-"
, node_role_name
)
)
if node_role_name.strip() == "":
node_role_name = "unknown"
if not node_role_name[0].isalpha():
node_role_name = "tag" + node_role_name
xml_node = lxml.etree.Element(
node_role_name,
@@ -601,9 +612,14 @@ def get_accessibility_tree():
@app.route('/screen_size', methods=['POST'])
def get_screen_size():
d = display.Display()
screen_width = d.screen().width_in_pixels
screen_height = d.screen().height_in_pixels
if platform_name=="Linux":
d = display.Display()
screen_width = d.screen().width_in_pixels
screen_height = d.screen().height_in_pixels
elif platform_name=="Windows":
user32 = ctypes.windll.user32
screen_width: int = user32.GetSystemMetrics(0)
screen_height: int = user32.GetSystemMetrics(1)
return jsonify(
{
"width": screen_width,


@@ -0,0 +1,96 @@
{
"id": "185f29bd-5da0-40a6-b69c-ba7f4e0324ef",
"snapshot": "libreoffice_calc",
"instruction": "Transfer the data from our 'Employee Performance Evaluation Summary' Excel sheet into our standardized PDF evaluation forms. Each employee's evaluation data should be accurately filled into the designated fields of the PDF form. It's crucial that the final PDF documents retain a uniform and professional look, ready for distribution to our staff or for filing purposes. Furthermore, please ensure that each PDF file is named according to the employee's name as it appears in the Excel document. This will greatly streamline our evaluation process and enhance our efficiency in managing employee performance records. Oh, use \"√\" as mark on characters.",
"source": "authors",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"path": "C:\\Users\\chenj\\Desktop\\Employee Performance Evaluation Summary.xlsx",
"url": "https://drive.google.com/uc?id=1uOzi66bzO_WUnoS4Oqsodrd7_YPLatEk&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\review_template.pdf",
"url": "https://drive.google.com/uc?id=1YJ4RPGFUuS48tBh31gBerA16JSMw498w&export=download"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\Employee Performance Evaluation Summary.xlsx"
}
},
{
"type": "sleep",
"parameters": {
"seconds": 2
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\review_template.pdf"
}
}
],
"trajectory": "trajectories/185f29bd-5da0-40a6-b69c-ba7f4e0324ef",
"related_apps": [
"libreoffice_calc",
"os",
"pdf"
],
"evaluator": {
"func": "compare_pdfs",
"result": {
"type": "cloud_file",
"path": [
"https://drive.google.com/uc?id=1kZM90nA1krRmV9ug5_BBe8VlrZRVLiLK&export=download",
"https://drive.google.com/uc?id=1zyLzYYThwyit9ciXpfNfPFlBomolOauY&export=download",
"https://drive.google.com/uc?id=1gMT7JBftuymajMAO5rwksORpeVq3uGmH&export=download",
"https://drive.google.com/uc?id=1x0DdtUSZyBifl1tGIWlWKn255WusJeR4&export=download",
"https://drive.google.com/uc?id=1UAcG32WO8XCXElcanjGwbSpJwFuyOkts&export=download",
"https://drive.google.com/uc?id=1PRgryg7Y5evKnDG2LPtAttVp9qAf5VyZ&export=download",
"https://drive.google.com/uc?id=1JxEDriCS2W7BQLdkIgxu_WFCRa9ib4D7&export=download"
],
"dest": [
"Alex Lee_Gold.pdf",
"David Wilson_Gold.pdf",
"Emily Johnson_Gold.pdf",
"John Doe_Gold.pdf",
"Linda Green_Gold.pdf",
"Michael Brown_Gold.pdf",
"Sophia Carter_Gold.pdf"
],
"multi": true,
"gives": [0,1,2,3,4,5,6]
},
"expected": {
"type": "vm_file",
"path": [
"C:\\Users\\chenj\\Desktop\\Alex Lee.pdf",
"C:\\Users\\chenj\\Desktop\\David Wilson.pdf",
"C:\\Users\\chenj\\Desktop\\Emily Johnson.pdf",
"C:\\Users\\chenj\\Desktop\\John Doe.pdf",
"C:\\Users\\chenj\\Desktop\\Linda Green.pdf",
"C:\\Users\\chenj\\Desktop\\Michael Brown.pdf",
"C:\\Users\\chenj\\Desktop\\Sophia Carter.pdf"
],
"dest": [
"Alex Lee.pdf",
"David Wilson.pdf",
"Emily Johnson.pdf",
"John Doe.pdf",
"Linda Green.pdf",
"Michael Brown.pdf",
"Sophia Carter.pdf"
],
"multi": true,
"gives": [0,1,2,3,4,5,6]
}
}
}


@@ -0,0 +1,109 @@
{
"id": "1f18aa87-af6f-41ef-9853-cdb8f32ebdea",
"snapshot": "libreoffice_calc",
"instruction": "I've prepared some grammar tests and placed them in the 'Grammar test' folder. I've already provided the multiple-choice answers for Test 1 in the 'answer doc' file. Could you please follow the same format to write out the answers for the remaining two tests in the doc file? This way, I can distribute them to the students as a reference. Thank you.",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": "mkdir \"C:\\Users\\chenj\\Desktop\\students work\" \"C:\\Users\\chenj\\Desktop\\Lec powerpoint\" \"C:\\Users\\chenj\\Desktop\\Grammar test\" \"C:\\Users\\chenj\\Desktop\\Grammar rules PDF\" C:\\Users\\chenj\\Desktop\\FDI",
"shell": true
}
},
{
"type": "download",
"parameters": {
"files": [
{
"path": "C:\\Users\\chenj\\Desktop\\Grammer test 1.docx",
"url": "https://drive.google.com/uc?id=1VaXQ9XdzMv079xKFs0Y2XrwdmwFHIvBK&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammer test 2.docx",
"url": "https://drive.google.com/uc?id=1k2T88WreTwi-Yyp9mEJnreEQC3DdkJ2x&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammer test 3.docx",
"url": "https://drive.google.com/uc?id=1QgyQWVOcAJuPaSlrywb9nuFiQDySsTb2&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Answer.docx",
"url": "https://drive.google.com/uc?id=1BC2DuWJuZggmf6fXl6Ys9xQMZzU6a1br&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules02.pdf",
"url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules01.pdf",
"url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\fragrules.pdf",
"url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\csfsrules.pdf",
"url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Public Lecture Teaching Plan.docx",
"url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx",
"url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download"
}
]
}
}
],
"trajectory": "trajectories/1f18aa87-af6f-41ef-9853-cdb8f32ebdea",
"related_apps": [
"os",
"libreoffice_writer"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Answer - Word",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
]
}
}
],
"func": "compare_docx_files",
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1TOMGWC3OFuP6yEGQuRJMEFWdg2NcBPSs&export=download",
"dest": "Answer gold.docx"
},
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\Answer.docx",
"dest": "Answer.docx"
},
"options": {
"ignore_case": true,
"ignore_blanks": true
}
}
}


@@ -0,0 +1,68 @@
{
"id": "26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
"snapshot": "multiapps",
"instruction": "I want to test the quality of the network environment my laptop is currently in. Please measure my network situation through speedtest.net, export the measurement results, and save them to Documents\\Test\\Speed (if the dir does not exist, create it).",
"source": "https://www.speedtest.net/",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"ncat.exe", "-k", "-l", "0.0.0.0", "9222",
"--sh-exec", "ncat.exe 127.0.0.1 1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.speedtest.net/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; time.sleep(0.5);"
]
}
}
],
"trajectory": "trajectories/",
"related_apps":[
"os",
"browser"
],
"evaluator":{
"func": "compare_time_in_speedtest_results",
"result":{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Documents\\Test\\Speed\\Speedtest Results Export-.csv",
"dest": "Speedtest Results Export-.csv",
"time_suffix": true
},
"expected":{
"type": "time_diff_range",
"diff_range_in_minutes": "60"
}
}
}


@@ -0,0 +1,127 @@
{
"id": "2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e",
"snapshot": "libreoffice_calc",
"instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. Making the necessary adjustments if it turns out that the current formatting does not align with APA 7 standards or exists some errors.",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": "mkdir \"C:\\Users\\chenj\\Desktop\\students work\" \"C:\\Users\\chenj\\Desktop\\Lec powerpoint\" \"C:\\Users\\chenj\\Desktop\\Grammar test\" \"C:\\Users\\chenj\\Desktop\\Grammar rules PDF\" C:\\Users\\chenj\\Desktop\\FDI",
"shell": true
}
},
{
"type": "download",
"parameters": {
"files": [
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\Zheng He .docx",
"url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\The literature reviews of weekly readings.docx",
"url": "https://drive.google.com/uc?id=18zoZCNtP-wTkxXp2FhH3O_NdLZKVMPIr&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\The British Justice System.docx",
"url": "https://drive.google.com/uc?id=1z3YHSN4CvC5kN1AwTWB_-plRS4p5GAch&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\quiz2.docx",
"url": "https://drive.google.com/uc?id=1R5Bii_kvnv_fZVXV-6DMt6Hgq-1gXMo1&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\quiz.docx",
"url": "https://drive.google.com/uc?id=1PvlGMVX7YkricrjoPRe0e5VQlHeozRPD&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\Q1&2&3.docx",
"url": "https://drive.google.com/uc?id=1kLQ3lnba6p9lqikHhKDdbqrYagHnZWU_&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\Photo Ethics in Journalism.docx",
"url": "https://drive.google.com/uc?id=1V6nG6HP_9Kb5KBCRTpaGsRTdPxnJSmRm&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\cassie.docx",
"url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\case study.docx",
"url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules02.pdf",
"url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules01.pdf",
"url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\fragrules.pdf",
"url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\csfsrules.pdf",
"url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Public Lecture Teaching Plan.docx",
"url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx",
"url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download"
}
]
}
}
],
"trajectory": "trajectories/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e",
"related_apps": [
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "case study - Word",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
]
}
}
],
"func": "compare_references",
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1325Qfch0JaJ_wJ20ICxMoHeW8KLpK8v0&export=download",
"dest": "case study gold.docx"
},
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\students work\\case study.docx",
"dest": "case study.docx"
},
"options": {
"content_only": true,
"reference_base_result": 0.92
}
}
}


@@ -0,0 +1,162 @@
{
"id": "3a93cae4-ad3e-403e-8c12-65303b271818",
"snapshot": "libreoffice_calc",
"instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": "mkdir \"C:\\Users\\chenj\\Desktop\\students work\" \"C:\\Users\\chenj\\Desktop\\Lec powerpoint\" \"C:\\Users\\chenj\\Desktop\\Grammar test\" \"C:\\Users\\chenj\\Desktop\\Grammar rules PDF\" C:\\Users\\chenj\\Desktop\\FDI",
"shell": true
}
},
{
"type": "download",
"parameters": {
"files": [
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\Zheng He .docx",
"url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\cassie.docx",
"url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\students work\\case study.docx",
"url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules02.pdf",
"url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules01.pdf",
"url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\fragrules.pdf",
"url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\csfsrules.pdf",
"url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Public Lecture Teaching Plan.docx",
"url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx",
"url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download"
}
]
}
}
],
"trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818",
"related_apps": [
"os",
"libreoffice_calc"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Course Timetable - Excel",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
]
}
}
],
"func": [
"compare_table",
"compare_table",
"compare_table"
],
"result": [
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx",
"dest": "Course Timetable.xlsx"
},
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx",
"dest": "Course Timetable.xlsx"
},
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx",
"dest": "Course Timetable.xlsx"
}
],
"expected": [
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1VMOon8byWuoCW2Uk5etGMJLMzAfwFVyB&export=download",
"dest": "Course Timetable gold.xlsx"
},
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1jAThiIqILZ5t-RFPHVniSvAL8ZJO1H3P&export=download",
"dest": "Course Timetable gold 2.xlsx"
},
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1U0THDtPCgsw-Rb0N9fjF8DeOepPeUajP&export=download",
"dest": "Course Timetable gold 3.xlsx"
}
],
"options": [
{
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"ignore_case": true
}
]
},
{
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"ignore_case": true
}
]
},
{
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"ignore_case": true
}
]
}
],
"conj": "or"
}
}


@@ -0,0 +1,143 @@
{
"id": "46407397-a7d5-4c6b-92c6-dbe038b1457b",
"snapshot": "chrome",
"instruction": "Help me export charts, graph or other images from docx files received in email \"Lecture Document\" in Notes folder and upload these png files to the figures/ folder in Google Drive for later use (use numbers to name them).",
"source": "https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive",
"config": [
{
"type": "googledrive",
"parameters": {
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"operation": ["delete"],
"args": [
{
"query": "title = 'figures' and 'root' in parents and mimeType = 'application/vnd.google-apps.folder'",
"trash": false
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"ncat.exe", "-k", "-l", "0.0.0.0", "9222",
"--sh-exec", "ncat.exe 127.0.0.1 1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://news.google.com",
"https://x.com"
]
}
},
{
"type": "login",
"parameters": {
"settings_file": "evaluation_examples/settings/google/settings.json",
"platform": "googledrive"
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?id=1Yy-ZrkMq4pIQq1Y75bD2WVJXxHMTaMqE&export=download",
"path": "C:\\Users\\chenj\\thunderbird-profile.7z"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"C:\\Program Files\\7-Zip\\7z.exe",
"x", "C:\\Users\\chenj\\thunderbird-profile.7z"
]
}
},
{
"type": "execute",
"parameters": {
"command": "rd /s /q C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird",
"shell": true
}
},
{
"type": "execute",
"parameters": {
"command": "move C:\\Users\\chenj\\Thunderbird C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird",
"shell": true
}
},
{
"type": "launch",
"parameters": {
"command": [
"C:\\Program Files\\Mozilla Thunderbird\\thunderbird.exe"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"thunderbird",
"chrome"
],
"evaluator": {
"func": "compare_image_list",
"result": {
"type": "googledrive_file",
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"query_list": [
[
"title = 'figures' and trashed = false and 'root' in parents and mimeType = 'application/vnd.google-apps.folder'",
"title = '1.png' and trashed = false"
],
[
"title = 'figures' and trashed = false and 'root' in parents and mimeType = 'application/vnd.google-apps.folder'",
"title = '2.png' and trashed = false"
],
[
"title = 'figures' and trashed = false and 'root' in parents and mimeType = 'application/vnd.google-apps.folder'",
"title = '3.png' and trashed = false"
]
],
"dest": [
"1.png",
"2.png",
"3.png"
]
},
"expected": {
"type": "cloud_file",
"path": [
"https://drive.usercontent.google.com/download?id=19J5tzWjx9hdo-n0MC3upzAntVMa8WUgk&export=download&authuser=0&confirm=t&uuid=be790579-8db9-4bd2-a757-beb27af386af&at=APZUnTVM2PjNDXhlwFZ6WAFdNVsD:1706497547717",
"https://drive.usercontent.google.com/download?id=1S04RpR5dk80LylIYGvA4e3sAUBd6wdlQ&export=download&authuser=0&confirm=t&uuid=b302de03-04f7-455c-ab0c-b3cbbeb6929a&at=APZUnTVD8zMZGO1_GWaFUm1cNXul:1706497555463",
"https://drive.usercontent.google.com/download?id=11NRLh93RTzEd0Cy-cYwMyNJSFG7-vP9c&export=download&authuser=0&confirm=t&uuid=02500115-dea3-481a-af4f-a723d9a62169&at=APZUnTW9-gENlsyfdIPA4PTA0emh:1706497560874"
],
"dest": [
"1_gold.png",
"2_gold.png",
"3_gold.png"
],
"multi": true,
"gives": [0, 1, 2]
}
}
}
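The nested `query_list` entries above read as a chain: each query after the first is meant to run inside the folder matched by the previous one. A rough sketch of that resolution with PyDrive2, the library implied by `settings.yml`; the chaining logic itself is an assumption about how the evaluator interprets the list:
```python
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive

def resolve_query_chain(drive, queries):
    """Run each query inside the folder matched by the previous query."""
    parent_id, item = "root", None
    for q in queries:
        # Scope to the current parent unless the query already names one.
        scoped = q if "in parents" in q else f"'{parent_id}' in parents and ({q})"
        matches = drive.ListFile({"q": scoped}).GetList()
        if not matches:
            return None
        item, parent_id = matches[0], matches[0]["id"]
    return item

gauth = GoogleAuth(settings_file="evaluation_examples/settings/googledrive/settings.yml")
drive = GoogleDrive(gauth)
png = resolve_query_chain(drive, [
    "title = 'figures' and trashed = false and 'root' in parents",
    "title = '1.png' and trashed = false",
])
```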


@@ -0,0 +1,18 @@
{
"id": "6d72aad6-187a-4392-a4c4-ed87269c51cf",
"snapshot": "libreoffice_calc",
"instruction": "Could you please converting MS Office PowerPoint presentation to video and play it with VLC?",
"source": "https://superuser.com/questions/923171/converting-openoffice-impress-presentation-to-video-without-screen-recording",
"config": [
],
"trajectory": "trajectories/6d72aad6-187a-4392-a4c4-ed87269c51cf",
"related_apps": [
"excel",
"powerpoint",
"word",
"vlc"
],
"evaluator": {
"func": "infeasible"
}
}


@@ -0,0 +1,72 @@
{
"id": "6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a",
"snapshot": "multiapps",
"instruction": "I now want to count the meeting cities of the three machine learning conferences in the past ten years from 2013 to 2019(including 2013 and 2019). I have listed the names and years of the conferences in excel. Please fill in the vacant locations.",
"source": "author",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?export=download&id=19wUxTQeoKr6ihJWJ_9cu2tzKQH0cnxWH",
"path": "C:\\Users\\chenj\\Desktop\\ConferenceCity.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\ConferenceCity.xlsx"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"calc", "chrome", "os"
],
"evaluator": {
"postconfig":[
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?export=download&id=1ZcITkIOs2Z86S5L6MShSohFs3_xVfeCP",
"path": "C:\\Users\\chenj\\Desktop\\ConferenceCity_Gold.xlsx"
}
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "ConferenceCity - Excel"
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
]
}
}
],
"func": "compare_conference_city_in_order",
"expected": {
"type": "rule",
"rules":{
"expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"]
}
},
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\ConferenceCity.xlsx",
"dest": "ConferenceCity.xlsx"
}
}
}


@@ -0,0 +1,170 @@
{
"id": "74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
"snapshot": "chrome",
"instruction": "Help me to set up an initial web extension project with help of the web tool, tagging it \"happy-extension v0.0.1\". Leave description blank for now. Include a background script and browser action, while other features are not required. Remember to unzip the auto-generated folder into \"Documents\\Projects\".",
"source": "authors",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"ncat.exe", "-k", "-l", "0.0.0.0", "9222",
"--sh-exec", "ncat.exe 127.0.0.1 1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://webext.eu"
]
}
},
{
"type": "execute",
"parameters": {
"command": "mkdir C:\\Users\\chenj\\Documents\\Projects",
"shell": "true"
}
},
{
"type": "launch",
"parameters": {
"command": [
"explorer.exe",
"C:\\Users\\chenj\\Documents\\Projects"
]
}
}
],
"trajectory": "trajectories/74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
"related_apps": [
"chrome",
"os"
],
"evaluator": {
"func": [
"check_json",
"diff_text_file",
"diff_text_file",
"diff_text_file",
"diff_text_file"
],
"result": [
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\manifest.json",
"dest": "manifest.json"
},
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\background_script.js",
"dest": "background_script.js"
},
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\browserAction\\index.html",
"dest": "index.html"
},
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\browserAction\\style.css",
"dest": "style.css"
},
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\browserAction\\script.js",
"dest": "script.js"
}
],
"expected": [
{
"type": "rule",
"rules": {
"expect": [
{
"key": [
"name"
],
"method": "eq",
"ref": "happy-extension"
},
{
"key": [
"version"
],
"method": "eq",
"ref": "0.0.1"
},
{
"key": [
"background",
"scripts"
],
"method": "eq",
"ref": [
"background_script.js"
]
},
{
"key": [
"browser_action",
"default_icon"
],
"method": "eq",
"ref": {
"64": "icons/icon.png"
}
},
{
"key": [
"browser_action",
"default_popup"
],
"method": "eq",
"ref": "browserAction/index.html"
},
{
"key": [
"browser_action",
"default_title"
],
"method": "eq",
"ref": "happy-extension"
}
]
}
},
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1t5Llhn6seDUXVs-eILu6CjwFEQL9Z5Qm&export=download",
"dest": "background_script.js"
},
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=19fMAsWd6q4ElLdOceJ-otHbxRJA_pc_U&export=download",
"dest": "index.html"
},
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1fwfiRPjdug8uh6z23RFO1JtlGH_L_Hl_&export=download",
"dest": "style.css"
},
{
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=14YYnhCfRtHQNk8M4fBPaUQeteoFMGBsA&export=download",
"dest": "script.js"
}
]
}
}
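Each rule in the `expect` list above names a key path, a comparison method, and a reference value. A minimal sketch of how a checker like `check_json` might evaluate such rules; the function name matches the evaluator, but the body is an assumption:
```python
import json

def check_json(result_path, rules) -> float:
    """Return 1.0 iff every rule's key path resolves to its reference value."""
    with open(result_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    for rule in rules["expect"]:
        node = data
        for key in rule["key"]:   # walk the key path, e.g. ["background", "scripts"]
            if not isinstance(node, dict) or key not in node:
                return 0.0
            node = node[key]
        if rule["method"] == "eq" and node != rule["ref"]:
            return 0.0
    return 1.0
```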


@@ -0,0 +1,96 @@
{
"id": "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
"snapshot": "chrome",
"instruction": "I have a LibreOffice Writer file form.docx on the desktop. Help me convert it to PDF format and store the PDF in the forms/ folder in my Google Drive.",
"source": "https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive",
"config": [
{
"type": "googledrive",
"parameters": {
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"operation": ["delete"],
"args": [
{
"query": "title = 'form.pdf' or title = 'form.docx' or title = 'form.docx.pdf' or title = 'forms'",
"trash": false
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"ncat.exe", "-k", "-l", "0.0.0.0", "9222",
"--sh-exec", "ncat.exe 127.0.0.1 1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.zhihu.com/",
"https://www.coursera.org/",
"https://www.deepl.com",
"https://www.wikidata.org/wiki/Wikidata:Main_Page"
]
}
},
{
"type": "login",
"parameters": {
"settings_file": "evaluation_examples/settings/google/settings.json",
"platform": "googledrive"
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=18TvzE8jnULU2g9XJsT-TaPEKcLGNVfu0&export=download&authuser=0&confirm=t&uuid=d914e031-9aa6-431b-81c0-73fcb87af027&at=APZUnTUx56WM_I3gnhHo-eZX__kx:1706158167271",
"path": "C:\\Users\\chenj\\Desktop\\form.docx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\form.docx"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"libreoffice_writer",
"chrome"
],
"evaluator": {
"func": "compare_pdfs",
"result": {
"type": "googledrive_file",
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"query": [
"title = 'forms' and mimeType = 'application/vnd.google-apps.folder' and trashed = false",
"( title = 'form.pdf' or title = 'form.docx.pdf' ) and trashed = false"
],
"dest": "form.pdf"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=118wb7zmG8yP7DS1cImP9-GcOeKib3fLp&export=download&authuser=0&confirm=t&uuid=b82542fa-7731-4014-8ebc-d940f0fb83fe&at=APZUnTVkmL9rk3EpA0Ak5JLPEnJZ:1706101389421",
"dest": "form_gold.pdf"
}
}
}


@@ -0,0 +1,241 @@
{
"id": "8e116af7-7db7-4e35-a68b-b0939c066c78",
"snapshot": "libreoffice_calc",
"instruction": "Please update my bookkeeping sheet with the recent transactions from the provided folder, detailing my expenses over the past few days.",
"source": "authors",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"path": "C:\\Users\\chenj\\Desktop\\my_bookkeeping.xlsx",
"url": "https://drive.google.com/uc?id=1QOSpTZPFzFZeC0tng4Gfws544LFln836&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\receipt_0.jpeg",
"url": "https://drive.google.com/uc?id=1b0BRc-BzXObVCUEonJfRbDsrgxZugj3U&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\receipt_1.jpg",
"url": "https://drive.google.com/uc?id=1S-JBDqwEf7Z_JXDItK_F4BOHgScTjlyN&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\receipt_2.jpg",
"url": "https://drive.google.com/uc?id=1Ys2abZi9_0y8sxuj2vCbC0OhjC6YdrC-&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\receipt_3.pdf",
"url": "https://drive.google.com/uc?id=1sKvBbGDpmUkv891xTqX7w5dtEvchQahd&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\receipt_4.jpg",
"url": "https://drive.google.com/uc?id=1kW7xH5bc2jRaKGDKHDrgSehTrPgkyzkc&export=download"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\my_bookkeeping.xlsx"
}
}
],
"trajectory": "trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78",
"related_apps": [
"libreoffice_calc",
"os",
"image",
"pdf"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "my_bookkeeping - Excel",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 1.0
}
}
],
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\my_bookkeeping.xlsx",
"dest": "my_bookkeeping.xlsx"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1ygEDdVlkf2ZyqMxJ_ktqo9G_g--rc6co&export=download",
"dest": "my_bookkeeping_gold.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_fuzzy",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"rules": [
{
"range": [
"A1:A8",
"B1:B8",
"C1:C8",
"D1:D8",
"E1:E8"
],
"type": "exact_match"
}
]
},
{
"type": "sheet_fuzzy",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"rules": [
{
"range": [
"C9:C13"
],
"type": "exact_match",
"ignore_case": true
}
]
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "D9",
"props": {
"value": {
"method": "approx:0.1",
"ref": -186.93
}
}
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "D10",
"props": {
"value": {
"method": "approx:0.1",
"ref": -3670
}
}
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "D11",
"props": {
"value": {
"method": "approx:0.1",
"ref": -5.7
}
}
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "D12",
"props": {
"value": {
"method": "approx:0.1",
"ref": -154.06
}
}
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "D13",
"props": {
"value": {
"method": "approx:0.1",
"ref": -8.1
}
}
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "E9",
"props": {
"value": {
"method": "approx:0.1",
"ref": 603.07
}
}
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "E10",
"props": {
"value": {
"method": "approx:0.1",
"ref": -3066.93
}
}
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "E11",
"props": {
"value": {
"method": "approx:0.1",
"ref": -3072.63
}
}
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "E12",
"props": {
"value": {
"method": "approx:0.1",
"ref": -3226.69
}
}
},
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "E13",
"props": {
"value": {
"method": "approx:0.1",
"ref": -3234.79
}
}
}
]
}
}
}
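The `check_cell` rules above compare a single cell under a method string such as `"approx:0.1"`. One plausible reading, sketched with openpyxl and treating the suffix as an absolute tolerance (an assumption; it could equally be relative):
```python
from openpyxl import load_workbook

def check_cell(xlsx_path, sheet_idx, coordinate, method, ref):
    """Compare one cell against a reference under an 'approx:<tol>' method."""
    wb = load_workbook(xlsx_path, data_only=True)  # read computed values, not formulas
    value = wb.worksheets[sheet_idx][coordinate].value
    if method.startswith("approx:"):
        tol = float(method.split(":", 1)[1])
        return value is not None and abs(float(value) - ref) <= tol
    return value == ref

# e.g. check_cell("my_bookkeeping.xlsx", 0, "D9", "approx:0.1", -186.93)
```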


@@ -0,0 +1,91 @@
{
"id": "a82b78bb-7fde-4cb3-94a4-035baf10bcf0",
"snapshot": "libreoffice_calc",
"instruction": "I'm really enjoying this paper. Could you please locate the personal webpages of the initial author and the last three authors? Please include them in a browser bookmark folder titled 'Liked Authors.'",
"source": "authors",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"ncat.exe", "-k", "-l", "0.0.0.0", "9222",
"--sh-exec", "ncat.exe 127.0.0.1 1337"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 2
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1IlJ8kU5MlR6OqJHchsSUJzLCmcrG-8N7&export=download&authuser=0&confirm=t&uuid=d2a1810f-edea-4bfd-9d79-e668b9f11876&at=APZUnTVv_eqtC86YzkEU8_jIhC9W:1709522229162",
"path": "C:\\Users\\chenj\\Desktop\\2206.08853.pdf"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\2206.08853.pdf"
}
},
{
"type": "sleep",
"parameters": {
"seconds": 2
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('f11'); time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.scroll(-20)"
]
}
}
],
"trajectory": "trajectories/a82b78bb-7fde-4cb3-94a4-035baf10bcf0",
"related_apps": [
"chrome",
"pdf"
],
"evaluator": {
"func": "is_expected_bookmarks",
"result": {
"type": "bookmarks"
},
"expected": {
"type": "rule",
"rules": {
"type": "liked_authors_websites_urls",
"names": [
"Liked Authors"
],
"urls": [
["https://jimfan.me/", "https://research.nvidia.com/person/linxi-jim-fan"],
["https://research.nvidia.com/person/de-an-huang", "https://ai.stanford.edu/~dahuang/"],
["https://yukezhu.me/", "https://www.cs.utexas.edu/people/faculty-researchers/yuke-zhu", "https://experts.utexas.edu/yuke_zhu", "https://research.nvidia.com/person/yuke-zhu"],
["http://tensorlab.cms.caltech.edu/users/anima/", "https://www.eas.caltech.edu/people/anima"]
]
}
}
}
}


@@ -0,0 +1,94 @@
{
"id": "b5062e3e-641c-4e3a-907b-ac864d2e7652",
"snapshot": "libreoffice_calc",
"instruction": "Please help me to extract the name, e-mail, and affiliation of the first author from each paper in the folder and organize them in an Excel table. Include headers for each field. Sort the authors by their full names alphabetically and save the table as \"Documents\\authors.xlsx\".",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": "mkdir C:\\Users\\chenj\\Documents\\Papers",
"shell": true
}
},
{
"type": "download",
"parameters": {
"files": [
{
"path": "C:\\Users\\chenj\\Documents\\Papers\\zhang_appagent.pdf",
"url": "https://arxiv.org/pdf/2312.13771.pdf"
},
{
"path": "C:\\Users\\chenj\\Documents\\Papers\\niu_screenagent.pdf",
"url": "https://arxiv.org/pdf/2402.07945.pdf"
},
{
"path": "C:\\Users\\chenj\\Documents\\Papers\\koh_visualwebarena.pdf",
"url": "https://arxiv.org/pdf/2401.13649.pdf"
},
{
"path": "C:\\Users\\chenj\\Documents\\Papers\\deng_mind2web.pdf",
"url": "https://papers.nips.cc/paper_files/paper/2023/file/5950bf290a1570ea401bf98882128160-Paper-Datasets_and_Benchmarks.pdf"
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"explorer.exe",
"C:\\Users\\chenj\\Documents\\Papers"
]
}
}
],
"trajectory": "trajectories/b5062e3e-641c-4e3a-907b-ac864d2e7652",
"related_apps": [
"libreoffice_calc",
"os"
],
"evaluator": {
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\authors.xlsx",
"dest": "authors.xlsx"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1fttbvfHuoQfsQUk3fVXkJsCu231jhnQj&export=download",
"dest": "authors-gt.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_fuzzy",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"rules": [
{
"range": ["A1:C1"],
"type": "includes",
"ignore_case": true
},
{
"range": ["A2:B5"],
"type": "exact_match",
"trim_leadings": " ",
"trim_trailings": " "
},
{
"range": ["C2:C5"],
"type": "exact_match",
"trim_leadings": " ",
"trim_trailings": " ",
"ignore_case": true
}
]
}
]
}
}
}


@@ -0,0 +1,106 @@
{
"id": "c867c42d-a52d-4a24-8ae3-f75d256b5618",
"snapshot": "thunderbird",
"instruction": "Please assist me in exporting my contacts of Personal Address Book from Thunderbird into contacts.csv file in the desktop and convert it to .xlsx with Libreoffice Calc.",
"source": "https://www.sync.blue/en/sync/mozilla-thunderbird/google-sheets/",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"C:\\Program Files\\Microsoft Office\\root\\Office16\\EXCEL.EXE"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?id=1njAaNiujlh1DZzGK7nL5iZsppsNAMkH7&export=download",
"path": "C:\\Users\\chenj\\thunderbird-profile.7z"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"C:\\Program Files\\7-Zip\\7z.exe",
"x", "C:\\Users\\chenj\\thunderbird-profile.7z"
]
}
},
{
"type": "execute",
"parameters": {
"command": "rd /s /q C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird",
"shell": true
}
},
{
"type": "execute",
"parameters": {
"command": "move C:\\Users\\chenj\\Thunderbird C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird",
"shell": true
}
},
{
"type": "launch",
"parameters": {
"command": [
"C:\\Program Files\\Mozilla Thunderbird\\thunderbird.exe"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"thunderbird",
"libreoffice_calc"
],
"evaluator": {
"func": [
"compare_csv",
"compare_table"
],
"conj": "and",
"result": [
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\contacts.csv",
"dest": "contacts.csv"
},
{
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\contacts.xlsx",
"dest": "contacts.xlsx"
}
],
"expected": [
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1StwASpAR2ALq2Y1vugGsdUJptg6FwjEm&export=download&authuser=0&confirm=t&uuid=56339e19-b889-4da1-ab72-5e0b90f13fff&at=APZUnTVWFF2pBrtWU_hXgzfbrWP2:1706719668676",
"dest": "contacts_gold.csv"
},
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1s25eUpvkMzSm6p_WA7O13t6mVqmkxr2C&export=download&authuser=0&confirm=t&uuid=901cbd32-6026-4391-a5cc-989e1047cf7c&at=APZUnTUs27mZceDshB_f9Tx4PFyz:1706719610831",
"dest": "contacts_gold.xlsx"
}
],
"options": [
{},
{
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RI0",
"sheet_idx1": "EI0"
}
]
}
]
}
}


@@ -0,0 +1,128 @@
{
"id": "d1acdb87-bb67-4f30-84aa-990e56a09c92",
"snapshot": "libreoffice_calc",
"instruction": "Hello! I'm eagerly planning a culinary adventure to Hong Kong and have curated a list of must-visit restaurants that I've been longing to explore. However, I could use some assistance in compiling a few essential details about these establishments. Would you be so kind as to help me out? It would be fantastic if you could search for these restaurants on Google Maps. I'm particularly interested in obtaining their addresses, any available websites, and contact phone numbers. If you could gather this information and input it into my form file, I would be immensely grateful. Many thanks in advance!",
"source": "authors",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"path": "C:\\Users\\chenj\\Desktop\\restaurants.txt",
"url": "https://drive.google.com/uc?id=1IehFLJPZcFv8Ujk31ExbyGLji9AylmmJ&export=download"
},
{
"path": "C:\\Users\\chenj\\Desktop\\MUST_VISIT.xlsx",
"url": "https://drive.google.com/uc?id=1fXmjvZcwkIcckMIAXi3Hv_JAbVWpgs_l&export=download"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\MUST_VISIT.xlsx"
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\restaurants.txt"
}
},
{
"type": "sleep",
"parameters": {
"seconds": 5
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "restaurants.txt"
}
}
],
"trajectory": "trajectories/d1acdb87-bb67-4f30-84aa-990e56a09c92",
"related_apps": [
"os",
"chrome",
"libreoffice_calc"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "MUST_VISIT - Excel",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 1.0
}
}
],
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\MUST_VISIT.xlsx",
"dest": "MUST_VISIT.xlsx"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download",
"dest": "MUST_VISIT-gt.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_fuzzy",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"rules": [
{
"range": ["A1:A6", "D1:D6"],
"type": "exact_match"
},
{
"range": ["B1:B6"],
"type": "fuzzy_match",
"threshold": 85,
"normalization": [
["Rd", "Road"],
["St", "Street"]
],
"ignore_case": true
},
{
"range": ["C1:C6"],
"type": "includes",
"trim_leadings": "+ ",
"ignore_chars": " ()-"
}
]
}
]
}
}
}


@@ -0,0 +1,79 @@
{
"id": "da52d699-e8d2-4dc5-9191-a2199e0b6a9b",
"snapshot": "libreoffice_calc",
"instruction": "Examine the spreadsheet on the desktop, which contains a record of books read in 2022. Take the website https://howlongtoread.com/ as a reference to identify the book with the slowest reading pace, measured in words per day. I have an empty document named 'book_list_result.docx' on the desktop; please open it and record the title there.",
"source": "GAIA",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1JGZNCShtmpu7A8Z8lkjc8hdFEAMXZVvh&export=download&authuser=0&confirm=t&uuid=67063da6-2a72-4ed2-92b2-ade508439ce4&at=APZUnTUgS17YjX-D0oSvALwnPosB:1709368886960",
"path": "C:\\Users\\chenj\\Desktop\\2023_validation_Book_Reading_Rate.xlsx"
},
{
"url": "https://drive.usercontent.google.com/download?id=1iySmK8zvTzgmERH7KQuESP05NBsMunhV&export=download&authuser=0&confirm=t&uuid=130f6cee-0f9a-4f2e-a84d-89a3b302f350&at=APZUnTXugQOTOApe1_zxUbafo2Sp:1709369519349",
"path": "C:\\Users\\chenj\\Desktop\\book_list_result.docx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\2023_validation_Book_Reading_Rate.xlsx"
}
}
],
"trajectory": "trajectories/da52d699-e8d2-4dc5-9191-a2199e0b6a9b",
"related_apps": [
"libreoffice_calc",
"chrome",
"libreoffice_writer"
],
"evaluator": {
"func": "compare_docx_files",
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "book_list_result - Word",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
}
],
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1rpvOlHZO0AqC85od8pJtx8YcDPljcejN&export=download&authuser=0&confirm=t&uuid=24a3a5e3-a188-4a41-ad01-a4709dc1c0b6&at=APZUnTWx56rr8-iTuXkfV5poOK-I:1709369145446",
"dest": "book_list_result_Gold.docx"
},
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\book_list_result.docx",
"dest": "book_list_result.docx"
}
}
}


@@ -0,0 +1,101 @@
{
"id": "deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
"snapshot": "libreoffice_calc",
"instruction": "Find a paper list of all the new foundation language models issued on 11st Oct. 2023 via arxiv daily, and organize it into the sheet I opened.",
"source": "authors",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"path": "C:\\Users\\chenj\\Desktop\\New Large Language Models.xlsx",
"url": "https://drive.google.com/uc?id=1NJFAUDzatd5TbBqXeCy3-ok4BWj-xayT&export=download"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\New Large Language Models.xlsx"
}
}
],
"trajectory": "trajectories/deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
"related_apps": [
"libreoffice_calc",
"chrome",
"os"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "New Large Language Models - Excel",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 1.0
}
}
],
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\New Large Language Models.xlsx",
"dest": "New Large Language Models.xlsx"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1BHOyjFo72b74YKWTqPMaoNvCzICkos-G&export=download",
"dest": "New Large Language Models Gold.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_fuzzy",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"rules": [
{
"range": [
"B2:B5",
"C2:C5"
],
"type": "exact_match"
},
{
"range": [
"A2:A5"
],
"type": "fuzzy_match",
"threshold": 90,
"ignore_case": true
}
]
}
]
}
}
}


@@ -0,0 +1,121 @@
{
"id": "e2392362-125e-4f76-a2ee-524b183a3412",
"snapshot": "chrome",
"instruction": "I recently started using the famous personal academic homepage template from academicpages.github.io to build my own personal homepage, and I have cloned it to my local Documents\\Code\\Website folder. According to an online tutorial, I can configure my name and contact information in the _config.yaml file. However, I am not familiar with the YAML file format. Please help me find the sections related to the name and contact information in this file and change them to \"Test Account\" and \"Test@gmail.com\".",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": "mkdir C:\\Users\\chenj\\Documents\\Code\\Website",
"shell": true
}
},
{
"type": "download",
"parameters": {
"files": [
{
"path": "C:\\Users\\chenj\\.tmp.7z",
"url": "https://drive.google.com/uc?id=1LYc6rBSuCNBtTQIg-m9zP6KmlEB_Zfdo&export=download"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"C:\\Program Files\\7-Zip\\7z.exe",
"x", "-oC:\\Users\\chenj\\Documents\\Code\\Website",
"C:\\Users\\chenj\\.tmp.7z"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"ncat.exe", "-k", "-l", "0.0.0.0", "9222",
"--sh-exec", "ncat.exe 127.0.0.1 1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://academicpages.github.io/"
]
}
}
],
"trajectory": "trajectories/e2392362-125e-4f76-a2ee-524b183a3412",
"related_apps": [
"chrome",
"os",
"vscode"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);"
]
}
}
],
"func": "check_json",
"options": {
"is_yaml": true
},
"expected": {
"type": "rule",
"rules": {
"expect": [
{
"key": [
"name"
],
"method": "eq",
"ref": "Test Account"
},
{
"key": [
"author",
"name"
],
"method": "eq",
"ref": "Test Account"
},
{
"key": [
"author",
"email"
],
"method": "eq",
"ref": "Test@gmail.com"
}
]
}
},
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Documents\\Code\\Website\\academicpages.github.io\\_config.yml",
"dest": "_config.yaml"
}
}
}


@@ -0,0 +1,82 @@
{
"id": "eb303e01-261e-4972-8c07-c9b4e7a4922a",
"snapshot": "libreoffice_impress",
"instruction": "Tomorrow, I'm scheduled to deliver a talk, and my PowerPoint slides and speaking notes are saved on the desktop. Help me insert my planned remarks for each slide into the \"note\" section of the PowerPoint as a reminder. I've completed this task for some slides; assist me in completing the remaining part.",
"source": "authors",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1MdgN8ECxzLgHgjq8eKqrNQt3MPDjnKwa&export=download&authuser=0&confirm=t&uuid=ed5c37db-c565-4ca1-bbd1-bbdba13e9306&at=APZUnTUNi8YTLlZqMZ0r--bBpBEG:1709449877819",
"path": "C:\\Users\\chenj\\Desktop\\lecture1-2021-with-ink.pptx"
},
{
"url": "https://drive.usercontent.google.com/download?id=1FkPOcsWpsjUXSUld1NblwyVzcsE19uIe&export=download&authuser=0&confirm=t&uuid=27501bc0-732b-4ff7-abf4-a52427aea264&at=APZUnTWleaafIVF2iZkiuHo0vQ66:1709449873140",
"path": "C:\\Users\\chenj\\Desktop\\notes.docx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "C:\\Users\\chenj\\Desktop\\lecture1-2021-with-ink.pptx"
}
}
],
"trajectory": "trajectories/eb303e01-261e-4972-8c07-c9b4e7a4922a",
"related_apps": [
"libreoffice_impress",
"libreoffice_writer"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "lecture1-2021-with-ink - PowerPoint",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
}
],
"func": "compare_pptx_files",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=18orj_0q6N4w7ijADOJeU5ZkDDw-RdFUl&export=download&authuser=0&confirm=t&uuid=c05d2bce-bccb-4504-8fe4-7c409788d727&at=APZUnTVlCicnIm0cMdJ9FrZg4MSN:1709453015475",
"dest": "lecture1-2021-with-ink_Gold.pptx"
},
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\lecture1-2021-with-ink.pptx",
"dest": "lecture1-2021-with-ink.pptx"
},
"options": {
"examine_shape": false,
"examine_bullets": false
}
}
}

View File

@@ -0,0 +1,53 @@
{
"id": "f918266a-b3e0-4914-865d-4faa564f1aef",
"snapshot": "vscode",
"instruction": "Please complete the code and retrieve the output from the Python script 'calculator.py' located on the desktop and save it as 'log.txt' in the same directory as the Python file.",
"source": "GAIA",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1l09TnSiXo-qOK2UazcIdrT_M6JwTfzq7&export=download&authuser=0&confirm=t&uuid=80bd550f-f3a6-4b69-ae0f-221c12b11fd9&at=APZUnTWgUlKuIDJZmkr0Q9Bze3w_:1709784652645",
"path": "C:\\Users\\chenj\\Desktop\\calculator.zip"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"C:\\Program Files\\7-Zip\\7z.exe",
"C:\\Users\\chenj\\Desktop\\calculator.zip"
]
}
},
{
"type": "execute",
"parameters": {
"command": "del C:\\Users\\chenj\\Desktop\\calculator.zip",
"shell": true
}
}
],
"trajectory": "trajectories/f918266a-b3e0-4914-865d-4faa564f1aef",
"related_apps": [
"vscode",
"os"
],
"evaluator": {
"func": "compare_text_file",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1-14AgA1nHNL22VD_3QtRzWaMjIBa3RvJ&export=download&authuser=0&confirm=t&uuid=6aa05bf1-4964-4f7b-8983-d28540b4053b&at=APZUnTXuJgDHIYA2FZl3A_OQJEOF:1709881263131",
"dest": "log_Gold.txt"
},
"result": {
"type": "vm_file",
"path": "C:\\Users\\chenj\\Desktop\\log.txt",
"dest": "log.txt"
}
}
}

View File

@@ -86,13 +86,14 @@
],
"func": [
"check_image_size",
"check_structure_sim"
"check_structure_sim_resized"
],
"expected": [
{
"type": "vm_file",
"path": "/home/user/Desktop/dog_with_background.png",
"dest": "dog_with_background.png"
"type": "rule",
"rules": {
"height": 512
}
},
{
"type": "vm_file",
@@ -102,10 +103,9 @@
],
"result": [
{
"type": "rule",
"rules": {
"height": 512
}
"type": "vm_file",
"path": "/home/user/Desktop/dog_with_background.png",
"dest": "dog_with_background.png"
},
{
"type": "vm_file",

View File

@@ -63,6 +63,12 @@
"type": "vm_file",
"path": "/home/user/Desktop/saa-format-guide.pptx",
"dest": "saa-format-guide.pptx"
},
"expected": {
"type": "rule",
"rules": {
"color": "red"
}
}
}
}

View File

@@ -30,12 +30,12 @@
],
"evaluator": {
"func": "check_brightness_decrease_and_structure_sim",
"expected": {
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/background.png",
"dest": "background.png"
},
"result": {
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=13if1UwZ5ay6ADAVW2jp3rcyvAEBse6MJ&export=download&authuser=0&confirm=t&uuid=2ea03068-1874-4240-baa1-f8bb2f917a99&at=APZUnTXq6dVlASg819jCaI1A-rm2:1710136385956",
"dest": "image_original.png"

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1e12nL_V7bffaLSocQ86EiGCdygzggWeu&export=download",
"url": "https://drive.usercontent.google.com/download?id=1epTcblcYh8j_wFtA-aiXPIF2Oo1IVw8A&export=download",
"path": "/home/user/Desktop/Dickinson_Slides.pptx"
}
]
@@ -36,7 +36,7 @@
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1Xl6tgQ0K5qA1BDA2fKTK2xFLzXwbtkZ6&export=download",
"path": "https://drive.usercontent.google.com/download?id=1vUvaQLJUtFgbZi7lSzl0y0TS_WecFczm&export=download",
"dest": "notes_gold.docx"
},
"options": {

View File

@@ -1,50 +1,50 @@
{
"id": "6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a",
"snapshot": "multiapps",
"instruction": "I now want to count the meeting cities of the three machine learning conferences in the past ten years from 2013 to 2019(including 2013 and 2019). I have listed the names and years of the conferences in excel. Please fill in the vacant locations.",
"source": "author",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?export=download&id=19wUxTQeoKr6ihJWJ_9cu2tzKQH0cnxWH",
"path": "/home/user/Desktop/ConferenceCity.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/Desktop/ConferenceCity.xlsx"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"calc", "chrome", "os"
],
"evaluator": {
"postconfig":[
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?export=download&id=1ZcITkIOs2Z86S5L6MShSohFs3_xVfeCP",
"path": "/home/user/Desktop/ConferenceCity_Gold.xlsx"
}
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "ConferenceCity.xlsx - LibreOffice Calc"
}
},
"id": "6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a",
"snapshot": "multiapps",
"instruction": "I now want to count the meeting cities of the three machine learning conferences in the past ten years from 2013 to 2019(including 2013 and 2019). I have listed the names and years of the conferences in excel. Please fill in the vacant locations.",
"source": "author",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?export=download&id=19wUxTQeoKr6ihJWJ_9cu2tzKQH0cnxWH",
"path": "/home/user/Desktop/ConferenceCity.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/Desktop/ConferenceCity.xlsx"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"calc", "chrome", "os"
],
"evaluator": {
"postconfig":[
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?export=download&id=1ZcITkIOs2Z86S5L6MShSohFs3_xVfeCP",
"path": "/home/user/Desktop/ConferenceCity_Gold.xlsx"
}
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "ConferenceCity.xlsx - LibreOffice Calc"
}
},
{
"type": "execute",
"parameters": {
@@ -55,18 +55,18 @@
]
}
}
],
"func": "compare_conference_city_in_order",
"expected": {
"type": "rule",
"rules":{
"expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"]
}
},
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/ConferenceCity.xlsx",
"dest": "ConferenceCity.xlsx"
}
}
}
],
"func": "compare_conference_city_in_order",
"expected": {
"type": "rule",
"rules":{
"expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"]
}
},
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/ConferenceCity.xlsx",
"dest": "ConferenceCity.xlsx"
}
}
}

View File

@@ -38,7 +38,7 @@
}
},
{
"type": "execute",
"type": "launch",
"parameters": {
"command": [
"nautilus",
@@ -109,4 +109,4 @@
]
}
}
}
}

View File

@@ -1,19 +0,0 @@
import pandas as pd
file_path = "/Users/lxc/Downloads/Speedtest.csv"
# Find the value in the second data cell of the second row of the CSV
# with open(file_path, "r") as f:
# for i, line in enumerate(f):
# if i == 1:
# data = line.split(",")[1]
# break
# print(data)
with open(file_path, "r") as f:
reader = pd.read_csv(f, sep=',', header=None)
# for column in reader.columns:
# if column.startswith("TEST_DATE"):
# data_col = column
# break
for data in reader['TEST_DATE']:
print(data)

View File

@@ -31,7 +31,7 @@
"command": [
"tar",
"-xJvf",
".tmp.tar.xz",
"/home/user/.tmp.tar.xz",
"-C",
"/home/user/Code/Website/"
]
@@ -124,4 +124,4 @@
"dest": "_config.yaml"
}
}
}
}

View File

@@ -62,4 +62,4 @@
"dest": "settings.json"
}
}
}
}

View File

@@ -63,4 +63,4 @@
"dest": "settings.json"
}
}
}
}

View File

@@ -1,4 +1,4 @@
{
"email": "xlang2024anonym@gmail.com",
"password": "q]wN~0iD>H:6"
}
"password": "Evt5LLj!VJ6Y!C$B"
}

View File

@@ -1 +0,0 @@
{"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0eVpYfdSAjvbCCgYIARAAGA4SNwF-L9IrAgL6KVceiEVTjtQdmPki2I3m8ejP3lzTLL2Wa3-rdrYfU7eYeKDVCS5KRxa_xCE_pPY", "token_expiry": "2024-03-13T10:09:01Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}

View File

@@ -286,7 +286,6 @@
"788b3701-3ec9-4b67-b679-418bfa726c22",
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
"42d25c08-fb87-4927-8b65-93631280a26f",
"bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108",
"e8172110-ec08-421b-a6f5-842e6451911f",
"42f4d1c7-4521-4161-b646-0a8934e36081",
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",

View File

@@ -70,7 +70,6 @@
"c2751594-0cd5-4088-be1b-b5f2f9ec97c4",
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
"42d25c08-fb87-4927-8b65-93631280a26f",
"bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108",
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
"d68204bf-11c1-4b13-b48b-d303c73d4bf6",
"91190194-f406-4cd6-b3f9-c43fac942b22",

View File

@@ -2,6 +2,7 @@ import datetime
import json
import logging
import os
# import wandb
from wrapt_timeout_decorator import *
@@ -13,7 +14,6 @@ with open("./settings.json", "r") as file:
data = json.load(file)
time_limit = data["time_limit"]
@timeout(time_limit, use_signals=False)
def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
agent.reset()
@@ -21,9 +21,9 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl
done = False
step_idx = 0
env.controller.start_recording()
# str_table = wandb.Table(columns=["Screenshot", "A11T", "Model Response", "Action", "Action timestamp", "Done"])
while not done and step_idx < max_steps:
actions = agent.predict(
response, actions = agent.predict(
instruction,
obs
)
@@ -31,20 +31,22 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl
# Capture the timestamp before executing the action
action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
logger.info("Step %d: %s", step_idx + 1, action)
obs, reward, done, info = env.step(action, args.sleep_after_execution)
logger.info("Reward: %.2f", reward)
logger.info("Done: %s", done)
logger.info("Info: %s", info)
# Save screenshot and trajectory information
with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
"wb") as _f:
with open(obs['screenshot'], "rb") as __f:
screenshot = __f.read()
_f.write(screenshot)
# get a11tree and save to wandb
# thisrun_a11tree = env.controller.get_accessibility_tree()
# str_table.add_data(wandb.Image(data_or_path=os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), caption=f"step_{step_idx + 1}_{action_timestamp}"),
# thisrun_a11tree,
# response, action, action_timestamp, done)
# run.log({"Reward": reward})
with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
f.write(json.dumps({
"step_num": step_idx + 1,
@@ -56,14 +58,15 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl
"screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
}))
f.write("\n")
if done:
logger.info("The episode is done.")
break
step_idx += 1
# run.log({"str_trajectory": str_table})
result = env.evaluate()
logger.info("Result: %.2f", result)
scores.append(result)
with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
f.write(f"{result}\n")
env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
# run.log({"Result": result})

32
main.py
View File

@@ -70,38 +70,6 @@ def human_agent():
done = False
logger.info('\x1b[32m[TASK INSTRUCTION]: \x1b[32;3m%s\x1b[0m', example["instruction"])
trajectory = [
{
"action_type": "MOVE_TO", #
"parameters": {
"x": 754,
"y": 1057
}
},
{"action_type": "CLICK", "parameters": {"button": "right", "num_clicks": 1}}
]
for i in range(len(trajectory)):
# action = get_human_action()
# action = {
# "action_type": 0,
# "click_type": 3,
# }
logger.info(trajectory[i])
observation, reward, done, info = env.step(trajectory[i])
observation.pop("accessibility_tree")
logger.info("Observation: %s", observation)
logger.info("Reward: %.2f", reward)
logger.info("Info: %s", info)
logger.info("================================\n")
if done:
logger.info("The episode is done.")
break
input("Press Enter to start human operation...")
human_start_time = time.time()
input("Press Enter to finish human operation.")

View File

@@ -2,7 +2,7 @@ import xml.etree.ElementTree as ET
from PIL import Image, ImageDraw, ImageFont
from typing import Tuple
from typing import Tuple, List
def find_leaf_nodes(xlm_file_str):
if not xlm_file_str:
@@ -40,7 +40,7 @@ def judge_node(node: ET, platform="ubuntu", check_image=False) -> bool:
or node.tag.endswith("textfield")\
or node.tag.endswith("textarea")\
or node.tag.endswith("menu")\
or node.tag in [ "alert", "canvas", "check-box"
or node.tag in { "alert", "canvas", "check-box"
, "combo-box", "entry", "icon"
, "image", "paragraph", "scroll-bar"
, "section", "slider", "static"
@@ -48,7 +48,7 @@ def judge_node(node: ET, platform="ubuntu", check_image=False) -> bool:
, "netuiribbontab", "start", "trayclockwclass"
, "traydummysearchcontrol", "uiimage", "uiproperty"
, "uiribboncommandbar"
]
}
keeps = keeps and ( platform=="ubuntu"\
and node.get("{{{:}}}showing".format(state_ns), "false")=="true"\
and node.get("{{{:}}}visible".format(state_ns), "false")=="true"\
@@ -66,7 +66,7 @@ def judge_node(node: ET, platform="ubuntu", check_image=False) -> bool:
coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format(component_ns), "(-1, -1)"))
sizes: Tuple[int, int] = eval(node.get("{{{:}}}size".format(component_ns), "(-1, -1)"))
keeps = keeps and coordinates[0]>0 and coordinates[1]>0 and sizes[0]>0 and sizes[1]>0
keeps = keeps and coordinates[0]>=0 and coordinates[1]>=0 and sizes[0]>0 and sizes[1]>0
return keeps
def filter_nodes(root: ET, platform="ubuntu", check_image=False):
@@ -86,6 +86,7 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
draw = ImageDraw.Draw(image)
marks = []
drew_nodes = []
text_informations: List[str] = ["index\ttag\tname\ttext"]
try:
# Adjust the path to the font file you have or use a default one
@@ -135,18 +136,38 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
#draw.rectangle([text_position, (text_position[0] + 25, text_position[1] + 18)], fill='black')
draw.rectangle(text_bbox, fill='black')
draw.text(text_position, str(index), font=font, anchor="lb", fill="white")
index += 1
# each mark is an x, y, w, h tuple
marks.append([coords[0], coords[1], size[0], size[1]])
drew_nodes.append(_node)
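# Escape any embedded double quotes CSV-style (wrap in quotes and double them) so the tab-separated element list stays parseable.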
if _node.text:
node_text = ( _node.text if '"' not in _node.text\
else '"{:}"'.format(_node.text.replace('"', '""'))
)
elif _node.get("{uri:deskat:uia.windows.microsoft.org}class", "").endswith("EditWrapper") \
and _node.get("{uri:deskat:value.at-spi.gnome.org}value"):
node_text: str = _node.get("{uri:deskat:value.at-spi.gnome.org}value")
node_text = (node_text if '"' not in node_text\
else '"{:}"'.format(node_text.replace('"', '""'))
)
else:
node_text = '""'
text_information: str = "{:d}\t{:}\t{:}\t{:}"\
.format( index, _node.tag
, _node.get("name", "")
, node_text
)
text_informations.append(text_information)
index += 1
except ValueError:
pass
# Save the result
image.save(output_image_file_path)
return marks, drew_nodes
return marks, drew_nodes, "\n".join(text_informations)
def print_nodes_with_indent(nodes, indent=0):
@@ -157,12 +178,12 @@ def print_nodes_with_indent(nodes, indent=0):
if __name__ == '__main__':
import json
with open('selection_sorted(imaged).xml', 'r', encoding='utf-8') as f:
with open('3.xml', 'r', encoding='utf-8') as f:
xml_file_str = f.read()
filtered_nodes = filter_nodes(ET.fromstring(xml_file_str))
print(len(filtered_nodes))
masks = draw_bounding_boxes( filtered_nodes, 'selection_sorted(imaged).png'
, 'selection_sorted(imaged).ai.png'
masks = draw_bounding_boxes( filtered_nodes, '3.a.png'
, '3.png'
)
# print(masks)

View File

@@ -5,19 +5,22 @@ import os
import re
import time
import uuid
import openai
import xml.etree.ElementTree as ET
import numpy as np
from http import HTTPStatus
from io import BytesIO
from typing import Dict, List
from google.api_core.exceptions import InvalidArgument
from typing import Dict, List, Tuple, Union
import backoff
import dashscope
import google.generativeai as genai
import openai
import requests
import cv2
from PIL import Image
from google.api_core.exceptions import InvalidArgument
from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes, draw_bounding_boxes
from mm_agents.accessibility_tree_wrap.heuristic_retrieve import filter_nodes, draw_bounding_boxes
from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION, \
SYS_PROMPT_IN_A11Y_OUT_CODE, SYS_PROMPT_IN_A11Y_OUT_ACTION, \
SYS_PROMPT_IN_BOTH_OUT_CODE, SYS_PROMPT_IN_BOTH_OUT_ACTION, \
@@ -25,6 +28,14 @@ from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_S
logger = logging.getLogger("desktopenv.agent")
def downsample_image(img: Union[str, np.ndarray], ratio: Tuple[float, float]):
fx, fy = ratio
if isinstance(img, str):
img = cv2.imread(img)
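# cv2.INTER_AREA averages source pixels, which gives the cleanest result when shrinking an image.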
resized = cv2.resize(img, None, fx=fx, fy=fy, interpolation=cv2.INTER_AREA)
return resized
# Function to encode the image
def encode_image(image_path):
@@ -36,27 +47,36 @@ def linearize_accessibility_tree(accessibility_tree):
# leaf_nodes = find_leaf_nodes(accessibility_tree)
filtered_nodes = filter_nodes(ET.fromstring(accessibility_tree))
linearized_accessibility_tree = "tag\tname\ttext\tposition (top-left x&y)\tsize (w&h)\n"
linearized_accessibility_tree = ["tag\tname\ttext\tposition (top-left x&y)\tsize (w&h)"]
# Linearize the accessibility tree nodes into a table format
for node in filtered_nodes:
linearized_accessibility_tree += node.tag + "\t"
linearized_accessibility_tree += node.attrib.get('name') + "\t"
#linearized_accessibility_tree += node.tag + "\t"
#linearized_accessibility_tree += node.attrib.get('name') + "\t"
if node.text:
linearized_accessibility_tree += (node.text if '"' not in node.text else '"{:}"'.format(
node.text.replace('"', '""'))) + "\t"
text = ( node.text if '"' not in node.text\
else '"{:}"'.format(node.text.replace('"', '""'))
)
elif node.get("{uri:deskat:uia.windows.microsoft.org}class", "").endswith("EditWrapper") \
and node.get("{uri:deskat:value.at-spi.gnome.org}value"):
text: str = node.get("{uri:deskat:value.at-spi.gnome.org}value")
linearized_accessibility_tree += (text if '"' not in text else '"{:}"'.format(
text.replace('"', '""'))) + "\t"
text = (text if '"' not in text\
else '"{:}"'.format(text.replace('"', '""'))
)
else:
linearized_accessibility_tree += '""\t'
linearized_accessibility_tree += node.attrib.get(
'{uri:deskat:component.at-spi.gnome.org}screencoord', "") + "\t"
linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size', "") + "\n"
text = '""'
#linearized_accessibility_tree += node.attrib.get(
#, "") + "\t"
#linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size', "") + "\n"
linearized_accessibility_tree.append(
"{:}\t{:}\t{:}\t{:}\t{:}".format(
node.tag, node.get("name", ""), text
, node.get('{uri:deskat:component.at-spi.gnome.org}screencoord', "")
, node.get('{uri:deskat:component.at-spi.gnome.org}size', "")
)
)
return linearized_accessibility_tree
return "\n".join(linearized_accessibility_tree)
def tag_screenshot(screenshot, accessibility_tree):
@@ -67,9 +87,9 @@ def tag_screenshot(screenshot, accessibility_tree):
# nodes = filter_nodes(find_leaf_nodes(accessibility_tree))
nodes = filter_nodes(ET.fromstring(accessibility_tree), check_image=True)
# Make tag screenshot
marks, drew_nodes = draw_bounding_boxes(nodes, screenshot, tagged_screenshot_file_path)
marks, drew_nodes, element_list = draw_bounding_boxes(nodes, screenshot, tagged_screenshot_file_path)
return marks, drew_nodes, tagged_screenshot_file_path
return marks, drew_nodes, tagged_screenshot_file_path, element_list
def parse_actions_from_string(input_string):
@@ -255,7 +275,6 @@ class PromptAgent:
if self.observation_type == "screenshot_a11y_tree":
_screenshot = previous_obs["screenshot"]
_linearized_accessibility_tree = previous_obs["accessibility_tree"]
logger.debug("LINEAR AT: %s", _linearized_accessibility_tree)
messages.append({
"role": "user",
@@ -341,7 +360,8 @@ class PromptAgent:
# {{{1
if self.observation_type in ["screenshot", "screenshot_a11y_tree"]:
base64_image = encode_image(obs["screenshot"])
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) if self.observation_type == "screenshot_a11y_tree" else None
logger.debug("LINEAR AT: %s", linearized_accessibility_tree)
if self.observation_type == "screenshot_a11y_tree":
self.observations.append({
@@ -375,6 +395,7 @@ class PromptAgent:
})
elif self.observation_type == "a11y_tree":
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
logger.debug("LINEAR AT: %s", linearized_accessibility_tree)
self.observations.append({
"screenshot": None,
@@ -393,11 +414,13 @@ class PromptAgent:
})
elif self.observation_type == "som":
# Add som to the screenshot
masks, drew_nodes, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"])
masks, drew_nodes, tagged_screenshot, linearized_accessibility_tree = tag_screenshot(obs["screenshot"], obs["accessibility_tree"])
base64_image = encode_image(tagged_screenshot)
logger.debug("LINEAR AT: %s", linearized_accessibility_tree)
self.observations.append({
"screenshot": base64_image
"screenshot": base64_image,
"accessibility_tree": linearized_accessibility_tree
})
messages.append({
@@ -405,7 +428,8 @@ class PromptAgent:
"content": [
{
"type": "text",
"text": "Given the tagged screenshot as below. What's the next step that you will do to help with the task?"
"text": "Given the tagged screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
linearized_accessibility_tree)
},
{
"type": "image_url",
@@ -422,7 +446,8 @@ class PromptAgent:
# with open("messages.json", "w") as f:
# f.write(json.dumps(messages, indent=4))
logger.info("Generating content with GPT model: %s", self.model)
#logger.info("PROMPT: %s", messages)
response = self.call_llm({
"model": self.model,
"messages": messages,
@@ -441,7 +466,7 @@ class PromptAgent:
actions = None
self.thoughts.append("")
return actions
return response, actions
@backoff.on_exception(
backoff.expo,
@@ -461,7 +486,7 @@ class PromptAgent:
"Content-Type": "application/json",
"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"
}
# logger.info("Generating content with GPT model: %s", self.model)
logger.info("Generating content with GPT model: %s", self.model)
response = requests.post(
"https://api.openai.com/v1/chat/completions",
headers=headers,
@@ -495,7 +520,7 @@ class PromptAgent:
temperature = payload["temperature"]
claude_messages = []
for i, message in enumerate(messages):
claude_message = {
"role": message["role"],
@@ -503,17 +528,17 @@ class PromptAgent:
}
assert len(message["content"]) in [1, 2], "One text, or one text with one image"
for part in message["content"]:
if part['type'] == "image_url":
image_source = {}
image_source["type"] = "base64"
image_source["media_type"] = "image/png"
image_source["data"] = part['image_url']['url'].replace("data:image/png;base64,", "")
claude_message['content'].append({"type": "image", "source": image_source})
if part['type'] == "text":
claude_message['content'].append({"type": "text", "text": part['text']})
claude_messages.append(claude_message)
# our Claude endpoint does not support system messages, so we prepend the system prompt to the first user message
@@ -522,83 +547,155 @@ class PromptAgent:
claude_messages[1]['content'].insert(0, claude_system_message_item)
claude_messages.pop(0)
logger.debug("CLAUDE MESSAGE: %s", repr(claude_messages))
# headers = {
# "x-api-key": os.environ["ANTHROPIC_API_KEY"],
# "anthropic-version": "2023-06-01",
# "content-type": "application/json"
# }
# headers = {
# "Accept": "application / json",
# "Authorization": "Bearer " + os.environ["ANTHROPIC_API_KEY"],
# "User-Agent": "Apifox/1.0.0 (https://apifox.com)",
# "Content-Type": "application/json"
# }
headers = {
"x-api-key": os.environ["ANTHROPIC_API_KEY"],
"anthropic-version": "2023-06-01",
"content-type": "application/json"
"Authorization": os.environ["ANTHROPIC_API_KEY"],
"Content-Type": "application/json"
}
payload = {
"model": self.model,
"max_tokens": max_tokens,
"messages": claude_messages
"messages": claude_messages,
"temperature": temperature,
"top_p": top_p
}
response = requests.post(
"https://api.anthropic.com/v1/messages",
headers=headers,
json=payload
)
if response.status_code != 200:
logger.error("Failed to call LLM: " + response.text)
time.sleep(5)
return ""
max_attempts = 20
attempt = 0
while attempt < max_attempts:
# response = requests.post("https://api.aigcbest.top/v1/chat/completions", headers=headers, json=payload)
response = requests.post("https://token.cluade-chat.top/v1/chat/completions", headers=headers, json=payload)
if response.status_code == 200:
result = response.json()['choices'][0]['message']['content']
break
else:
logger.error(f"Failed to call LLM: {response.text}")
time.sleep(10)
attempt += 1
else:
return response.json()['content'][0]['text']
print("Exceeded maximum attempts to call LLM.")
result = ""
return result
# elif self.model.startswith("mistral"):
# print("Call mistral")
# messages = payload["messages"]
# max_tokens = payload["max_tokens"]
#
# misrtal_messages = []
#
# for i, message in enumerate(messages):
# mistral_message = {
# "role": message["role"],
# "content": []
# }
#
# for part in message["content"]:
# mistral_message['content'] = part['text'] if part['type'] == "text" else None
#
# misrtal_messages.append(mistral_message)
#
# # the mistral not support system message in our endpoint, so we concatenate it at the first user message
# if misrtal_messages[0]['role'] == "system":
# misrtal_messages[1]['content'] = misrtal_messages[0]['content'] + "\n" + misrtal_messages[1]['content']
# misrtal_messages.pop(0)
#
# # openai.api_base = "http://localhost:8000/v1"
# # openai.api_key = "test"
# # response = openai.ChatCompletion.create(
# # messages=misrtal_messages,
# # model="Mixtral-8x7B-Instruct-v0.1"
# # )
#
# from openai import OpenAI
# TOGETHER_API_KEY = "d011650e7537797148fb6170ec1e0be7ae75160375686fae02277136078e90d2"
#
# client = OpenAI(api_key=TOGETHER_API_KEY,
# base_url='https://api.together.xyz',
# )
# logger.info("Generating content with Mistral model: %s", self.model)
# response = client.chat.completions.create(
# messages=misrtal_messages,
# model="mistralai/Mixtral-8x7B-Instruct-v0.1",
# max_tokens=1024
# )
#
# try:
# # return response['choices'][0]['message']['content']
# return response.choices[0].message.content
# except Exception as e:
# print("Failed to call LLM: " + str(e))
# return ""
elif self.model.startswith("mistral"):
print("Call mistral")
messages = payload["messages"]
max_tokens = payload["max_tokens"]
top_p = payload["top_p"]
temperature = payload["temperature"]
mistral_messages = []
for i, message in enumerate(messages):
mistral_message = {
"role": message["role"],
"content": ""
}
for part in message["content"]:
mistral_message['content'] = part['text'] if part['type'] == "text" else ""
mistral_messages.append(mistral_message)
from openai import OpenAI
client = OpenAI(api_key=os.environ["TOGETHER_API_KEY"],
base_url='https://api.together.xyz',
)
logger.info("Generating content with Mistral model: %s", self.model)
flag = 0
while True:
try:
if flag > 20: break
response = client.chat.completions.create(
messages=mistral_messages,
model=self.model,
max_tokens=max_tokens
)
break
except Exception:
if flag == 0:
mistral_messages = [mistral_messages[0]] + mistral_messages[-1:]
else:
mistral_messages[-1]["content"] = ' '.join(mistral_messages[-1]["content"].split()[:-500])
flag = flag + 1
try:
return response.choices[0].message.content
except Exception as e:
print("Failed to call LLM: " + str(e))
return ""
elif self.model.startswith("THUDM"):
# THUDM/cogagent-chat-hf
print("Call CogAgent")
messages = payload["messages"]
max_tokens = payload["max_tokens"]
top_p = payload["top_p"]
temperature = payload["temperature"]
cog_messages = []
for i, message in enumerate(messages):
cog_message = {
"role": message["role"],
"content": []
}
for part in message["content"]:
if part['type'] == "image_url":
cog_message['content'].append(
{"type": "image_url", "image_url": {"url": part['image_url']['url']}})
if part['type'] == "text":
cog_message['content'].append({"type": "text", "text": part['text']})
cog_messages.append(cog_message)
# our CogAgent endpoint does not support system messages, so we prepend the system prompt to the first user message
if cog_messages[0]['role'] == "system":
cog_system_message_item = cog_messages[0]['content'][0]
cog_messages[1]['content'].insert(0, cog_system_message_item)
cog_messages.pop(0)
payload = {
"model": self.model,
"max_tokens": max_tokens,
"messages": cog_messages
}
base_url = "http://127.0.0.1:8000"
response = requests.post(f"{base_url}/v1/chat/completions", json=payload, stream=False)
if response.status_code == 200:
decoded_line = response.json()
content = decoded_line.get("choices", [{}])[0].get("message", "").get("content", "")
return content
else:
print("Failed to call LLM: ", response.status_code)
return ""
elif self.model.startswith("gemini"):
def encoded_img_to_pil_img(data_str):
@@ -674,6 +771,7 @@ class PromptAgent:
try:
return response.text
except Exception as e:
logger.error("Meet exception when calling Gemini API, " + str(e))
return ""
elif self.model.startswith("qwen"):
messages = payload["messages"]
@@ -706,7 +804,7 @@ class PromptAgent:
if response.status_code == HTTPStatus.OK:
try:
return response.json()['output']['choices'][0]['message']['content']
except Exception as e:
except Exception:
return ""
else:
print(response.code) # The error code.

View File

@@ -1,3 +0,0 @@
wget https://github.com/UX-Decoder/Semantic-SAM/releases/download/checkpoint/swinl_only_sam_many2many.pth
wget https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v1.pt
wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

View File

@@ -0,0 +1,405 @@
import os
import gc
import time
import base64
from contextlib import asynccontextmanager
from typing import List, Literal, Union, Tuple, Optional
import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from loguru import logger
from pydantic import BaseModel, Field
from sse_starlette.sse import EventSourceResponse
from transformers import AutoModelForCausalLM, LlamaTokenizer, PreTrainedModel, PreTrainedTokenizer, \
TextIteratorStreamer
from PIL import Image
from io import BytesIO
MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/cogvlm-chat-hf')
TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", 'lmsys/vicuna-7b-v1.5')
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
if os.environ.get('QUANT_ENABLED'):
QUANT_ENABLED = True
else:
with torch.cuda.device(DEVICE):
__, total_bytes = torch.cuda.mem_get_info()
total_gb = total_bytes / (1 << 30)
if total_gb < 40:
QUANT_ENABLED = True
else:
QUANT_ENABLED = False
@asynccontextmanager
async def lifespan(app: FastAPI):
"""
An asynchronous context manager for managing the lifecycle of the FastAPI app.
It ensures that GPU memory is cleared after the app's lifecycle ends, which is essential for efficient resource management in GPU environments.
"""
yield
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
app = FastAPI(lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
class ModelCard(BaseModel):
"""
A Pydantic model representing a model card, which provides metadata about a machine learning model.
It includes fields like model ID, owner, and creation time.
"""
id: str
object: str = "model"
created: int = Field(default_factory=lambda: int(time.time()))
owned_by: str = "owner"
root: Optional[str] = None
parent: Optional[str] = None
permission: Optional[list] = None
class ModelList(BaseModel):
object: str = "list"
data: List[ModelCard] = []
class ImageUrl(BaseModel):
url: str
class TextContent(BaseModel):
type: Literal["text"]
text: str
class ImageUrlContent(BaseModel):
type: Literal["image_url"]
image_url: ImageUrl
ContentItem = Union[TextContent, ImageUrlContent]
class ChatMessageInput(BaseModel):
role: Literal["user", "assistant", "system"]
content: Union[str, List[ContentItem]]
name: Optional[str] = None
class ChatMessageResponse(BaseModel):
role: Literal["assistant"]
content: str = None
name: Optional[str] = None
class DeltaMessage(BaseModel):
role: Optional[Literal["user", "assistant", "system"]] = None
content: Optional[str] = None
class ChatCompletionRequest(BaseModel):
model: str
messages: List[ChatMessageInput]
temperature: Optional[float] = 0.8
top_p: Optional[float] = 0.8
max_tokens: Optional[int] = None
stream: Optional[bool] = False
# Additional parameters
repetition_penalty: Optional[float] = 1.0
class ChatCompletionResponseChoice(BaseModel):
index: int
message: ChatMessageResponse
class ChatCompletionResponseStreamChoice(BaseModel):
index: int
delta: DeltaMessage
class UsageInfo(BaseModel):
prompt_tokens: int = 0
total_tokens: int = 0
completion_tokens: Optional[int] = 0
class ChatCompletionResponse(BaseModel):
model: str
object: Literal["chat.completion", "chat.completion.chunk"]
choices: List[Union[ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice]]
created: Optional[int] = Field(default_factory=lambda: int(time.time()))
usage: Optional[UsageInfo] = None
@app.get("/v1/models", response_model=ModelList)
async def list_models():
"""
An endpoint to list available models. It returns a list of model cards.
This is useful for clients to query and understand what models are available for use.
"""
model_card = ModelCard(id="cogvlm-chat-17b") # can be replaced by your model id like cogagent-chat-18b
return ModelList(data=[model_card])
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
global model, tokenizer
if len(request.messages) < 1 or request.messages[-1].role == "assistant":
raise HTTPException(status_code=400, detail="Invalid request")
gen_params = dict(
messages=request.messages,
temperature=request.temperature,
top_p=request.top_p,
max_tokens=request.max_tokens or 1024,
echo=False,
stream=request.stream,
)
if request.stream:
generate = predict(request.model, gen_params)
return EventSourceResponse(generate, media_type="text/event-stream")
response = generate_cogvlm(model, tokenizer, gen_params)
usage = UsageInfo()
message = ChatMessageResponse(
role="assistant",
content=response["text"],
)
logger.debug(f"==== message ====\n{message}")
choice_data = ChatCompletionResponseChoice(
index=0,
message=message,
)
task_usage = UsageInfo.model_validate(response["usage"])
for usage_key, usage_value in task_usage.model_dump().items():
setattr(usage, usage_key, getattr(usage, usage_key) + usage_value)
return ChatCompletionResponse(model=request.model, choices=[choice_data], object="chat.completion", usage=usage)
async def predict(model_id: str, params: dict):
"""
Handle streaming predictions. It continuously generates responses for a given input stream.
This is particularly useful for real-time, continuous interactions with the model.
"""
global model, tokenizer
choice_data = ChatCompletionResponseStreamChoice(
index=0,
delta=DeltaMessage(role="assistant"),
finish_reason=None
)
chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
yield "{}".format(chunk.model_dump_json(exclude_unset=True))
previous_text = ""
for new_response in generate_stream_cogvlm(model, tokenizer, params):
decoded_unicode = new_response["text"]
delta_text = decoded_unicode[len(previous_text):]
previous_text = decoded_unicode
delta = DeltaMessage(
content=delta_text,
role="assistant",
)
choice_data = ChatCompletionResponseStreamChoice(
index=0,
delta=delta,
)
chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
yield "{}".format(chunk.model_dump_json(exclude_unset=True))
choice_data = ChatCompletionResponseStreamChoice(
index=0,
delta=DeltaMessage(),
)
chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
yield "{}".format(chunk.model_dump_json(exclude_unset=True))
def generate_cogvlm(model: PreTrainedModel, tokenizer: PreTrainedTokenizer, params: dict):
"""
Generates a response using the CogVLM model. It processes the chat history and image data, if any,
and then invokes the model to generate a response.
"""
for response in generate_stream_cogvlm(model, tokenizer, params):
pass
return response
def process_history_and_images(messages: List[ChatMessageInput]) -> Tuple[
Optional[str], Optional[List[Tuple[str, str]]], Optional[List[Image.Image]]]:
"""
Process history messages to extract text, identify the last user query,
and convert base64 encoded image URLs to PIL images.
Args:
messages(List[ChatMessageInput]): List of ChatMessageInput objects.
return: A tuple of three elements:
- The last user query as a string.
- Text history formatted as a list of tuples for the model.
- List of PIL Image objects extracted from the messages.
"""
formatted_history = []
image_list = []
last_user_query = ''
for i, message in enumerate(messages):
role = message.role
content = message.content
if isinstance(content, list): # text
text_content = ' '.join(item.text for item in content if isinstance(item, TextContent))
else:
text_content = content
if isinstance(content, list): # image
for item in content:
if isinstance(item, ImageUrlContent):
image_url = item.image_url.url
if image_url.startswith("data:image/jpeg;base64,"):
base64_encoded_image = image_url.split("data:image/jpeg;base64,")[1]
image_data = base64.b64decode(base64_encoded_image)
image = Image.open(BytesIO(image_data)).convert('RGB')
image_list.append(image)
elif image_url.startswith("data:image/png;base64,"):
base64_encoded_image = image_url.split("data:image/png;base64,")[1]
image_data = base64.b64decode(base64_encoded_image)
image = Image.open(BytesIO(image_data)).convert('RGB')
image_list.append(image)
if role == 'user':
if i == len(messages) - 1:  # the last user message
last_user_query = text_content
else:
formatted_history.append((text_content, ''))
elif role == 'assistant':
if formatted_history:
if formatted_history[-1][1] != '':
assert False, f"the last query is answered. answer again. {formatted_history[-1][0]}, {formatted_history[-1][1]}, {text_content}"
formatted_history[-1] = (formatted_history[-1][0], text_content)
else:
assert False, f"assistant reply before user"
else:
assert False, f"unrecognized role: {role}"
return last_user_query, formatted_history, image_list
@torch.inference_mode()
def generate_stream_cogvlm(model: PreTrainedModel, tokenizer: PreTrainedTokenizer, params: dict):
"""
Generates a stream of responses using the CogVLM model in inference mode.
It's optimized to handle continuous input-output interactions with the model in a streaming manner.
"""
messages = params["messages"]
temperature = float(params.get("temperature", 1.0))
repetition_penalty = float(params.get("repetition_penalty", 1.0))
top_p = float(params.get("top_p", 1.0))
max_new_tokens = int(params.get("max_tokens", 256))
query, history, image_list = process_history_and_images(messages)
logger.debug(f"==== request ====\n{query}")
input_by_model = model.build_conversation_input_ids(tokenizer, query=query, history=history,
images=[image_list[-1]])
inputs = {
'input_ids': input_by_model['input_ids'].unsqueeze(0).to(DEVICE),
'token_type_ids': input_by_model['token_type_ids'].unsqueeze(0).to(DEVICE),
'attention_mask': input_by_model['attention_mask'].unsqueeze(0).to(DEVICE),
'images': [[input_by_model['images'][0].to(DEVICE).to(torch_type)]],
}
if 'cross_images' in input_by_model and input_by_model['cross_images']:
inputs['cross_images'] = [[input_by_model['cross_images'][0].to(DEVICE).to(torch_type)]]
input_echo_len = len(inputs["input_ids"][0])
streamer = TextIteratorStreamer(
tokenizer=tokenizer,
timeout=60.0,
skip_prompt=True,
skip_special_tokens=True
)
gen_kwargs = {
"repetition_penalty": repetition_penalty,
"max_new_tokens": max_new_tokens,
"do_sample": True if temperature > 1e-5 else False,
"top_p": top_p if temperature > 1e-5 else 0,
'streamer': streamer,
}
if temperature > 1e-5:
gen_kwargs["temperature"] = temperature
total_len = 0
generated_text = ""
with torch.no_grad():
model.generate(**inputs, **gen_kwargs)
for next_text in streamer:
generated_text += next_text
yield {
"text": generated_text,
"usage": {
"prompt_tokens": input_echo_len,
"completion_tokens": total_len - input_echo_len,
"total_tokens": total_len,
},
}
ret = {
"text": generated_text,
"usage": {
"prompt_tokens": input_echo_len,
"completion_tokens": total_len - input_echo_len,
"total_tokens": total_len,
},
}
yield ret
gc.collect()
torch.cuda.empty_cache()
if __name__ == "__main__":
tokenizer = LlamaTokenizer.from_pretrained(
TOKENIZER_PATH,
trust_remote_code=True)
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
torch_type = torch.bfloat16
else:
torch_type = torch.float16
print("========Use torch type as:{} with device:{}========\n\n".format(torch_type, DEVICE))
if 'cuda' in DEVICE:
if QUANT_ENABLED:
model = AutoModelForCausalLM.from_pretrained(
MODEL_PATH,
load_in_4bit=True,
trust_remote_code=True,
torch_dtype=torch_type,
low_cpu_mem_usage=True
).eval()
else:
model = AutoModelForCausalLM.from_pretrained(
MODEL_PATH,
load_in_4bit=False,
trust_remote_code=True,
torch_dtype=torch_type,
low_cpu_mem_usage=True
).to(DEVICE).eval()
else:
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, trust_remote_code=True).float().to(DEVICE).eval()
uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)

View File

@@ -0,0 +1,7 @@
## Deploy CogAgent as a server
```bash
python CogAgent.py
```
The CogAgent model will then be served at http://127.0.0.1:8000.
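A minimal client sketch for sanity-checking the endpoint (it assumes the OpenAI-compatible schema defined in the server file above; the screenshot path and model id are placeholders):

```python
import base64
import requests

# Encode a local screenshot as a base64 data URL, matching the ImageUrlContent schema.
with open("screenshot.png", "rb") as f:  # placeholder path
    image_b64 = base64.b64encode(f.read()).decode()

payload = {
    "model": "cogagent-chat-18b",  # placeholder model id
    "max_tokens": 512,
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe what is shown on the screen."},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{image_b64}"},
                },
            ],
        }
    ],
}

response = requests.post("http://127.0.0.1:8000/v1/chat/completions", json=payload)
print(response.json()["choices"][0]["message"]["content"])
```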

View File

@@ -801,7 +801,7 @@ You CAN predict multiple actions at one step, but you should only return one act
SYS_PROMPT_IN_SOM_OUT_TAG = """
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
For each step, you will get an observation of the desktop by a screenshot with interact-able elements marked with numerical tags. And you will predict the action of the computer based on the image.
For each step, you will get an observation of the desktop via 1) a screenshot with interactable elements marked with numerical tags; and 2) an accessibility tree based on the AT-SPI library. You will then predict the computer action based on the image and text information.
You are required to use `pyautogui` to perform the action grounded in the observation, but DO NOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate on, since we have no image of that element. DO NOT USE `pyautogui.screenshot()` to take screenshots.
You can replace x, y in the code with the tag of the element you want to operate with. such as:

View File

@@ -48,4 +48,5 @@ easyocr
borb
pypdf2
pdfplumber
wandb
wrapt_timeout_decorator

79
run.py
View File

@@ -6,7 +6,9 @@ import datetime
import json
import logging
import os
import random
import sys
# import wandb
from tqdm import tqdm
@@ -48,6 +50,11 @@ logger.addHandler(sdebug_handler)
logger = logging.getLogger("desktopenv.experiment")
# wandb config
### set your wandb api key here
# os.environ["WANDB_API_KEY"] = "48ec18fb4da7087238c6d6833eab9907565adbf3"
# wandb.login(key=os.environ.get("WANDB_API_KEY", None))
def config() -> argparse.Namespace:
parser = argparse.ArgumentParser(
@@ -69,7 +76,7 @@ def config() -> argparse.Namespace:
"screenshot_a11y_tree",
"som"
],
default="som",
default="a11y_tree",
help="Observation type",
)
parser.add_argument("--screen_width", type=int, default=1920)
@@ -82,12 +89,16 @@ def config() -> argparse.Namespace:
parser.add_argument("--test_config_base_dir", type=str, default="evaluation_examples")
# lm config
parser.add_argument("--model", type=str, default="gpt-4-vision-preview")
parser.add_argument("--model", type=str, default="gpt-4-0125-preview")
parser.add_argument("--temperature", type=float, default=1.0)
parser.add_argument("--top_p", type=float, default=0.9)
parser.add_argument("--max_tokens", type=int, default=1500)
parser.add_argument("--stop_token", type=str, default=None)
# example config
parser.add_argument("--domain", type=str, default="all")
parser.add_argument("--test_all_meta_path", type=str, default="evaluation_examples/test_all.json")
# logging related
parser.add_argument("--result_dir", type=str, default="./results")
args = parser.parse_args()
@@ -104,6 +115,25 @@ def test(
# log args
logger.info("Args: %s", args)
# set wandb project
cfg_args = \
{
"path_to_vm": args.path_to_vm,
"headless": args.headless,
"action_space": args.action_space,
"observation_type": args.observation_type,
"screen_width": args.screen_width,
"screen_height": args.screen_height,
"sleep_after_execution": args.sleep_after_execution,
"max_steps": args.max_steps,
"max_trajectory_length": args.max_trajectory_length,
"model": args.model,
"temperature": args.temperature,
"top_p": args.top_p,
"max_tokens": args.max_tokens,
"stop_token": args.stop_token,
"result_dir": args.result_dir
}
agent = PromptAgent(
model=args.model,
@@ -118,10 +148,13 @@ def test(
action_space=agent.action_space,
screen_size=(args.screen_width, args.screen_height),
headless=args.headless,
require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
)
for domain in tqdm(test_all_meta, desc="Domain"):
for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False):
# run = wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}",
# name=f"{example_id}")
# example setting
config_file = os.path.join(args.test_config_base_dir, f"examples/{domain}/{example_id}.json")
with open(config_file, "r", encoding="utf-8") as f:
@@ -133,6 +166,10 @@ def test(
instruction = example["instruction"]
logger.info(f"[Instruction]: {instruction}")
# wandb each example config settings
cfg_args["instruction"] = instruction
cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S")
# run.config.update(cfg_args)
example_result_dir = os.path.join(
args.result_dir,
@@ -148,13 +185,20 @@ def test(
lib_run_single.run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir,
scores)
except Exception as e:
logger.error(f"Exception in {domain}/{example_id}: {e}")
# wandb.log({"Exception": wandb.Table(data=[[f"Exception in {domain}/{example_id}: {e}"]], columns=["Error"])})
env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
logger.error(f"Time limit exceeded in {domain}/{example_id}")
with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
f.write(json.dumps({
"Error": f"Time limit exceeded in {domain}/{example_id}"
}))
f.write("\n")
# wandb settings
# os.mkdir(os.path.join(wandb.run.dir, "results/"))
# for file in os.listdir(example_result_dir):
# # move file to just under the root dir
# os.rename(os.path.join(example_result_dir, file), os.path.join(wandb.run.dir, f"./results/{file}"))
# wandb.finish()
env.close()
logger.info(f"Average score: {sum(scores) / len(scores)}")
@@ -193,15 +237,13 @@ def get_unfinished(action_space, use_model, observation_type, result_dir, total_
def get_result(action_space, use_model, observation_type, result_dir, total_file_json):
target_dir = os.path.join(result_dir, action_space, observation_type, use_model)
if not os.path.exists(target_dir):
print("New experiment, no result yet.")
return None
all_result = []
if not os.path.exists(target_dir):
return total_file_json
finished = {}
for domain in os.listdir(target_dir):
finished[domain] = []
domain_path = os.path.join(target_dir, domain)
if os.path.isdir(domain_path):
for example_id in os.listdir(domain_path):
@@ -209,10 +251,17 @@ def get_result(action_space, use_model, observation_type, result_dir, total_file
if os.path.isdir(example_path):
if "result.txt" in os.listdir(example_path):
# empty all files under example_id
all_result.append(float(open(os.path.join(example_path, "result.txt"), "r").read()))
try:
all_result.append(float(open(os.path.join(example_path, "result.txt"), "r").read()))
except Exception:
all_result.append(0.0)
print("Success Rate:", sum(all_result) / len(all_result) * 100, "%")
return all_result
if not all_result:
print("New experiment, no result yet.")
return None
else:
print("Current Success Rate:", sum(all_result) / len(all_result) * 100, "%")
return all_result
if __name__ == '__main__':
@@ -220,9 +269,12 @@ if __name__ == '__main__':
os.environ["TOKENIZERS_PARALLELISM"] = "false"
args = config()
with open("evaluation_examples/test_all.json", "r", encoding="utf-8") as f:
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
test_all_meta = json.load(f)
if args.domain != "all":
test_all_meta = {args.domain: test_all_meta[args.domain]}
test_file_list = get_unfinished(
args.action_space,
args.model,
@@ -241,5 +293,4 @@ if __name__ == '__main__':
args.result_dir,
test_all_meta
)
# test(args, test_all_meta)
test(args, test_file_list)

View File

@@ -1,3 +1,3 @@
{
"time_limit": "1200"
"time_limit": "1800"
}