From 909aa868f3b9560e424366b6265ea9e679914e29 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 27 Jan 2024 19:47:47 +0800
Subject: [PATCH 01/13] Improve on agent codes; add auto-running experiments
 code; Fix some examples

---
 .../5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json |  4 +-
 .../f3977615-2b45-4ac5-8bba-80c17dbe2a37.json |  2 +-
 experiment_a11y_tree.py                       | 96 ++++++++++++++++---
 experiment_screenshot.py                      | 70 ++++++++++----
 experiment_screenshot_a11y_tree.py            | 54 +++++++++--
 experiment_screenshot_seeact.py               | 54 +++++++++--
 experiment_screenshot_som.py                  | 54 +++++++++--
 mm_agents/gpt_4v_agent.py                     |  5 +
 8 files changed, 283 insertions(+), 56 deletions(-)

diff --git a/evaluation_examples/examples/os/5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json b/evaluation_examples/examples/os/5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json
index d51c886..f010182 100644
--- a/evaluation_examples/examples/os/5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json
+++ b/evaluation_examples/examples/os/5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57.json
@@ -11,7 +11,7 @@
         "files": [
           {
             "url": "https://drive.usercontent.google.com/download?id=1XaTnC_lLbR_tGTz8tcN2Tp6cNrMlNW3R&export=download&authuser=0&confirm=t&uuid=89e69a23-43cf-4316-833a-fb9d3e281460&at=APZUnTWn5zZTH4GlClO6lV1i4WwP:1706184669922",
-            "path": "poster_party_night.webp"
+            "path": "/home/user/Desktop/poster_party_night.webp"
           }
         ]
       }
@@ -19,7 +19,7 @@
     {
       "type": "execute",
       "parameters": {
-        "command": "mv ~/poster_party_night.webp ~/.local/share/Trash/files/",
+        "command": "gio trash /home/user/Desktop/poster_party_night.webp",
         "shell": true
       }
     },
diff --git a/evaluation_examples/examples/vlc/f3977615-2b45-4ac5-8bba-80c17dbe2a37.json b/evaluation_examples/examples/vlc/f3977615-2b45-4ac5-8bba-80c17dbe2a37.json
index 3be1e5d..8f49282 100644
--- a/evaluation_examples/examples/vlc/f3977615-2b45-4ac5-8bba-80c17dbe2a37.json
+++ b/evaluation_examples/examples/vlc/f3977615-2b45-4ac5-8bba-80c17dbe2a37.json
@@ -1,7 +1,7 @@
 {
   "id": "f3977615-2b45-4ac5-8bba-80c17dbe2a37",
   "snapshot": "chrome",
-  "instruction": "I want to watch two or more videos in same time on VLC. I tried to run multiple instances of VLC. It worked but can't play videos on those new instances. When I play video it plays on first instance instead of new instance.\nIs there any way to solve this problem? Take the three videos on my desktop for example, do that for me.",
+  "instruction": "I want to watch two or more videos in same time on VLC. I tried to run multiple instances of VLC. It worked but can't play videos on those new instances. When I play video it plays on first instance instead of new instance.\nIs there any way to solve this problem?",
   "source": "https://www.reddit.com/r/Fedora/comments/rhljzd/how_to_run_multiple_instances_of_vlc_media_player/",
   "config": [
     {
diff --git a/experiment_a11y_tree.py b/experiment_a11y_tree.py
index 728d0de..86e6a72 100644
--- a/experiment_a11y_tree.py
+++ b/experiment_a11y_tree.py
@@ -3,7 +3,8 @@ import json
 import logging
 import os
 import sys
-
+import threading
+import time
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
 
@@ -61,8 +62,6 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
         env.controller.start_recording()
 
     while not done and step_num < max_steps:
-        with open("accessibility_tree.xml", "w", encoding="utf-8") as f:
-            f.write(observation["accessibility_tree"])
         actions = agent.predict(observation)
         step_num += 1
         for action in actions:
@@ -98,34 +97,63 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
                 logger.info("The episode is done.")
                 break
 
-    if recording:
-        # send a request to the server to stop recording
-        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+    def stop_recording():
+        try:
+            env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+        except Exception as e:
+            print(f"An error occurred while stopping the recording: {e}")
+
+    # NOTE(review): target=stop_recording() runs the call right here, not in the thread (Thread gets target=None); pass stop_recording itself
+    recording_thread = threading.Thread(target=stop_recording())
+    recording_thread.start()
+
+    # Start a timer for your timeout length (in this case, 60 seconds)
+    timeout = 60  # seconds
+    start_time = time.time()
+
+    # The main thread will wait for the set timeout period or until the recording is done
+    while recording_thread.is_alive():
+        elapsed_time = time.time() - start_time
+        if elapsed_time >= timeout:
+            print("Timeout reached. Stopping recording.")
+            break
+        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
+
+    # NOTE(review): threading.Thread has no kill() method, so the call below raises AttributeError if it is ever reached
+    if recording_thread.is_alive():
+        recording_thread.kill()
+
+    # Wait for the recording thread to finish before exiting
+    recording_thread.join()
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
 
+    with open(trajectory_recording_path, "a") as f:
+        f.write(json.dumps({
+            "result": result
+        }))
+        f.write("\n")
+
     # env.close()
     logger.info("Environment closed.")
 
 
-if __name__ == "__main__":
+def main(example_class, example_id):
     action_space = "pyautogui"
-    example_class = "chrome"
-    example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
-    gpt4_model = "gpt-4-vision-preview"
+    gpt4_model = "gpt-4-0125-preview"
     gemini_model = "gemini-pro-vision"
 
     logger.info("Running example %s/%s", example_class, example_id)
     logger.info("Using model %s", gpt4_model)
     # logger.info("Using model %s", gemini_model)
 
-    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
+    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_setup4"
+    example["snapshot"] = "exp_chrome"
 
     api_key = os.environ.get("OPENAI_API_KEY")
-    agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
+    agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], max_tokens=1000,
                         action_space=action_space, exp="a11y_tree")
 
     # api_key = os.environ.get("GENAI_API_KEY")
@@ -139,3 +167,45 @@ if __name__ == "__main__":
     os.makedirs(example_trajectory_dir, exist_ok=True)
 
     run_one_example(example, agent, 15, example_trajectory_dir)
+
+
+if __name__ == '__main__':
+    # Imported here because only this entry point needs it.
+    import multiprocessing
+
+    vlc_list = [
+        # "8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
+        # "8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
+        # "8f080098-ddb1-424c-b438-4e96e5e4786e",
+        # "bba3381f-b5eb-4439-bd9e-80c22218d5a7",
+        # "fba2c100-79e8-42df-ae74-b592418d54f4",
+        # "efcf0d81-0835-4880-b2fd-d866e8bc2294",
+        # "8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f",
+        # "aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6",
+        # "386dbd0e-0241-4a0a-b6a2-6704fba26b1c",
+        # "9195653c-f4aa-453d-aa95-787f6ccfaae9",
+        # "d06f0d4d-2cd5-4ede-8de9-598629438c6e",
+        # "a5bbbcd5-b398-4c91-83d4-55e1e31bbb81",
+        "f3977615-2b45-4ac5-8bba-80c17dbe2a37",
+        "215dfd39-f493-4bc3-a027-8a97d72c61bf"
+    ]
+
+    # Wall-clock budget for one example, in seconds.
+    timeout = 600
+
+    for example_id in vlc_list:
+        # Run each example in its own process: a process can be terminated on
+        # timeout, whereas threading.Thread has no kill() and cannot be stopped.
+        example_process = multiprocessing.Process(target=main, args=("vlc", example_id))
+        example_process.start()
+
+        # Block until the example finishes or the budget runs out.
+        example_process.join(timeout)
+
+        if example_process.is_alive():
+            print("Timeout reached. Kill this example.")
+            example_process.terminate()
+
+            # Reap the terminated process before starting the next example.
+            example_process.join()
+
diff --git a/experiment_screenshot.py b/experiment_screenshot.py
index 6d82730..943a8ec 100644
--- a/experiment_screenshot.py
+++ b/experiment_screenshot.py
@@ -3,10 +3,12 @@ import json
 import logging
 import os
 import sys
+import threading
+import time
 
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
-from mm_agents.gemini_pro_agent import GeminiPro_Agent
+# from mm_agents.gemini_pro_agent import GeminiPro_Agent
 
 #  Logger Configs {{{ # 
 logger = logging.getLogger()
@@ -98,21 +100,50 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
                 logger.info("The episode is done.")
                 break
 
-    if recording:
-        # send a request to the server to stop recording
-        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+    def stop_recording():
+        try:
+            env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+        except Exception as e:
+            print(f"An error occurred while stopping the recording: {e}")
+
+    # NOTE(review): target=stop_recording() runs the call right here, not in the thread (Thread gets target=None); pass stop_recording itself
+    recording_thread = threading.Thread(target=stop_recording())
+    recording_thread.start()
+
+    # Start a timer for your timeout length (in this case, 60 seconds)
+    timeout = 60  # seconds
+    start_time = time.time()
+
+    # The main thread will wait for the set timeout period or until the recording is done
+    while recording_thread.is_alive():
+        elapsed_time = time.time() - start_time
+        if elapsed_time >= timeout:
+            print("Timeout reached. Stopping recording.")
+            break
+        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
+
+    # NOTE(review): threading.Thread has no kill() method, so the call below raises AttributeError if it is ever reached
+    if recording_thread.is_alive():
+        recording_thread.kill()
+
+    # Wait for the recording thread to finish before exiting
+    recording_thread.join()
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
 
+    with open(trajectory_recording_path, "a") as f:
+        f.write(json.dumps({
+            "result": result
+        }))
+        f.write("\n")
+
     # env.close()
     logger.info("Environment closed.")
 
 
-if __name__ == "__main__":
+def main(example_class, example_id):
     action_space = "pyautogui"
-    example_class = "thunderbird"
-    example_id = "bb5e4c0d-f964-439c-97b6-bdb9747de3f4"
     gpt4_model = "gpt-4-vision-preview"
     gemini_model = "gemini-pro-vision"
 
@@ -120,21 +151,28 @@ if __name__ == "__main__":
     logger.info("Using model %s", gpt4_model)
     # logger.info("Using model %s", gemini_model)
 
-    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
+    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_setup2"
+    example["snapshot"] = "exp_chrome"
 
-    # api_key = os.environ.get("OPENAI_API_KEY")
-    # agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
-
-    api_key = os.environ.get("GENAI_API_KEY")
-    agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
+    api_key = os.environ.get("OPENAI_API_KEY")
+    agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
+    #
+    # api_key = os.environ.get("GENAI_API_KEY")
+    # agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
 
     root_trajectory_dir = "exp_trajectory"
 
-    example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gpt4_model, example_id)
-    # example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gemini_model, example_id)
+    example_trajectory_dir = os.path.join(root_trajectory_dir, "screenshot", example_class, gpt4_model, example_id)
+    # example_trajectory_dir = os.path.join(root_trajectory_dir, "screenshot", example_class, gemini_model, example_id)
 
     os.makedirs(example_trajectory_dir, exist_ok=True)
 
     run_one_example(example, agent, 15, example_trajectory_dir)
+
+
+if __name__ == '__main__':
+    xx_list = [  # no example ids listed yet, so the loop below does nothing
+    ]
+    for example_id in xx_list:
+        main("xx", example_id)  # "xx" is passed as example_class; presumably a placeholder — TODO confirm
diff --git a/experiment_screenshot_a11y_tree.py b/experiment_screenshot_a11y_tree.py
index 60c81b6..d32bc7e 100644
--- a/experiment_screenshot_a11y_tree.py
+++ b/experiment_screenshot_a11y_tree.py
@@ -3,6 +3,8 @@ import json
 import logging
 import os
 import sys
+import threading
+import time
 
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
@@ -96,21 +98,50 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
                 logger.info("The episode is done.")
                 break
 
-    if recording:
-        # send a request to the server to stop recording
-        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+    def stop_recording():
+        try:
+            env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+        except Exception as e:
+            print(f"An error occurred while stopping the recording: {e}")
+
+    # NOTE(review): target=stop_recording() runs the call right here, not in the thread (Thread gets target=None); pass stop_recording itself
+    recording_thread = threading.Thread(target=stop_recording())
+    recording_thread.start()
+
+    # Start a timer for your timeout length (in this case, 60 seconds)
+    timeout = 60  # seconds
+    start_time = time.time()
+
+    # The main thread will wait for the set timeout period or until the recording is done
+    while recording_thread.is_alive():
+        elapsed_time = time.time() - start_time
+        if elapsed_time >= timeout:
+            print("Timeout reached. Stopping recording.")
+            break
+        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
+
+    # NOTE(review): threading.Thread has no kill() method, so the call below raises AttributeError if it is ever reached
+    if recording_thread.is_alive():
+        recording_thread.kill()
+
+    # Wait for the recording thread to finish before exiting
+    recording_thread.join()
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
 
+    with open(trajectory_recording_path, "a") as f:
+        f.write(json.dumps({
+            "result": result
+        }))
+        f.write("\n")
+
     # env.close()
     logger.info("Environment closed.")
 
 
-if __name__ == "__main__":
+def main(example_class, example_id):
     action_space = "pyautogui"
-    example_class = "chrome"
-    example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
     gpt4_model = "gpt-4-vision-preview"
     gemini_model = "gemini-pro-vision"
 
@@ -118,9 +149,9 @@ if __name__ == "__main__":
     logger.info("Using model %s", gpt4_model)
     # logger.info("Using model %s", gemini_model)
 
-    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
+    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_setup4"
+    example["snapshot"] = "exp_chrome"
 
     api_key = os.environ.get("OPENAI_API_KEY")
     agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
@@ -137,3 +168,10 @@ if __name__ == "__main__":
     os.makedirs(example_trajectory_dir, exist_ok=True)
 
     run_one_example(example, agent, 15, example_trajectory_dir)
+
+
+if __name__ == '__main__':
+    xx_list = [  # no example ids listed yet, so the loop below does nothing
+    ]
+    for example_id in xx_list:
+        main("xx", example_id)  # "xx" is passed as example_class; presumably a placeholder — TODO confirm
diff --git a/experiment_screenshot_seeact.py b/experiment_screenshot_seeact.py
index b718693..3f72375 100644
--- a/experiment_screenshot_seeact.py
+++ b/experiment_screenshot_seeact.py
@@ -3,6 +3,8 @@ import json
 import logging
 import os
 import sys
+import threading
+import time
 
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
@@ -96,27 +98,56 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
                 logger.info("The episode is done.")
                 break
 
-    if recording:
-        # send a request to the server to stop recording
-        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+    def stop_recording():
+        try:
+            env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+        except Exception as e:
+            print(f"An error occurred while stopping the recording: {e}")
+
+    # NOTE(review): target=stop_recording() runs the call right here, not in the thread (Thread gets target=None); pass stop_recording itself
+    recording_thread = threading.Thread(target=stop_recording())
+    recording_thread.start()
+
+    # Start a timer for your timeout length (in this case, 60 seconds)
+    timeout = 60  # seconds
+    start_time = time.time()
+
+    # The main thread will wait for the set timeout period or until the recording is done
+    while recording_thread.is_alive():
+        elapsed_time = time.time() - start_time
+        if elapsed_time >= timeout:
+            print("Timeout reached. Stopping recording.")
+            break
+        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
+
+    # NOTE(review): threading.Thread has no kill() method, so the call below raises AttributeError if it is ever reached
+    if recording_thread.is_alive():
+        recording_thread.kill()
+
+    # Wait for the recording thread to finish before exiting
+    recording_thread.join()
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
 
+    with open(trajectory_recording_path, "a") as f:
+        f.write(json.dumps({
+            "result": result
+        }))
+        f.write("\n")
+
     # env.close()
     logger.info("Environment closed.")
 
 
-if __name__ == "__main__":
+def main(example_class, example_id):
     action_space = "pyautogui"
-    example_class = "chrome"
-    example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
     gpt4_model = "gpt-4-vision-preview"
     gemini_model = "gemini-pro-vision"
 
-    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
+    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_setup4"
+    example["snapshot"] = "exp_chrome"
 
     api_key = os.environ.get("OPENAI_API_KEY")
     agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
@@ -133,3 +164,10 @@ if __name__ == "__main__":
     os.makedirs(example_trajectory_dir, exist_ok=True)
 
     run_one_example(example, agent, 15, example_trajectory_dir)
+
+
+if __name__ == '__main__':
+    xx_list = [  # no example ids listed yet, so the loop below does nothing
+    ]
+    for example_id in xx_list:
+        main("xx", example_id)  # "xx" is passed as example_class; presumably a placeholder — TODO confirm
diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py
index 2a64bb3..abd77d9 100644
--- a/experiment_screenshot_som.py
+++ b/experiment_screenshot_som.py
@@ -3,6 +3,8 @@ import json
 import logging
 import os
 import sys
+import threading
+import time
 
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
@@ -96,27 +98,56 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
                 logger.info("The episode is done.")
                 break
 
-    if recording:
-        # send a request to the server to stop recording
-        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+    def stop_recording():
+        try:
+            env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
+        except Exception as e:
+            print(f"An error occurred while stopping the recording: {e}")
+
+    # NOTE(review): target=stop_recording() runs the call right here, not in the thread (Thread gets target=None); pass stop_recording itself
+    recording_thread = threading.Thread(target=stop_recording())
+    recording_thread.start()
+
+    # Start a timer for your timeout length (in this case, 60 seconds)
+    timeout = 60  # seconds
+    start_time = time.time()
+
+    # The main thread will wait for the set timeout period or until the recording is done
+    while recording_thread.is_alive():
+        elapsed_time = time.time() - start_time
+        if elapsed_time >= timeout:
+            print("Timeout reached. Stopping recording.")
+            break
+        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
+
+    # NOTE(review): threading.Thread has no kill() method, so the call below raises AttributeError if it is ever reached
+    if recording_thread.is_alive():
+        recording_thread.kill()
+
+    # Wait for the recording thread to finish before exiting
+    recording_thread.join()
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
 
+    with open(trajectory_recording_path, "a") as f:
+        f.write(json.dumps({
+            "result": result
+        }))
+        f.write("\n")
+
     # env.close()
     logger.info("Environment closed.")
 
 
-if __name__ == "__main__":
+def main(example_class, example_id):
     action_space = "pyautogui"
-    example_class = "chrome"
-    example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
     gpt4_model = "gpt-4-vision-preview"
     gemini_model = "gemini-pro-vision"
 
-    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
+    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_setup4"
+    example["snapshot"] = "exp_chrome"
 
     api_key = os.environ.get("OPENAI_API_KEY")
     agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
@@ -133,3 +164,10 @@ if __name__ == "__main__":
     os.makedirs(example_trajectory_dir, exist_ok=True)
 
     run_one_example(example, agent, 15, example_trajectory_dir)
+
+
+if __name__ == '__main__':
+    xx_list = [  # no example ids listed yet, so the loop below does nothing
+    ]
+    for example_id in xx_list:
+        main("xx", example_id)  # "xx" is passed as example_class; presumably a placeholder — TODO confirm
diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py
index cc08b79..4fdb946 100644
--- a/mm_agents/gpt_4v_agent.py
+++ b/mm_agents/gpt_4v_agent.py
@@ -63,6 +63,8 @@ def tag_screenshot(screenshot, accessibility_tree):
 
 
 def parse_actions_from_string(input_string):
+    if input_string.strip() in ['WAIT', 'DONE', 'FAIL']:
+        return [input_string.strip()]
     # Search for a JSON string within the input string
     actions = []
     matches = re.findall(r'```json\s+(.*?)\s+```', input_string, re.DOTALL)
@@ -95,6 +97,9 @@ def parse_actions_from_string(input_string):
 
 
 def parse_code_from_string(input_string):
+    if input_string.strip() in ['WAIT', 'DONE', 'FAIL']:
+        return [input_string.strip()]
+
     # This regular expression will match both ```code``` and ```python code```
     # and capture the `code` part. It uses a non-greedy match for the content inside.
     pattern = r"```(?:\w+\s+)?(.*?)```"

From ce0eafaa0df37d30b84942dabf30c96cc09d2f58 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 27 Jan 2024 20:37:02 +0800
Subject: [PATCH 02/13] Fix some errors found in writer examples

---
 .../66399b0d-8fda-4618-95c4-bfc6191617e9.json                   | 2 +-
 .../adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json                   | 2 +-
 .../b21acd93-60fd-4127-8a43-2f5178f4a830.json                   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/evaluation_examples/examples/libreoffice_writer/66399b0d-8fda-4618-95c4-bfc6191617e9.json b/evaluation_examples/examples/libreoffice_writer/66399b0d-8fda-4618-95c4-bfc6191617e9.json
index 47dde68..656e70c 100644
--- a/evaluation_examples/examples/libreoffice_writer/66399b0d-8fda-4618-95c4-bfc6191617e9.json
+++ b/evaluation_examples/examples/libreoffice_writer/66399b0d-8fda-4618-95c4-bfc6191617e9.json
@@ -27,7 +27,7 @@
         "command": [
           "python",
           "-c",
-          "import pyautogui; import time; time.sleep(1); pyautogui.press(\"down\", presses=38, interval=0.1); time.sleep(1); pyautogui.scroll(-2)"
+          "import pyautogui; import time; time.sleep(1); pyautogui.press(\"down\", presses=40, interval=0.1); time.sleep(1); pyautogui.scroll(-2)"
         ]
       }
     }
diff --git a/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json b/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json
index ba3c432..790b8e5 100644
--- a/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json
+++ b/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json
@@ -9,7 +9,7 @@
       "parameters": {
         "files": [
           {
-            "url": "https://drive.usercontent.google.com/download?id=1ShGL4gWSV7nzamAb0V2KqjoCOhyodcKU&export=download&authuser=0&confirm=t&uuid=5f67edb8-cbbf-4a83-b46e-f193ad55e1e8&at=APZUnTVRJenYCM--vETagQ5ACTT5:1704979226579",
+            "url": "https://drive.usercontent.google.com/download?id=1ShGL4gWSV7nzamAb0V2KqjoCOhyodcKU&export=download&authuser=0&confirm=t&uuid=17f2a63f-df71-4ea7-85a0-4b364afa336c&at=APZUnTXij39N124BO91KxN6yFR7Y:1706357955122",
             "path": "Desktop/Essay_Writing_English_for_uni.docx"
           }
         ]
diff --git a/evaluation_examples/examples/libreoffice_writer/b21acd93-60fd-4127-8a43-2f5178f4a830.json b/evaluation_examples/examples/libreoffice_writer/b21acd93-60fd-4127-8a43-2f5178f4a830.json
index 50a3cfd..20afec2 100644
--- a/evaluation_examples/examples/libreoffice_writer/b21acd93-60fd-4127-8a43-2f5178f4a830.json
+++ b/evaluation_examples/examples/libreoffice_writer/b21acd93-60fd-4127-8a43-2f5178f4a830.json
@@ -1,7 +1,7 @@
 {
   "id": "b21acd93-60fd-4127-8a43-2f5178f4a830",
   "snapshot": "libreoffice_writer",
-  "instruction": "I have been praciticing professional writing lately. Now I am writing essay which requires one paragraph each for introduction, body and conclusion with single-space for introduction, double-space for body then one-and-a-half-space for conclusion. The font size of this essay is 12. Could you help me on this?",
+  "instruction": "I have been practicing professional writing lately. Now I am writing essay which requires one paragraph each for introduction, body and conclusion with single-space for introduction, double-space for body then one-and-a-half-space for conclusion. The font size of this essay is 12. Could you help me on this?",
   "source": "https://superuser.com/questions/1097199/how-can-i-double-space-a-document-in-libreoffice?rq=1",
   "config": [
     {

From 297a080284a2cb13f0bbbac125218ba0379137cd Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Sat, 27 Jan 2024 21:29:20 +0800
Subject: [PATCH 03/13] ver Jan27thv2

updated get_accessibility_tree and added a series of try-catch
---
 desktop_env/server/main.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py
index 032b290..eef0afc 100644
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -275,10 +275,22 @@ def _create_atspi_node(node: Accessible, depth: int, flag: Optional[str] = None)
     except NotImplementedError:
         pass
     else:
-        attribute_dict["{{{:}}}value".format(_accessibility_ns_map["val"])] = str(value.currentValue)
-        attribute_dict["{{{:}}}min".format(_accessibility_ns_map["val"])] = str(value.minimumValue)
-        attribute_dict["{{{:}}}max".format(_accessibility_ns_map["val"])] = str(value.maximumValue)
-        attribute_dict["{{{:}}}step".format(_accessibility_ns_map["val"])] = str(value.minimumIncrement)
+        try:
+            attribute_dict["{{{:}}}value".format(_accessibility_ns_map["val"])] = str(value.currentValue)
+        except:
+            pass
+        try:
+            attribute_dict["{{{:}}}min".format(_accessibility_ns_map["val"])] = str(value.minimumValue)
+        except:
+            pass
+        try:
+            attribute_dict["{{{:}}}max".format(_accessibility_ns_map["val"])] = str(value.maximumValue)
+        except:
+            pass
+        try:
+            attribute_dict["{{{:}}}step".format(_accessibility_ns_map["val"])] = str(value.minimumIncrement)
+        except:
+            pass
     #  }}} Value # 
 
     #  Action {{{ # 

From a9cb9dcf79b3112b55a082de5e38c2f1c55b39f1 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 27 Jan 2024 21:59:18 +0800
Subject: [PATCH 04/13] Fix some errors found in thunderbird examples

---
 .../thunderbird/94760984-3ff5-41ee-8347-cf1af709fea0.json       | 1 +
 .../thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json       | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/evaluation_examples/examples/thunderbird/94760984-3ff5-41ee-8347-cf1af709fea0.json b/evaluation_examples/examples/thunderbird/94760984-3ff5-41ee-8347-cf1af709fea0.json
index 2d48f96..3a0ab4d 100644
--- a/evaluation_examples/examples/thunderbird/94760984-3ff5-41ee-8347-cf1af709fea0.json
+++ b/evaluation_examples/examples/thunderbird/94760984-3ff5-41ee-8347-cf1af709fea0.json
@@ -72,6 +72,7 @@
           "extensions.activeThemeID": {
             "method": "re",
             "ref": "dark"
+          }
         }
       }
     }
diff --git a/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json b/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json
index 923f7be..b2cca67 100644
--- a/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json
+++ b/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json
@@ -5,7 +5,7 @@
   "source": "https://superuser.com/questions/1764409/how-to-send-email-with-thunderbird-without-configuring-an-incoming-email-service",
   "config": [
     {
-      "type": download",
+      "type": "download",
       "parameters": {
         "files": [
           {

From 394d6353fd929111da623fb3c477adf97205689a Mon Sep 17 00:00:00 2001
From: tsuky_chen <3107760494@qq.com>
Date: Sun, 28 Jan 2024 12:18:25 +0800
Subject: [PATCH 05/13] update libreoffice impress eval examples

---
 desktop_env/evaluators/metrics/slides.py      |  84 ++++++++---
 .../0f84bef9-9790-432e-92b7-eece357603fb.json |  22 ++-
 .../3b27600c-3668-4abd-8f84-7bcdebbccbdb.json |  58 +++----
 .../455d3c66-7dc6-4537-a39a-36d3e9119df7.json |  31 ++++
 .../550ce7e7-747b-495f-b122-acdc4d0b8e54.json |  87 +++++++----
 .../5d901039-a89c-4bfb-967b-bf66f4df075e.json |  58 +++----
 .../9ec204e4-f0a3-42f8-8458-b772a6797cab.json |  60 ++++----
 .../af23762e-2bfd-4a1d-aada-20fa8de9ce07.json |  58 +++----
 .../bf4e9888-f10f-47af-8dba-76413038b73c.json | 142 ++++++++++++++++++
 .../c59742c0-4323-4b9d-8a02-723c251deaa0.json |  58 +++----
 .../ce88f674-ab7a-43da-9201-468d38539e4a.json |  58 +++----
 11 files changed, 484 insertions(+), 232 deletions(-)
 create mode 100644 evaluation_examples/examples/libreoffice_impress/bf4e9888-f10f-47af-8dba-76413038b73c.json

diff --git a/desktop_env/evaluators/metrics/slides.py b/desktop_env/evaluators/metrics/slides.py
index c2be70e..616e7bc 100644
--- a/desktop_env/evaluators/metrics/slides.py
+++ b/desktop_env/evaluators/metrics/slides.py
@@ -96,6 +96,38 @@ def check_slide_numbers_color(pptx_file_path):
                     print(font_color)
                     return 1 if font_color is not None and is_red_color(font_color) else 0
 
+# import numpy as np
+# from PIL import Image
+# from skimage.metrics import structural_similarity as ssim
+
+# def compare_images(image1_path, image2_path):
+#     # You would call this function with the paths to the two images you want to compare:
+#     # score = compare_images('path_to_image1', 'path_to_image2')
+#     # print("Similarity score:", score)
+
+#     if not image1_path or not image2_path:
+#         return 0
+
+#     # Open the images and convert to grayscale
+#     image1 = Image.open(image1_path).convert('L')
+#     image2 = Image.open(image2_path).convert('L')
+
+#     # Resize images to the smaller one's size for comparison
+#     image1_size = image1.size
+#     image2_size = image2.size
+#     new_size = min(image1_size, image2_size)
+
+#     image1 = image1.resize(new_size, Image.Resampling.LANCZOS)
+#     image2 = image2.resize(new_size, Image.Resampling.LANCZOS)
+
+#     # Convert images to numpy arrays
+#     image1_array = np.array(image1)
+#     image2_array = np.array(image2)
+
+#     # Calculate SSIM between two images
+#     similarity_index = ssim(image1_array, image2_array)
+
+#     return similarity_index
 
 def compare_pptx_files(file1_path, file2_path, **options):
     # todo: not strictly match since not all information is compared because we cannot get the info through pptx
@@ -114,49 +146,50 @@ def compare_pptx_files(file1_path, file2_path, **options):
 
     # compare the number of slides
     if len(prs1.slides) != len(prs2.slides) and examine_number_of_slides:
-        return False
+        return 0
 
     # compare the content of each slide
     for slide1, slide2 in zip(prs1.slides, prs2.slides):
+
         # check if the shapes are the same
         for shape1, shape2 in zip(slide1.shapes, slide2.shapes):
             if (
                     shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape:
-                return False
-
+                return 0
+            
             if hasattr(shape1, "text") and hasattr(shape2, "text"):
                 if shape1.text != shape2.text and examine_text:
-                    return False
+                    return 0
 
                 # check if the paragraphs are the same
                 for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
                     # check if the runs are the same
                     for run1, run2 in zip(para1.runs, para2.runs):
                         if run1.text != run2.text and examine_text:
-                            return False
+                            return 0
 
                         # check if the font properties are the same
                         if run1.font.name != run2.font.name and examine_font_name:
-                            return False
+                            return 0
 
                         if run1.font.size != run2.font.size and examine_font_size:
-                            return False
+                            return 0
 
                         if run1.font.bold != run2.font.bold and examine_font_bold:
-                            return False
+                            return 0
 
                         if run1.font.italic != run2.font.italic and examine_font_italic:
-                            return False
+                            return 0
 
                         if run1.font.color.rgb != run2.font.color.rgb and examine_color_rgb:
-                            return False
+                            return 0
 
                         if run1.font.underline != run2.font.underline and examine_font_underline:
-                            return False
+                            return 0
 
                         # fixme: Actually there are more properties to be compared, but we cannot get them through pptx
 
-    return True
+    return 1
 
 
 def check_strikethrough(pptx_path, rules):
@@ -220,15 +253,20 @@ def check_left_panel(accessibility_tree):
 
     root = ET.fromstring(accessibility_tree)
 
-    for root_pane in root.iter('root-pane'):
-        for panel in root_pane.iter('panel'):
-            for split_pane in panel.iter('split-pane'):
-                # Get the left panel
-                if split_pane.attrib.get("{{{}}}parentcoord".format(namespaces['cp'])) == "(0, 0)":
-                    # Get the visible attribute
-                    visible = split_pane.attrib.get("{{{}}}visible".format(namespaces['st']))
-                    if visible:
-                        # decide if it is left panel
-                        return 1.
+    for root_pane in root.iter('root-pane'):        
+        for split_pane in root_pane.iter('split-pane'):
+            for panel in split_pane.iter('panel'):
+                for scroll_panel in panel.iter('scroll-pane'):
+                    for document_frame in scroll_panel.iter('document-frame'):
+                        # Get the left panel
+                        panel_name = document_frame.get("name")
+                        # visible = scroll_bar.attrib.get(f"{{{namespaces['st']}}}visible")
+                        if panel_name == "Slides View":
+                            # Left panel is open
+                            return 1.0
 
-    return 0.
+    # Left panel is not open
+    return 0.0
+
+
+# print(compare_pptx_files("D:\\NJU\\HKUNLP\\Desktop-Env\\cache\\bf4e9888-f10f-47af-8dba-76413038b73c\\4.3-Template_4.29.2016.pptx", "D:\\NJU\HKUNLP\\Desktop-Env\\cache\\bf4e9888-f10f-47af-8dba-76413038b73c\\4.3-Template_4.29.2016_Gold.pptx"))
\ No newline at end of file
diff --git a/evaluation_examples/examples/libreoffice_impress/0f84bef9-9790-432e-92b7-eece357603fb.json b/evaluation_examples/examples/libreoffice_impress/0f84bef9-9790-432e-92b7-eece357603fb.json
index 4d42b6a..0a91582 100644
--- a/evaluation_examples/examples/libreoffice_impress/0f84bef9-9790-432e-92b7-eece357603fb.json
+++ b/evaluation_examples/examples/libreoffice_impress/0f84bef9-9790-432e-92b7-eece357603fb.json
@@ -3,7 +3,25 @@
   "snapshot": "libreoffice_impress",
   "instruction": "On it Whenever I launch a LibreOffice Impress, it uses both screens, one for current slide and next slide and another for actual presentation. What I want is to use only one monitor which shows presentation. I dont want the screen with Current slide and Next slide so that it can be used for other purposes. How should I achieve this?",
   "source": "https://stackoverflow.com/questions/29036788/how-to-disable-libreoffice-impress-to-use-multiple-display",
-  "config": [],
+  "config": [
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "url": "https://drive.usercontent.google.com/download?id=1qKOdf1Wx9nGtk_3l7hjZ9gXWFzWgsyoH&export=download&authuser=0&confirm=t&uuid=0bceb604-af00-4940-a137-8dd00512d060&at=APZUnTUlTutATfe49vsbBrobLPAG:1706370599333",
+            "path": "Desktop/multimedia_classroom_podium-2020.pptx"
+          }
+        ]
+      }
+    },
+    {
+      "type": "open",
+      "parameters": {
+        "path": "Desktop/multimedia_classroom_podium-2020.pptx"
+      }
+    }
+  ],
   "trajectory": "trajectories/",
   "related_apps": [
     "libreoffice_impress"
@@ -16,4 +34,4 @@
       "dest": "registrymodifications.xcu"
     }
   }
-}
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json b/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json
index 7a6d00c..3b33674 100644
--- a/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json
+++ b/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json
@@ -28,35 +28,35 @@
   ],
   "evaluator": {
     "postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "lec17-gui-events.pptx - LibreOffice Impress",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			}
+      {
+        "type": "activate_window",
+        "parameters": {
+          "window_name": "lec17-gui-events.pptx - LibreOffice Impress",
+          "strict": true
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python",
+            "-c",
+            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5);"
+          ]
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      }
     ],
     "func": "evaluate_presentation_fill_to_rgb_distance",
     "expected": {
diff --git a/evaluation_examples/examples/libreoffice_impress/455d3c66-7dc6-4537-a39a-36d3e9119df7.json b/evaluation_examples/examples/libreoffice_impress/455d3c66-7dc6-4537-a39a-36d3e9119df7.json
index db194b0..8521484 100644
--- a/evaluation_examples/examples/libreoffice_impress/455d3c66-7dc6-4537-a39a-36d3e9119df7.json
+++ b/evaluation_examples/examples/libreoffice_impress/455d3c66-7dc6-4537-a39a-36d3e9119df7.json
@@ -27,6 +27,37 @@
     "libreoffice_impress"
   ],
   "evaluator": {
+    "postconfig": [
+      {
+        "type": "activate_window",
+        "parameters": {
+          "window_name": "wssf-project-plan-on-a-page.pptx - LibreOffice Impress",
+          "strict": true
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python",
+            "-c",
+            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5);"
+          ]
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      }
+    ],
     "func": "compare_images",
     "expected": {
       "type": "cloud_file",
diff --git a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json
index 8ab1196..8eed170 100644
--- a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json
+++ b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json
@@ -20,6 +20,22 @@
       "parameters": {
         "path": "Desktop/New_Club_Spring_2018_Training.pptx"
       }
+    },
+    {
+      "type": "sleep",
+      "parameters": {
+        "seconds": 0.5
+      }
+    },
+    {
+      "type": "execute",
+      "parameters": {
+        "command": [
+          "python",
+          "-c",
+          "import pyautogui; import time;  time.sleep(4); pyautogui.doubleClick(x=200, y=600); time.sleep(0.5);pyautogui.mouseDown(); pyautogui.mouseUp(); time.sleep(0.5);"
+        ]
+      }
     }
   ],
   "trajectory": "trajectories/",
@@ -28,43 +44,50 @@
   ],
   "evaluator": {
     "postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "New_Club_Spring_2018_Training.pptx - LibreOffice Impress",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			}
+      {
+        "type": "activate_window",
+        "parameters": {
+          "window_name": "New_Club_Spring_2018_Training.pptx - LibreOffice Impress",
+          "strict": true
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python",
+            "-c",
+            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5)"
+          ]
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      }
     ],
     "func": "check_strikethrough",
     "expected": {
       "type": "rule",
       "rules": {
-        "slide_index_s": [4],
-        "shape_index_s": [1],
-        "paragraph_index_s": [1, 2]
+        "slide_index_s": [
+          4
+        ],
+        "shape_index_s": [
+          1
+        ],
+        "paragraph_index_s": [
+          1,
+          2
+        ]
       }
     },
     "result": {
diff --git a/evaluation_examples/examples/libreoffice_impress/5d901039-a89c-4bfb-967b-bf66f4df075e.json b/evaluation_examples/examples/libreoffice_impress/5d901039-a89c-4bfb-967b-bf66f4df075e.json
index 2654d8e..9482dcb 100644
--- a/evaluation_examples/examples/libreoffice_impress/5d901039-a89c-4bfb-967b-bf66f4df075e.json
+++ b/evaluation_examples/examples/libreoffice_impress/5d901039-a89c-4bfb-967b-bf66f4df075e.json
@@ -28,35 +28,35 @@
   ],
   "evaluator": {
     "postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "CPD_Background_Investigation_Process.pptx - LibreOffice Impress",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			}
+      {
+        "type": "activate_window",
+        "parameters": {
+          "window_name": "CPD_Background_Investigation_Process.pptx - LibreOffice Impress",
+          "strict": true
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python",
+            "-c",
+            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5);"
+          ]
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      }
     ],
     "func": "check_image_stretch_and_center",
     "expected": {
diff --git a/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json b/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json
index fc9a9cb..ecc6722 100644
--- a/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json
+++ b/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json
@@ -28,35 +28,35 @@
   ],
   "evaluator": {
     "postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "MLA_Workshop_061X_Works_Cited.pptx - LibreOffice Impress",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			}
+      {
+        "type": "activate_window",
+        "parameters": {
+          "window_name": "MLA_Workshop_061X_Works_Cited.pptx - LibreOffice Impress",
+          "strict": true
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python",
+            "-c",
+            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5);"
+          ]
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      }
     ],
     "func": "compare_pptx_files",
     "expected": {
@@ -66,7 +66,7 @@
     },
     "result": {
       "type": "vm_file",
-      "path": "/home/user/Desktop/MLA_Workshop_061X_Works_Cited.pptx",
+      "path": "Desktop/MLA_Workshop_061X_Works_Cited.pptx",
       "dest": "MLA_Workshop_061X_Works_Cited.pptx"
     }
   }
diff --git a/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json b/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json
index 4435b4d..d2ca0ae 100644
--- a/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json
+++ b/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json
@@ -28,35 +28,35 @@
   ],
   "evaluator": {
     "postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "Forests.pptx - LibreOffice Impress",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			}
+      {
+        "type": "activate_window",
+        "parameters": {
+          "window_name": "Forests.pptx - LibreOffice Impress",
+          "strict": true
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python",
+            "-c",
+            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5);"
+          ]
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      }
     ],
     "func": "compare_pptx_files",
     "expected": {
diff --git a/evaluation_examples/examples/libreoffice_impress/bf4e9888-f10f-47af-8dba-76413038b73c.json b/evaluation_examples/examples/libreoffice_impress/bf4e9888-f10f-47af-8dba-76413038b73c.json
new file mode 100644
index 0000000..a887e8a
--- /dev/null
+++ b/evaluation_examples/examples/libreoffice_impress/bf4e9888-f10f-47af-8dba-76413038b73c.json
@@ -0,0 +1,142 @@
+{
+    "id": "bf4e9888-f10f-47af-8dba-76413038b73c",
+    "snapshot": "libreoffice_impress",
+    "instruction": "I have a series of .png images, saved as pic1.png, pic2.png,..., pic6.png on the Desktop. Now I want to insert these pictures into the current blank presentation to create a document suitable to run continuously in a kiosk or multimedia show. Could you help me?",
+    "source": "https://help.libreoffice.org/6.4/en-US/text/simpress/guide/photo_album.html?DbPAR=IMPRESS",
+    "config": [
+        {
+            "type": "download",
+            "parameters": {
+                "files": [
+                    {
+                        "url": "https://drive.usercontent.google.com/download?id=1o8BOn9Q5NpQYYcXxeCHL_znkG11ni7ws&export=download&authuser=0&confirm=t&uuid=21a62a00-fd30-4405-ae87-00c3a165e381&at=APZUnTWlhaHv92g3wMYQJn80Vdpo:1706366778535",
+                        "path": "Desktop/4.3-Template_4.29.2016.pptx"
+                    }
+                ]
+            }
+        },
+        {
+            "type": "open",
+            "parameters": {
+                "path": "Desktop/4.3-Template_4.29.2016.pptx"
+            }
+        },
+        {
+            "type": "download",
+            "parameters": {
+                "files": [
+                    {
+                        "url": "https://drive.usercontent.google.com/download?id=15mklOcjIUJkzU3GhFhZM69h5gG0WOFVa&export=download&authuser=0&confirm=t&uuid=f7f61d48-5354-4eed-b743-ef620f308d79&at=APZUnTWzBs107NBYI1OvLHId8ff5:1706365135272",
+                        "path": "Desktop/pic1.png"
+                    }
+                ]
+            }
+        },
+        {
+            "type": "download",
+            "parameters": {
+                "files": [
+                    {
+                        "url": "https://drive.usercontent.google.com/download?id=13-ATVSrmAIUNAzGlE65JkkHpa4d87tqj&export=download&authuser=0&confirm=t&uuid=5ebcaa42-805b-4176-9262-1a320e668666&at=APZUnTW0H_QRSJYAFPe4OppuwE5o:1706365136587",
+                        "path": "Desktop/pic2.png"
+                    }
+                ]
+            }
+        },
+        {
+            "type": "download",
+            "parameters": {
+                "files": [
+                    {
+                        "url": "https://drive.usercontent.google.com/download?id=1-K1XgMJj3yj7bz3BWm9Zddk4_lvYO4IP&export=download&authuser=0&confirm=t&uuid=6c268649-b627-49aa-a1aa-c97510b96e69&at=APZUnTV630QYwgh3nx-jtBp_uGAc:1706365137855",
+                        "path": "Desktop/pic3.png"
+                    }
+                ]
+            }
+        },
+        {
+            "type": "download",
+            "parameters": {
+                "files": [
+                    {
+                        "url": "https://drive.usercontent.google.com/download?id=1Bk8esN_UFxOU50AMxJoSY8C-mQgX3uY3&export=download&authuser=0&confirm=t&uuid=c5a6ae81-426e-40e2-ac1b-16ba161e347d&at=APZUnTWD9bHDiTsGWBE1tJfcR6Zp:1706365140032",
+                        "path": "Desktop/pic4.png"
+                    }
+                ]
+            }
+        },
+        {
+            "type": "download",
+            "parameters": {
+                "files": [
+                    {
+                        "url": "https://drive.usercontent.google.com/download?id=1DZapoWlmXPYsFdFGppCj2F-MZvpGZXca&export=download&authuser=0&confirm=t&uuid=7afc4de2-cdee-4c9e-9e4d-305509054627&at=APZUnTXX-2uDuQdsmA5kQRzDGRs7:1706365140910",
+                        "path": "Desktop/pic5.png"
+                    }
+                ]
+            }
+        },
+        {
+            "type": "download",
+            "parameters": {
+                "files": [
+                    {
+                        "url": "https://drive.usercontent.google.com/download?id=1jwBqiulZbICiCn3c9W6avo0-Mu7yeIAV&export=download&authuser=0&confirm=t&uuid=64f0e1d1-ec71-453a-887e-b81218e4e756&at=APZUnTWQeckP8zn8b8grHfMtQYr2:1706365141943",
+                        "path": "Desktop/pic6.png"
+                    }
+                ]
+            }
+        }
+    ],
+    "trajectory": "trajectories/",
+    "related_apps": [
+        "libreoffice_impress"
+    ],
+    "evaluator": {
+        "postconfig": [
+            {
+                "type": "activate_window",
+                "parameters": {
+                    "window_name": "4.3-Template_4.29.2016.pptx - LibreOffice Impress",
+                    "strict": true
+                }
+            },
+            {
+                "type": "sleep",
+                "parameters": {
+                    "seconds": 0.5
+                }
+            },
+            {
+                "type": "execute",
+                "parameters": {
+                    "command": [
+                        "python",
+                        "-c",
+                        "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5);"
+                    ]
+                }
+            },
+            {
+                "type": "sleep",
+                "parameters": {
+                    "seconds": 0.5
+                }
+            }
+        ],
+        "func": "compare_pptx_files",
+        "expected": {
+            "type": "cloud_file",
+            "path": "https://drive.usercontent.google.com/download?id=1WeqZYQ0ZZvcFowo8rK15XHEIjoFCMVTN&export=download&authuser=0&confirm=t&uuid=e3bb39f8-0606-4bdf-808c-5b26f158dd88&at=APZUnTU_aAkyljmX3R_fkT5geNPv:1706366780602",
+            "dest": "4.3-Template_4.29.2016_Gold.pptx"
+        },
+        "result": {
+            "type": "vm_file",
+            "path": "Desktop/4.3-Template_4.29.2016.pptx",
+            "dest": "4.3-Template_4.29.2016.pptx"
+        },
+        "options": {
+            "examine_shape": false
+        }
+    }
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/libreoffice_impress/c59742c0-4323-4b9d-8a02-723c251deaa0.json b/evaluation_examples/examples/libreoffice_impress/c59742c0-4323-4b9d-8a02-723c251deaa0.json
index afbb92b..1e472ed 100644
--- a/evaluation_examples/examples/libreoffice_impress/c59742c0-4323-4b9d-8a02-723c251deaa0.json
+++ b/evaluation_examples/examples/libreoffice_impress/c59742c0-4323-4b9d-8a02-723c251deaa0.json
@@ -39,35 +39,35 @@
   ],
   "evaluator": {
     "postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "Mady_and_Mia_Baseball.pptx - LibreOffice Impress",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			}
+      {
+        "type": "activate_window",
+        "parameters": {
+          "window_name": "Mady_and_Mia_Baseball.pptx - LibreOffice Impress",
+          "strict": true
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python",
+            "-c",
+            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5);"
+          ]
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      }
     ],
     "func": "compare_audios",
     "result": {
diff --git a/evaluation_examples/examples/libreoffice_impress/ce88f674-ab7a-43da-9201-468d38539e4a.json b/evaluation_examples/examples/libreoffice_impress/ce88f674-ab7a-43da-9201-468d38539e4a.json
index 64d63f7..352019a 100644
--- a/evaluation_examples/examples/libreoffice_impress/ce88f674-ab7a-43da-9201-468d38539e4a.json
+++ b/evaluation_examples/examples/libreoffice_impress/ce88f674-ab7a-43da-9201-468d38539e4a.json
@@ -28,35 +28,35 @@
   ],
   "evaluator": {
     "postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "AM_Last_Page_Template.pptx - LibreOffice Impress",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			}
+      {
+        "type": "activate_window",
+        "parameters": {
+          "window_name": "AM_Last_Page_Template.pptx - LibreOffice Impress",
+          "strict": true
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python",
+            "-c",
+            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5);"
+          ]
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      }
     ],
     "func": "check_slide_orientation_Portrait",
     "result": {

From 4b8cab0805fd222fd52b31acf0ca659912901870 Mon Sep 17 00:00:00 2001
From: thomasshin <u3555016@hku.hk>
Date: Sun, 28 Jan 2024 14:36:28 +0800
Subject: [PATCH 06/13] check_highlighted_words modified

---
 desktop_env/evaluators/metrics/docs.py        | 26 ++++++++++++++-----
 .../6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json | 16 ++++++------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py
index a45c300..3e82e7d 100644
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -1,6 +1,8 @@
 import logging
 import os
 import xml.etree.ElementTree as ET
+import zipfile
+import re
 from typing import List, Dict, Any
 
 from docx import Document
@@ -247,14 +249,24 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
 def check_highlighted_words(file_path1, file_path2):
     if not compare_docx_files(file_path1, file_path2):
         return 0
-    document = Document(file_path1)
+        
+    # Extract content.xml from the .odt file
+    extract_dir = file_path1 + "_extracted"
+    with zipfile.ZipFile(file_path1, 'r') as zip_ref:
+        zip_ref.extractall(extract_dir)
+    content_xml_path = os.path.join(extract_dir, 'content.xml')
+    with open(content_xml_path, 'r') as file:
+        content_xml = file.read()
 
-    for paragraph in document.paragraphs:
-        for run in paragraph.runs:
-            if run.font.highlight_color is not None:
-                return 0  # Highlighted words found
+    # Check for yellow highlights in the content.xml
+    yellow_highlight_pattern = re.compile(r'(.{0,50}background-color="#ffff00"[^>]*>.{0,50})')
+    yellow_highlight_matches = yellow_highlight_pattern.findall(content_xml)
 
-    return 1  # No highlighted words found
+    # Return 1 if no yellow highlights are found, otherwise 0
+    if yellow_highlight_matches:
+        return 0
+    else:
+        return 1
 
 
 def evaluate_strike_through_last_paragraph(file_path1, file_path2):
@@ -415,4 +427,4 @@ def compare_highlighted_text(file1, file2):
     doc2_highlighted = extract_highlighted_text(Document(file2))
 
     # Compare the sets of highlighted text to check if they are the same
-    return set(doc1_highlighted) == set(doc2_highlighted)
+    return set(doc1_highlighted) == set(doc2_highlighted)
\ No newline at end of file
diff --git a/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json b/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json
index 3280355..df69215 100644
--- a/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json
+++ b/evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json
@@ -9,8 +9,8 @@
       "parameters": {
         "files": [
           {
-            "url": "https://drive.usercontent.google.com/download?id=10hgB73d_DoQXQVgUjvgXFUCP1Hd9YxDb&export=download&authuser=0&confirm=t&uuid=845f9616-2fb7-476a-abab-8b620d482ac2&at=APZUnTXB71PxHF7Dq9TC2OL_cRLm:1706199789147",
-            "path": "Desktop/sample-recruitment-phone-script.docx"
+            "url": "https://drive.google.com/uc?id=1Ul4mtQ4SpUNLVDlaiJThPprBe6WTGZ2L&export=download",
+            "path": "Desktop/sample-recruitment-phone-script.odt"
           }
         ]
       }
@@ -18,7 +18,7 @@
     {
       "type": "open",
       "parameters": {
-        "path": "Desktop/sample-recruitment-phone-script.docx"
+        "path": "Desktop/sample-recruitment-phone-script.odt"
       }
     }
   ],
@@ -31,7 +31,7 @@
       {
         "type": "activate_window",
         "parameters": {
-          "window_name": "sample-recruitment-phone-script.docx - LibreOffice Writer",
+          "window_name": "sample-recruitment-phone-script.odt - LibreOffice Writer",
           "strict": true
         }
       },
@@ -55,13 +55,13 @@
     "func": "check_highlighted_words",
     "expected": {
       "type": "cloud_file",
-      "path": "https://drive.usercontent.google.com/download?id=1s9Dsy66-zxbCAgeTyCh0P7AT7P4jF6o3&export=download&authuser=0&confirm=t&uuid=1239f2a1-8c86-45a4-8e7d-36388ac22a69&at=APZUnTVZQzXQAMNsKKQzOw5ppT8A:1706017721589",
-      "dest": "sample-recruitment-phone-script_Gold.docx"
+      "path": "https://drive.google.com/uc?id=12iMkgCYuUyhKUXux96kANLIeud0Wz9ct&export=download",
+      "dest": "sample-recruitment-phone-script_Gold.odt"
     },
     "result": {
       "type": "vm_file",
-      "path": "Desktop/sample-recruitment-phone-script.docx",
-      "dest": "sample-recruitment-phone-script.docx"
+      "path": "Desktop/sample-recruitment-phone-script.odt",
+      "dest": "sample-recruitment-phone-script.odt"
     }
   }
 }
\ No newline at end of file

From c875cad3e519cf14f346539fb4bf23d27ee34bf3 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sun, 28 Jan 2024 15:32:14 +0800
Subject: [PATCH 07/13] Fix some errors found in thunderbird examples

---
 .../0bf05a7d-b28b-44d2-955a-50b41e24012a.json |  86 ++++++------
 .../a01fbce3-2793-461f-ab86-43680ccbae25.json | 109 ----------------
 ...bce3-2793-461f-ab86-43680ccbae25.json.json |  85 ------------
 experiment_a11y_tree.py                       | 123 +++++++++++-------
 experiment_screenshot.py                      |  53 ++++----
 experiment_screenshot_a11y_tree.py            |  45 ++-----
 experiment_screenshot_seeact.py               |  32 +----
 experiment_screenshot_som.py                  |  32 +----
 8 files changed, 172 insertions(+), 393 deletions(-)
 delete mode 100644 evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json
 delete mode 100644 evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json.json

diff --git a/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json b/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json
index 4340397..48e5e61 100644
--- a/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json
+++ b/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json
@@ -21,55 +21,55 @@
         "path": "/home/user/Customers_New_7digit_Id.xlsx"
       }
     }
-  },
+  ],
   "trajectory": "trajectories/0bf05a7d-b28b-44d2-955a-50b41e24012a",
   "related_apps": [
     "libreoffice calc"
   ],
   "evaluator": {
     "postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "Customers_New_7digit_Id.xlsx - LibreOffice Calc",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"libreoffice",
-						"--convert-to",
-						"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
-						"--outdir",
-						"/home/user",
-						"/home/user/Customers_New_7digit_Id.xlsx"
-					]
-				}
-			}
+      {
+        "type": "activate_window",
+        "parameters": {
+          "window_name": "Customers_New_7digit_Id.xlsx - LibreOffice Calc",
+          "strict": true
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "python",
+            "-c",
+            "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");"
+          ]
+        }
+      },
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 0.5
+        }
+      },
+      {
+        "type": "execute",
+        "parameters": {
+          "command": [
+            "libreoffice",
+            "--convert-to",
+            "csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
+            "--outdir",
+            "/home/user",
+            "/home/user/Customers_New_7digit_Id.xlsx"
+          ]
+        }
+      }
     ],
     "func": "compare_table",
     "result": {
diff --git a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json
deleted file mode 100644
index b19b14a..0000000
--- a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json
+++ /dev/null
@@ -1,109 +0,0 @@
-{
-	"id": "a01fbce3-2793-461f-ab86-43680ccbae25",
-	"snapshot": "libreoffice_calc",
-	"instruction": "I need to set the decimal separator as a comma (,) for localized data representation and clarity in visualization. Can you assist with this?",
-	"source": "https://superuser.com/questions/1250677/how-to-set-decimal-separator-in-libre-office-calc",
-	"config": [
-		{
-			"type": "download",
-			"parameters": {
-				"file": [
-					{
-						"url": "https://drive.usercontent.google.com/download?id=1uT0axjo9lwkKu6hYVnsAL2FCrdH0DLUv&export=download&authuser=0&confirm=t&uuid=e7da6304-9c7a-4862-8a30-9f2284b843da&at=APZUnTVNHThpAZJmF6IuPckFvslw:1704187618838",
-						"path": "/home/user/Set_Decimal_Separator_Dot.xlsx"
-					}
-				]
-			}
-		},
-		{
-			"type": "open",
-			"parameters": {
-				"path": "/home/user/Set_Decimal_Separator_Dot.xlsx"
-			}
-		}
-	],
-	"trajectory": "trajectories/a01fbce3-2793-461f-ab86-43680ccbae25",
-	"related_apps": [
-		"libreoffice_calc"
-	],
-	"evaluator": {
-		"postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "Set_Decimal_Separator_Dot.xlsx - LibreOffice Calc",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"libreoffice",
-						"--convert-to",
-						"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
-						"--outdir",
-						"/home/user",
-						"/home/user/Set_Decimal_Separator_Dot.xlsx"
-					]
-				}
-			}
-		],
-		"func": "compare_table",
-		"result": {
-			"type": "vm_file",
-			"path": [
-				"/home/user/Set_Decimal_Separator_Dot.xlsx",
-				"/home/user/Set_Decimal_Separator_Dot-Sheet1.csv"
-			],
-			"dest": [
-				"Set_Decimal_Separator_Dot.xlsx",
-				"Set_Decimal_Separator_Dot-Sheet1.csv"
-			],
-			"multi": true
-		},
-		"expected": {
-			"type": "cloud_file",
-			"path": [
-				"https://drive.usercontent.google.com/download?id=15O0l5fxVi1JX_12KOLfbxWPHjXPZPon5&export=download&authuser=0&confirm=t&uuid=395e6c57-11a7-4b33-af4c-98ff2390742b&at=APZUnTVKcrUGrjRfBEwT_AD53Cmn:1705497822975",
-				"https://drive.usercontent.google.com/download?id=1rKDWcovxw4Qtd3RHs7M5p_QqryI0SQO3&export=download&authuser=0&confirm=t&uuid=eb6ffb6d-f7c2-44d8-ad77-db6c0aaf5cc7&at=APZUnTWr2VxrJPiiKVMdFd0IykrR:1705497846507"
-			],
-			"dest": [
-				"Set_Decimal_Separator_Dot_gold.xlsx",
-				"Set_Decimal_Separator_Dot_gold-Sheet1.csv"
-			],
-			"multi": true
-		},
-		"options": {
-			"rules": [
-				{
-					"type": "sheet_print",
-					"sheet_idx0": "RNSheet1",
-					"sheet_idx1": "ENSheet1"
-				}
-			]
-		}
-	}
-}
diff --git a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json.json b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json.json
deleted file mode 100644
index e7bc03b..0000000
--- a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json.json
+++ /dev/null
@@ -1,85 +0,0 @@
-{
-	"id": "a01fbce3-2793-461f-ab86-43680ccbae25",
-	"snapshot": "libreoffice_calc",
-	"instruction": "I need to set the decimal separator as a comma (,) for localized data representation and clarity in visualization. Can you assist with this?",
-	"source": "https://superuser.com/questions/1250677/how-to-set-decimal-separator-in-libre-office-calc",
-	"config": [],
-	"trajectory": "trajectories/a01fbce3-2793-461f-ab86-43680ccbae25",
-	"related_apps": [
-		"libreoffice_calc"
-	],
-	"evaluator": {
-		"postconfig": [
-			{
-				"type": "activate_window",
-				"parameters": {
-					"window_name": "Set_Decimal_Separator_Dot.xlsx - LibreOffice Calc",
-					"strict": true
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"python",
-						"-c",
-						"import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");"
-					]
-				}
-			},
-			{
-				"type": "sleep",
-				"parameters": {
-					"seconds": 0.5
-				}
-			},
-			{
-				"type": "execute",
-				"parameters": {
-					"command": [
-						"libreoffice",
-						"--convert-to",
-						"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true",
-						"--outdir",
-						"/home/user",
-						"/home/user/Set_Decimal_Separator_Dot.xlsx"
-					]
-				}
-			}
-		],
-		"func": "compare_table",
-		"result": {
-			"type": "vm_file",
-			"path": [
-				"/home/user/Set_Decimal_Separator_Dot.xlsx",
-				"/home/user/Set_Decimal_Separator_Dot.csv"
-			],
-			"dest": [
-				"Set_Decimal_Separator_Dot.xlsx",
-				"Set_Decimal_Separator_Dot.csv"
-			],
-			"multi": true
-		},
-		"expected": {
-			"type": "cloud_file",
-			"path": [
-				"https://drive.usercontent.google.com/download?id=15O0l5fxVi1JX_12KOLfbxWPHjXPZPon5&export=download&authuser=0&confirm=t&uuid=395e6c57-11a7-4b33-af4c-98ff2390742b&at=APZUnTVKcrUGrjRfBEwT_AD53Cmn:1705497822975",
-				"https://drive.usercontent.google.com/download?id=1rKDWcovxw4Qtd3RHs7M5p_QqryI0SQO3&export=download&authuser=0&confirm=t&uuid=eb6ffb6d-f7c2-44d8-ad77-db6c0aaf5cc7&at=APZUnTWr2VxrJPiiKVMdFd0IykrR:1705497846507"
-			],
-			"dest": [
-				"Set_Decimal_Separator_Dot_gold.xlsx",
-				"Set_Decimal_Separator_Dot_gold.csv"
-			],
-			"multi": true
-		},
-		"options": {
-			"as_shown": true
-		}
-	}
-}
diff --git a/experiment_a11y_tree.py b/experiment_a11y_tree.py
index 86e6a72..f03d2a6 100644
--- a/experiment_a11y_tree.py
+++ b/experiment_a11y_tree.py
@@ -1,10 +1,11 @@
+import ctypes
 import datetime
 import json
 import logging
 import os
 import sys
-import threading
-import time
+import func_timeout
+
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
 
@@ -45,6 +46,7 @@ logger = logging.getLogger("desktopenv.experiment")
 PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
 
 
+
 def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
     trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
     env = DesktopEnv(
@@ -103,28 +105,10 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
         except Exception as e:
             print(f"An error occurred while stopping the recording: {e}")
 
-    # Run the `record` function in a separate thread
-    recording_thread = threading.Thread(target=stop_recording())
-    recording_thread.start()
-
-    # Start a timer for your timeout length (in this case, 60 seconds)
-    timeout = 60  # seconds
-    start_time = time.time()
-
-    # The main thread will wait for the set timeout period or until the recording is done
-    while recording_thread.is_alive():
-        elapsed_time = time.time() - start_time
-        if elapsed_time >= timeout:
-            print("Timeout reached. Stopping recording.")
-            break
-        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
-
-    # kill the recording thread if it is still alive
-    if recording_thread.is_alive():
-        recording_thread.kill()
-
-    # Wait for the recording thread to finish before exiting
-    recording_thread.join()
+    try:
+        func_timeout.func_timeout(30, stop_recording)
+    except func_timeout.exceptions.FunctionTimedOut:
+        logger.info("Recording timed out.")
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
@@ -150,7 +134,7 @@ def main(example_class, example_id):
 
     with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_chrome"
+    example["snapshot"] = "exp_v1"
 
     api_key = os.environ.get("OPENAI_API_KEY")
     agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], max_tokens=1000,
@@ -186,26 +170,75 @@ if __name__ == '__main__':
         "f3977615-2b45-4ac5-8bba-80c17dbe2a37",
         "215dfd39-f493-4bc3-a027-8a97d72c61bf"
     ]
-    for example_id in vlc_list:
-        recording_thread = threading.Thread(target=main, args=("vlc", example_id))
-        recording_thread.start()
 
-        # Start a timer for your timeout length (in this case, 60 seconds)
-        timeout = 600  # seconds
-        start_time = time.time()
+    chrome_list = [
+        # "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
+        "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
+        "06fe7178-4491-4589-810f-2e2bc9502122",
+        "e1e75309-3ddb-4d09-92ec-de869c928143",
+        "35253b65-1c19-4304-8aa4-6884b8218fc0",
+        "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
+        "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
+        "44ee5668-ecd5-4366-a6ce-c1c9b8d4e938",
+        "2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3",
+        "480bcfea-d68f-4aaa-a0a9-2589ef319381",
+        "af630914-714e-4a24-a7bb-f9af687d3b91"
+    ]
 
-        # The main thread will wait for the set timeout period or until the recording is done
-        while recording_thread.is_alive():
-            elapsed_time = time.time() - start_time
-            if elapsed_time >= timeout:
-                print("Timeout reached. Kill this example.")
-                break
-            time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
-
-        # kill the recording thread if it is still alive
-        if recording_thread.is_alive():
-            recording_thread.kill()
-
-        # Wait for the recording thread to finish before exiting
-        recording_thread.join()
+    calc_list = [
+        "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
+        "0bf05a7d-b28b-44d2-955a-50b41e24012a",
+        "7b802dad-6e0f-4204-9815-d4e3f57627d8",
+        "7a4e4bc8-922c-4c84-865c-25ba34136be1",
+        "2bd59342-0664-4ccb-ba87-79379096cc08",
+        "a9f325aa-8c05-4e4f-8341-9e4358565f4f",
+        "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
+        "7efeb4b1-3d19-4762-b163-63328d66303b",
+        "4e6fcf72-daf3-439f-a232-c434ce416af6",
+        "6054afcb-5bab-4702-90a0-b259b5d3217c",
+        "abed40dc-063f-4598-8ba5-9fe749c0615d",
+        "01b269ae-2111-4a07-81fd-3fcd711993b0",
+        "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
+        "af2b02f7-acee-4be4-8b66-499fab394915",
+        "da1d63b8-fa12-417b-ba18-f748e5f770f3",
+        "636380ea-d5f6-4474-b6ca-b2ed578a20f1",
+        "5ba77536-05c5-4aae-a9ff-6e298d094c3e",
+        "4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b",
+        "672a1b02-c62f-4ae2-acf0-37f5fb3052b0",
+        "648fe544-16ba-44af-a587-12ccbe280ea6",
+        "8985d1e4-5b99-4711-add4-88949ebb2308",
+        "9e606842-2e27-43bf-b1d1-b43289c9589b",
+        "fcb6e45b-25c4-4087-9483-03d714f473a9",
+        "68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2",
+        "fff629ea-046e-4793-8eec-1a5a15c3eb35",
+        "5c9a206c-bb00-4fb6-bb46-ee675c187df5",
+        "e975ae74-79bd-4672-8d1c-dc841a85781d",
+        "34a6938a-58da-4897-8639-9b90d6db5391",
+        "b5a22759-b4eb-4bf2-aeed-ad14e8615f19",
+        "2f9913a1-51ed-4db6-bfe0-7e1c95b3139e",
+        "2558031e-401d-4579-8e00-3ecf540fb492",
+        "39aa4e37-dc91-482e-99af-132a612d40f3",
+        "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
+        "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
+        "51b11269-2ca8-4b2a-9163-f21758420e78",
+        "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
+        "f5a90742-3fa2-40fc-a564-f29b054e0337",
+        "22df9241-f8d7-4509-b7f1-37e501a823f7",
+        "1434ca3e-f9e3-4db8-9ca7-b4c653be7d17",
+        "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
+        "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
+        "3aaa4e37-dc91-482e-99af-132a612d40f3",
+        "37608790-6147-45d0-9f20-1137bb35703d",
+        "f9584479-3d0d-4c79-affa-9ad7afdd8850",
+        "d681960f-7bc3-4286-9913-a8812ba3261a",
+        "21df9241-f8d7-4509-b7f1-37e501a823f7",
+        "f6a90742-3fa2-40fc-a564-f29b054e0337",
+        "1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
+        "357ef137-7eeb-4c80-a3bb-0951f26a8aff",
+        "aa3a8974-2e85-438b-b29e-a64df44deb4b",
+        "a01fbce3-2793-461f-ab86-43680ccbae25",
+        "4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
+    ]
 
+    for example_id in calc_list:
+        main("libreoffice_calc", example_id)
diff --git a/experiment_screenshot.py b/experiment_screenshot.py
index 943a8ec..28b0f1e 100644
--- a/experiment_screenshot.py
+++ b/experiment_screenshot.py
@@ -3,11 +3,11 @@ import json
 import logging
 import os
 import sys
-import threading
 import time
-
+import func_timeout
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
+
 # from mm_agents.gemini_pro_agent import GeminiPro_Agent
 
 #  Logger Configs {{{ # 
@@ -77,7 +77,6 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
             logger.info("Done: %s", done)
             logger.info("Info: %s", info)
 
-
             # Save screenshot and trajectory information
             with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f:
                 with open(observation['screenshot'], "rb") as __f:
@@ -106,28 +105,10 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
         except Exception as e:
             print(f"An error occurred while stopping the recording: {e}")
 
-    # Run the `record` function in a separate thread
-    recording_thread = threading.Thread(target=stop_recording())
-    recording_thread.start()
-
-    # Start a timer for your timeout length (in this case, 60 seconds)
-    timeout = 60  # seconds
-    start_time = time.time()
-
-    # The main thread will wait for the set timeout period or until the recording is done
-    while recording_thread.is_alive():
-        elapsed_time = time.time() - start_time
-        if elapsed_time >= timeout:
-            print("Timeout reached. Stopping recording.")
-            break
-        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
-
-    # kill the recording thread if it is still alive
-    if recording_thread.is_alive():
-        recording_thread.kill()
-
-    # Wait for the recording thread to finish before exiting
-    recording_thread.join()
+    try:
+        func_timeout.func_timeout(30, stop_recording)
+    except func_timeout.exceptions.FunctionTimedOut:
+        logger.info("Recording timed out.")
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
@@ -153,10 +134,11 @@ def main(example_class, example_id):
 
     with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_chrome"
+    example["snapshot"] = "exp_v1"
 
     api_key = os.environ.get("OPENAI_API_KEY")
-    agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
+    agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space,
+                        exp="screenshot")
     #
     # api_key = os.environ.get("GENAI_API_KEY")
     # agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
@@ -172,7 +154,18 @@ def main(example_class, example_id):
 
 
 if __name__ == '__main__':
-    xx_list = [
+    chrome_list = [
+        # "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
+        # "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
+        # "06fe7178-4491-4589-810f-2e2bc9502122",
+        # "e1e75309-3ddb-4d09-92ec-de869c928143",
+        # "35253b65-1c19-4304-8aa4-6884b8218fc0",
+        # "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
+        # "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
+        # "44ee5668-ecd5-4366-a6ce-c1c9b8d4e938",
+        # "2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3",
+        "480bcfea-d68f-4aaa-a0a9-2589ef319381",
+        "af630914-714e-4a24-a7bb-f9af687d3b91"
     ]
-    for example_id in xx_list:
-        main("xx", example_id)
+    for example_id in chrome_list:
+        main("chrome", example_id)
diff --git a/experiment_screenshot_a11y_tree.py b/experiment_screenshot_a11y_tree.py
index 4d3fe21..042bbfb 100644
--- a/experiment_screenshot_a11y_tree.py
+++ b/experiment_screenshot_a11y_tree.py
@@ -1,10 +1,10 @@
+import ctypes
 import datetime
 import json
 import logging
 import os
 import sys
-import threading
-import time
+import func_timeout
 
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
@@ -44,9 +44,10 @@ logger.addHandler(sdebug_handler)
 logger = logging.getLogger("desktopenv.experiment")
 
 PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
-#PATH_TO_VM = "../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx"
 
 
+# PATH_TO_VM = "../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx"
+
 def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
     trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
     env = DesktopEnv(
@@ -105,28 +106,10 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
         except Exception as e:
             print(f"An error occurred while stopping the recording: {e}")
 
-    # Run the `record` function in a separate thread
-    recording_thread = threading.Thread(target=stop_recording())
-    recording_thread.start()
-
-    # Start a timer for your timeout length (in this case, 60 seconds)
-    timeout = 60  # seconds
-    start_time = time.time()
-
-    # The main thread will wait for the set timeout period or until the recording is done
-    while recording_thread.is_alive():
-        elapsed_time = time.time() - start_time
-        if elapsed_time >= timeout:
-            print("Timeout reached. Stopping recording.")
-            break
-        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
-
-    # kill the recording thread if it is still alive
-    if recording_thread.is_alive():
-        recording_thread.kill()
-
-    # Wait for the recording thread to finish before exiting
-    recording_thread.join()
+    try:
+        func_timeout.func_timeout(30, stop_recording)
+    except func_timeout.exceptions.FunctionTimedOut:
+        logger.info("Recording timed out.")
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
@@ -143,9 +126,9 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
 
 def main(example_class, example_id):
     action_space = "pyautogui"
-    #example_class = "libreoffice_calc"
-    #example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
-    #example_id = "01b269ae-2111-4a07-81fd-3fcd711993b0"
+    # example_class = "libreoffice_calc"
+    # example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
+    # example_id = "01b269ae-2111-4a07-81fd-3fcd711993b0"
     gpt4_model = "gpt-4-vision-preview"
     gemini_model = "gemini-pro-vision"
 
@@ -155,9 +138,9 @@ def main(example_class, example_id):
 
     with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_chrome"
-    #example["snapshot"] = "exp_setup4"
-    #example["snapshot"] = "Snapshot 30"
+    example["snapshot"] = "exp_v1"
+    # example["snapshot"] = "exp_setup4"
+    # example["snapshot"] = "Snapshot 30"
 
     api_key = os.environ.get("OPENAI_API_KEY")
     agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
diff --git a/experiment_screenshot_seeact.py b/experiment_screenshot_seeact.py
index 3f72375..541d549 100644
--- a/experiment_screenshot_seeact.py
+++ b/experiment_screenshot_seeact.py
@@ -1,10 +1,10 @@
+import ctypes
 import datetime
 import json
 import logging
 import os
 import sys
-import threading
-import time
+import func_timeout
 
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
@@ -104,28 +104,10 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
         except Exception as e:
             print(f"An error occurred while stopping the recording: {e}")
 
-    # Run the `record` function in a separate thread
-    recording_thread = threading.Thread(target=stop_recording())
-    recording_thread.start()
-
-    # Start a timer for your timeout length (in this case, 60 seconds)
-    timeout = 60  # seconds
-    start_time = time.time()
-
-    # The main thread will wait for the set timeout period or until the recording is done
-    while recording_thread.is_alive():
-        elapsed_time = time.time() - start_time
-        if elapsed_time >= timeout:
-            print("Timeout reached. Stopping recording.")
-            break
-        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
-
-    # kill the recording thread if it is still alive
-    if recording_thread.is_alive():
-        recording_thread.kill()
-
-    # Wait for the recording thread to finish before exiting
-    recording_thread.join()
+    try:
+        func_timeout.func_timeout(30, stop_recording)
+    except func_timeout.exceptions.FunctionTimedOut:
+        logger.info("Recording timed out.")
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
@@ -147,7 +129,7 @@ def main(example_class, example_id):
 
     with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_chrome"
+    example["snapshot"] = "exp_v1"
 
     api_key = os.environ.get("OPENAI_API_KEY")
     agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py
index abd77d9..2ecdafe 100644
--- a/experiment_screenshot_som.py
+++ b/experiment_screenshot_som.py
@@ -1,10 +1,10 @@
+import ctypes
 import datetime
 import json
 import logging
 import os
 import sys
-import threading
-import time
+import func_timeout
 
 from desktop_env.envs.desktop_env import DesktopEnv
 from mm_agents.gpt_4v_agent import GPT4v_Agent
@@ -104,28 +104,10 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
         except Exception as e:
             print(f"An error occurred while stopping the recording: {e}")
 
-    # Run the `record` function in a separate thread
-    recording_thread = threading.Thread(target=stop_recording())
-    recording_thread.start()
-
-    # Start a timer for your timeout length (in this case, 60 seconds)
-    timeout = 60  # seconds
-    start_time = time.time()
-
-    # The main thread will wait for the set timeout period or until the recording is done
-    while recording_thread.is_alive():
-        elapsed_time = time.time() - start_time
-        if elapsed_time >= timeout:
-            print("Timeout reached. Stopping recording.")
-            break
-        time.sleep(0.1)  # Sleep for a short time to prevent this loop from using too much CPU
-
-    # kill the recording thread if it is still alive
-    if recording_thread.is_alive():
-        recording_thread.kill()
-
-    # Wait for the recording thread to finish before exiting
-    recording_thread.join()
+    try:
+        func_timeout.func_timeout(30, stop_recording)
+    except func_timeout.exceptions.FunctionTimedOut:
+        logger.info("Recording timed out.")
 
     result = env.evaluate()
     logger.info("Result: %.2f", result)
@@ -147,7 +129,7 @@ def main(example_class, example_id):
 
     with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
-    example["snapshot"] = "exp_chrome"
+    example["snapshot"] = "exp_v1"
 
     api_key = os.environ.get("OPENAI_API_KEY")
     agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],

From be17bd3307d943b9808bd64f272d4a2c5d1b34e1 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sun, 28 Jan 2024 15:35:31 +0800
Subject: [PATCH 08/13] Fix some errors found in thunderbird examples

---
 .../0bf05a7d-b28b-44d2-955a-50b41e24012a.json |   2 +-
 .../1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json |   2 +-
 .../21df9241-f8d7-4509-b7f1-37e501a823f7.json |   2 +-
 .../3aaa4e37-dc91-482e-99af-132a612d40f3.json |   2 +-
 .../4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json |   2 +-
 .../a01fbce3-2793-461f-ab86-43680ccbae25.json | 109 ++++++++++++++++++
 .../aa3a8974-2e85-438b-b29e-a64df44deb4b.json |   2 +-
 7 files changed, 115 insertions(+), 6 deletions(-)
 create mode 100644 evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json

diff --git a/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json b/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json
index 48e5e61..358e0bf 100644
--- a/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json
+++ b/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json
@@ -7,7 +7,7 @@
     {
       "type": "download",
       "parameters": {
-        "file": [
+        "files": [
           {
             "url": "https://drive.usercontent.google.com/download?id=1DqGy5JRKOuZMRJ8O76d4Cds4WaRyz8V1&export=download&authuser=0&confirm=t&uuid=fa0694d1-2a77-4fd2-89d3-d9b854317823&at=APZUnTU9BxqG7E8tLZ104c0E8BEL:1705501029016",
             "path": "/home/user/Customers_New_7digit_Id.xlsx"
diff --git a/evaluation_examples/examples/libreoffice_calc/1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json b/evaluation_examples/examples/libreoffice_calc/1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json
index afc36a8..08b9f8f 100644
--- a/evaluation_examples/examples/libreoffice_calc/1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json
+++ b/evaluation_examples/examples/libreoffice_calc/1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json
@@ -7,7 +7,7 @@
 		{
 			"type": "download",
 			"parameters": {
-				"file": [
+				"files": [
 					{
 						"url": "https://drive.usercontent.google.com/download?id=1Wkepf_vic9o7CZFiosZ4jZT_Hy2WbRPZ&export=download&authuser=0&confirm=t&uuid=bc2ce901-a6bb-433f-bcce-cbe42d813f18&at=APZUnTVQcGTcXjwqenmtSH6IMFkM:1703858853235",
 						"path": "/home/user/Zoom_Out_Oversized_Cells.xlsx"
diff --git a/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json b/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json
index 328a996..199bc74 100644
--- a/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json
+++ b/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json
@@ -7,7 +7,7 @@
 		{
 			"type": "download",
 			"parameters": {
-				"file": [
+				"files": [
 					{
 						"url": "https://drive.usercontent.google.com/download?id=16PowrQA4E71xUoJmpXPHy0dr9HBcTRmo&export=download&authuser=0&confirm=t&uuid=9a6265f7-585c-4cf8-b321-3b859aec1e68&at=APZUnTWzzOw85wws0ojXNPsIwnjE:1703858126178",
 						"path": "/home/user/Represent_in_millions_billions.xlsx"
diff --git a/evaluation_examples/examples/libreoffice_calc/3aaa4e37-dc91-482e-99af-132a612d40f3.json b/evaluation_examples/examples/libreoffice_calc/3aaa4e37-dc91-482e-99af-132a612d40f3.json
index ed5ca1f..58dde61 100644
--- a/evaluation_examples/examples/libreoffice_calc/3aaa4e37-dc91-482e-99af-132a612d40f3.json
+++ b/evaluation_examples/examples/libreoffice_calc/3aaa4e37-dc91-482e-99af-132a612d40f3.json
@@ -7,7 +7,7 @@
 		{
 			"type": "download",
 			"parameters": {
-				"file": [
+				"files": [
 					{
 						"url": "https://drive.usercontent.google.com/download?id=17sNGNFpZtmwuz74Pid2WwWL_rgTkgTg0&export=download&authuser=0&confirm=t&uuid=18d3601d-e329-4525-bd11-633c678601d6&at=APZUnTVY9kvNoMeETP6HVRynDHqq:1706012082827",
 						"path": "/home/user/Export_Calc_to_CSV.xlsx"
diff --git a/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json b/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json
index dae73c2..d0c5a47 100644
--- a/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json
+++ b/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json
@@ -7,7 +7,7 @@
 		{
 			"type": "download",
 			"parameters": {
-				"file": [
+				"files": [
 					{
 						"url": "https://drive.usercontent.google.com/download?id=1LHxVvEpLI7kp0Iiy8K74gwkoGiwcLeYP&export=download&authuser=0&confirm=t&uuid=690287ee-d413-46e7-9b01-c56c12e445ff&at=APZUnTVCSd_ajhMGWpEgLHiExfbf:1704199487820",
 						"path": "/home/user/Padding_Decimals_In_Formular.xlsx"
diff --git a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json
new file mode 100644
index 0000000..b19b14a
--- /dev/null
+++ b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json
@@ -0,0 +1,109 @@
+{
+	"id": "a01fbce3-2793-461f-ab86-43680ccbae25",
+	"snapshot": "libreoffice_calc",
+	"instruction": "I need to set the decimal separator as a comma (,) for localized data representation and clarity in visualization. Can you assist with this?",
+	"source": "https://superuser.com/questions/1250677/how-to-set-decimal-separator-in-libre-office-calc",
+	"config": [
+		{
+			"type": "download",
+			"parameters": {
+				"file": [
+					{
+						"url": "https://drive.usercontent.google.com/download?id=1uT0axjo9lwkKu6hYVnsAL2FCrdH0DLUv&export=download&authuser=0&confirm=t&uuid=e7da6304-9c7a-4862-8a30-9f2284b843da&at=APZUnTVNHThpAZJmF6IuPckFvslw:1704187618838",
+						"path": "/home/user/Set_Decimal_Separator_Dot.xlsx"
+					}
+				]
+			}
+		},
+		{
+			"type": "open",
+			"parameters": {
+				"path": "/home/user/Set_Decimal_Separator_Dot.xlsx"
+			}
+		}
+	],
+	"trajectory": "trajectories/a01fbce3-2793-461f-ab86-43680ccbae25",
+	"related_apps": [
+		"libreoffice_calc"
+	],
+	"evaluator": {
+		"postconfig": [
+			{
+				"type": "activate_window",
+				"parameters": {
+					"window_name": "Set_Decimal_Separator_Dot.xlsx - LibreOffice Calc",
+					"strict": true
+				}
+			},
+			{
+				"type": "sleep",
+				"parameters": {
+					"seconds": 0.5
+				}
+			},
+			{
+				"type": "execute",
+				"parameters": {
+					"command": [
+						"python",
+						"-c",
+						"import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");"
+					]
+				}
+			},
+			{
+				"type": "sleep",
+				"parameters": {
+					"seconds": 0.5
+				}
+			},
+			{
+				"type": "execute",
+				"parameters": {
+					"command": [
+						"libreoffice",
+						"--convert-to",
+						"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
+						"--outdir",
+						"/home/user",
+						"/home/user/Set_Decimal_Separator_Dot.xlsx"
+					]
+				}
+			}
+		],
+		"func": "compare_table",
+		"result": {
+			"type": "vm_file",
+			"path": [
+				"/home/user/Set_Decimal_Separator_Dot.xlsx",
+				"/home/user/Set_Decimal_Separator_Dot-Sheet1.csv"
+			],
+			"dest": [
+				"Set_Decimal_Separator_Dot.xlsx",
+				"Set_Decimal_Separator_Dot-Sheet1.csv"
+			],
+			"multi": true
+		},
+		"expected": {
+			"type": "cloud_file",
+			"path": [
+				"https://drive.usercontent.google.com/download?id=15O0l5fxVi1JX_12KOLfbxWPHjXPZPon5&export=download&authuser=0&confirm=t&uuid=395e6c57-11a7-4b33-af4c-98ff2390742b&at=APZUnTVKcrUGrjRfBEwT_AD53Cmn:1705497822975",
+				"https://drive.usercontent.google.com/download?id=1rKDWcovxw4Qtd3RHs7M5p_QqryI0SQO3&export=download&authuser=0&confirm=t&uuid=eb6ffb6d-f7c2-44d8-ad77-db6c0aaf5cc7&at=APZUnTWr2VxrJPiiKVMdFd0IykrR:1705497846507"
+			],
+			"dest": [
+				"Set_Decimal_Separator_Dot_gold.xlsx",
+				"Set_Decimal_Separator_Dot_gold-Sheet1.csv"
+			],
+			"multi": true
+		},
+		"options": {
+			"rules": [
+				{
+					"type": "sheet_print",
+					"sheet_idx0": "RNSheet1",
+					"sheet_idx1": "ENSheet1"
+				}
+			]
+		}
+	}
+}
diff --git a/evaluation_examples/examples/libreoffice_calc/aa3a8974-2e85-438b-b29e-a64df44deb4b.json b/evaluation_examples/examples/libreoffice_calc/aa3a8974-2e85-438b-b29e-a64df44deb4b.json
index 7c8a302..90437d0 100644
--- a/evaluation_examples/examples/libreoffice_calc/aa3a8974-2e85-438b-b29e-a64df44deb4b.json
+++ b/evaluation_examples/examples/libreoffice_calc/aa3a8974-2e85-438b-b29e-a64df44deb4b.json
@@ -7,7 +7,7 @@
 		{
 			"type": "download",
 			"parameters": {
-				"file": [
+				"files": [
 					{
 						"url": "https://drive.usercontent.google.com/download?id=1O4bw7jEsVdFGeGeSX8hDjsvIbozV38sd&export=download&authuser=0&confirm=t&uuid=b6ceade0-e9c3-47bf-8c40-fef77b5ea1f1&at=APZUnTUUYaEx0Y_lAESeK1DfQZw6:1704179724348",
 						"path": "/home/user/Resize_Cells_Fit_Page.xlsx"

From 353ab6607dbda61ab9862e265560ba5ba347c326 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sun, 28 Jan 2024 16:51:38 +0800
Subject: [PATCH 09/13] Fix some errors found in libreoffice_calc examples

---
 .../4188d3a4-077d-46b7-9c86-23e1a036f6c1.json |  2 +-
 experiment_a11y_tree.py                       | 46 +++++--------------
 2 files changed, 13 insertions(+), 35 deletions(-)

diff --git a/evaluation_examples/examples/libreoffice_calc/4188d3a4-077d-46b7-9c86-23e1a036f6c1.json b/evaluation_examples/examples/libreoffice_calc/4188d3a4-077d-46b7-9c86-23e1a036f6c1.json
index 7f1137d..7b77db3 100644
--- a/evaluation_examples/examples/libreoffice_calc/4188d3a4-077d-46b7-9c86-23e1a036f6c1.json
+++ b/evaluation_examples/examples/libreoffice_calc/4188d3a4-077d-46b7-9c86-23e1a036f6c1.json
@@ -26,7 +26,7 @@
 	"related_apps": [
 		"libreoffice_calc"
 	],
-	"evaluators": {
+	"evaluator": {
     "postconfig": [
       {
         "type": "activate_window",
diff --git a/experiment_a11y_tree.py b/experiment_a11y_tree.py
index f03d2a6..5a8a5ad 100644
--- a/experiment_a11y_tree.py
+++ b/experiment_a11y_tree.py
@@ -186,39 +186,18 @@ if __name__ == '__main__':
     ]
 
     calc_list = [
-        "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
-        "0bf05a7d-b28b-44d2-955a-50b41e24012a",
-        "7b802dad-6e0f-4204-9815-d4e3f57627d8",
-        "7a4e4bc8-922c-4c84-865c-25ba34136be1",
-        "2bd59342-0664-4ccb-ba87-79379096cc08",
-        "a9f325aa-8c05-4e4f-8341-9e4358565f4f",
-        "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
-        "7efeb4b1-3d19-4762-b163-63328d66303b",
-        "4e6fcf72-daf3-439f-a232-c434ce416af6",
-        "6054afcb-5bab-4702-90a0-b259b5d3217c",
-        "abed40dc-063f-4598-8ba5-9fe749c0615d",
-        "01b269ae-2111-4a07-81fd-3fcd711993b0",
-        "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
-        "af2b02f7-acee-4be4-8b66-499fab394915",
-        "da1d63b8-fa12-417b-ba18-f748e5f770f3",
-        "636380ea-d5f6-4474-b6ca-b2ed578a20f1",
-        "5ba77536-05c5-4aae-a9ff-6e298d094c3e",
-        "4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b",
-        "672a1b02-c62f-4ae2-acf0-37f5fb3052b0",
-        "648fe544-16ba-44af-a587-12ccbe280ea6",
-        "8985d1e4-5b99-4711-add4-88949ebb2308",
-        "9e606842-2e27-43bf-b1d1-b43289c9589b",
-        "fcb6e45b-25c4-4087-9483-03d714f473a9",
-        "68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2",
-        "fff629ea-046e-4793-8eec-1a5a15c3eb35",
-        "5c9a206c-bb00-4fb6-bb46-ee675c187df5",
-        "e975ae74-79bd-4672-8d1c-dc841a85781d",
-        "34a6938a-58da-4897-8639-9b90d6db5391",
-        "b5a22759-b4eb-4bf2-aeed-ad14e8615f19",
-        "2f9913a1-51ed-4db6-bfe0-7e1c95b3139e",
-        "2558031e-401d-4579-8e00-3ecf540fb492",
-        "39aa4e37-dc91-482e-99af-132a612d40f3",
-        "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
+        # "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
+        # "0bf05a7d-b28b-44d2-955a-50b41e24012a",
+        # "7a4e4bc8-922c-4c84-865c-25ba34136be1",
+        # "2bd59342-0664-4ccb-ba87-79379096cc08",
+        # "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
+        # "7efeb4b1-3d19-4762-b163-63328d66303b",
+        # "4e6fcf72-daf3-439f-a232-c434ce416af6",
+        # "6054afcb-5bab-4702-90a0-b259b5d3217c",
+        # "abed40dc-063f-4598-8ba5-9fe749c0615d",
+        # "01b269ae-2111-4a07-81fd-3fcd711993b0",
+        # "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
+        # "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
         "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
         "51b11269-2ca8-4b2a-9163-f21758420e78",
         "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
@@ -232,7 +211,6 @@ if __name__ == '__main__':
         "f9584479-3d0d-4c79-affa-9ad7afdd8850",
         "d681960f-7bc3-4286-9913-a8812ba3261a",
         "21df9241-f8d7-4509-b7f1-37e501a823f7",
-        "f6a90742-3fa2-40fc-a564-f29b054e0337",
         "1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
         "357ef137-7eeb-4c80-a3bb-0951f26a8aff",
         "aa3a8974-2e85-438b-b29e-a64df44deb4b",

From cc21c3a6b195b7c9447397fb3b93574866ab4035 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sun, 28 Jan 2024 21:19:18 +0800
Subject: [PATCH 10/13] Fix some errors found in calc examples

---
 desktop_env/evaluators/metrics/__init__.py    |   2 +-
 desktop_env/evaluators/metrics/pdf.py         |   2 +
 desktop_env/evaluators/metrics/table.py       | 224 ++++++++++--------
 .../4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json |   5 +-
 .../6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json |   2 +-
 .../a01fbce3-2793-461f-ab86-43680ccbae25.json |   2 +-
 experiment_a11y_tree.py                       |  27 +--
 experiment_screenshot.py                      |  34 ++-
 mm_agents/gpt_4v_agent.py                     |  13 +
 9 files changed, 186 insertions(+), 125 deletions(-)

diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index f368699..82b889e 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -19,7 +19,7 @@ from .slides import check_presenter_console_disable, check_image_stretch_and_cen
     compare_pptx_files, check_strikethrough, \
     check_slide_orientation_Portrait, evaluate_presentation_fill_to_rgb_distance, check_left_panel
 # from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom, check_data_validations
-from .table import compare_table
+from .table import compare_table, compare_csv
 from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
 
 from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \
diff --git a/desktop_env/evaluators/metrics/pdf.py b/desktop_env/evaluators/metrics/pdf.py
index d607733..d6644d1 100644
--- a/desktop_env/evaluators/metrics/pdf.py
+++ b/desktop_env/evaluators/metrics/pdf.py
@@ -6,6 +6,8 @@ from pypdf import PdfReader
 
 
 def check_pdf_pages(pdf_file: str, rules: Dict[str, Any]) -> float:
+    if pdf_file is None:
+        return 0.0
     reader = PdfReader(pdf_file)
     nb_pages: int = len(reader.pages)
     return float(getattr(operator, rules["relation"])(nb_pages, rules["ref_value"]))
diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py
index d7e4faf..4e8bac6 100644
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -1,32 +1,35 @@
+import functools
+import itertools
 import logging
-#import operator
+import os.path
+# import operator
 from numbers import Number
 from typing import Any, Union, cast, Callable, Iterable
 from typing import Dict, List, Tuple
-import os.path
-import itertools
-import functools
 
 import openpyxl
 import pandas as pd
 from openpyxl import Workbook
-from openpyxl.worksheet.worksheet import Worksheet
-#from openpyxl.worksheet.cell_range import MultiCellRange
-from openpyxl.worksheet.datavalidation import DataValidation
 from openpyxl.cell.cell import Cell
-#from openpyxl.utils import coordinate_to_tuple
+# from openpyxl.worksheet.cell_range import MultiCellRange
+from openpyxl.worksheet.datavalidation import DataValidation
+from openpyxl.worksheet.worksheet import Worksheet
 
-from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles
 from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
+from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles
+
+# from openpyxl.utils import coordinate_to_tuple
 
 logger = logging.getLogger("desktopenv.metric.table")
 
 BOOK = Union[pd.ExcelFile, Workbook, str]
-def _parse_sheet_idx( sheet_idx: Union[int, str]
-                    , result: BOOK, expected: BOOK
-                    , result_sheet_names: List[str]
-                    , expected_sheet_names: List[str]
-                    ) -> Tuple[BOOK, str]:
+
+
+def _parse_sheet_idx(sheet_idx: Union[int, str]
+                     , result: BOOK, expected: BOOK
+                     , result_sheet_names: List[str]
+                     , expected_sheet_names: List[str]
+                     ) -> Tuple[BOOK, str]:
     #  function _parse_sheet_idx {{{ # 
     if isinstance(sheet_idx, int):
         index: str = result_sheet_names[sheet_idx]
@@ -49,7 +52,10 @@ def _parse_sheet_idx( sheet_idx: Union[int, str]
     return book, index
     #  }}} function _parse_sheet_idx # 
 
+
 SHEET = Union[pd.DataFrame, Worksheet, List[str]]
+
+
 def _load_sheet(book: BOOK, index: str) -> SHEET:
     #  function _load_sheet {{{ # 
     if isinstance(book, str):
@@ -57,12 +63,12 @@ def _load_sheet(book: BOOK, index: str) -> SHEET:
         csv_name: str = "{:}-{:}.csv".format(os.path.splitext(book)[0], index)
 
         with open(csv_name) as f:
-            csv_lines: List[str] = list( itertools.dropwhile( lambda l: len(l)==0
-                                                            , map( lambda l: l.strip()
-                                                                 , reversed(f.read().splitlines())
-                                                                 )
+            csv_lines: List[str] = list(itertools.dropwhile(lambda l: len(l) == 0
+                                                            , map(lambda l: l.strip()
+                                                                  , reversed(f.read().splitlines())
+                                                                  )
                                                             )
-                                       )
+                                        )
         return csv_lines
     if isinstance(book, pd.ExcelFile):
         return pd.read_excel(book, index)
@@ -72,7 +78,8 @@ def _load_sheet(book: BOOK, index: str) -> SHEET:
     raise NotImplementedError("Not supported workbook format")
     #  }}} function _load_sheet # 
 
-def compare_table(result: str, expected: str, **options) -> float:
+
+def compare_table(result: str, expected: str = None, **options) -> float:
     #  function compare_table {{{ # 
     """
     Args:
@@ -99,21 +106,28 @@ def compare_table(result: str, expected: str, **options) -> float:
         return 0.
     worksheetr_names: List[str] = pdworkbookr.sheet_names
 
-    xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected)
-    pdworkbooke = pd.ExcelFile(expected)
-    worksheete_names: List[str] = pdworkbooke.sheet_names
+    if expected is not None:
 
-    parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] =\
-            functools.partial( _parse_sheet_idx
-                             , result_sheet_names=worksheetr_names
-                             , expected_sheet_names=worksheete_names
-                             )
+        xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected)
+        pdworkbooke = pd.ExcelFile(expected)
+        worksheete_names: List[str] = pdworkbooke.sheet_names
+    else:
+        xlworkbooke: Workbook = None
+        pdworkbooke = None
+        worksheete_names: List[str] = None
+
+    parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] = \
+        functools.partial(
+            _parse_sheet_idx,
+            result_sheet_names=worksheetr_names,
+            expected_sheet_names=worksheete_names
+        )
 
     passes = True
     for r in options["rules"]:
         if r["type"] == "sheet_name":
             #  Compare Sheet Names {{{ # 
-            metric: bool = worksheetr_names==worksheete_names
+            metric: bool = worksheetr_names == worksheete_names
             logger.debug("Assertion: %s.sheet_names == %s.sheet_names - %s", result, expected, metric)
             #  }}} Compare Sheet Names # 
 
@@ -174,8 +188,8 @@ def compare_table(result: str, expected: str, **options) -> float:
 
             styles1: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
             styles2: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
-            #number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
-            #number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
+            # number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
+            # number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
             metric: bool = styles1 == styles2
             logger.debug("Assertion: %s.style == %s.style - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
             #  }}} Compare Style (Also Conditional Formatiing) # 
@@ -188,11 +202,11 @@ def compare_table(result: str, expected: str, **options) -> float:
             sheet1: Worksheet = _load_sheet(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke))
             sheet2: Worksheet = _load_sheet(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke))
             metric: bool = sheet1.freeze_panes == sheet2.freeze_panes
-            logger.debug( "Assertion: %s.freeze(%s) == %s.freeze(%s) - %s"
-                        , r["sheet_idx0"], sheet1.freeze_panes
-                        , r["sheet_idx1"], sheet2.freeze_panes
-                        , metric
-                        )
+            logger.debug("Assertion: %s.freeze(%s) == %s.freeze(%s) - %s"
+                         , r["sheet_idx0"], sheet1.freeze_panes
+                         , r["sheet_idx1"], sheet2.freeze_panes
+                         , metric
+                         )
             #  }}} Compare Freezing # 
 
         elif r["type"] == "zoom":
@@ -204,7 +218,8 @@ def compare_table(result: str, expected: str, **options) -> float:
             sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
             zoom_scale: Number = sheet.sheet_view.zoomScale or 100.
             metric: bool = _match_value_to_rule(zoom_scale, r)
-            logger.debug("Assertion: %s.zoom(%.1f) %s %.1f - %s", r["sheet_idx"], zoom_scale, r["method"], r["ref"], metric)
+            logger.debug("Assertion: %s.zoom(%.1f) %s %.1f - %s", r["sheet_idx"], zoom_scale, r["method"], r["ref"],
+                         metric)
             #  }}} Check Zooming # 
 
         elif r["type"] == "data_validation":
@@ -231,15 +246,15 @@ def compare_table(result: str, expected: str, **options) -> float:
             sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
             data_validators: List[DataValidation] = sheet.data_validations.dataValidation
 
-            total_metric = len(data_validators)>=len(r["dv_props"])
+            total_metric = len(data_validators) >= len(r["dv_props"])
             for dat_vldt in data_validators:
                 metric = False
                 for prpt in r["dv_props"]:
-                    metric = metric or all( _match_value_to_rule( getattr(dat_vldt, attrbt)
+                    metric = metric or all(_match_value_to_rule(getattr(dat_vldt, attrbt)
                                                                 , mr
-                                                                )\
-                                        for attrbt, mr in prpt.items()
-                                          )
+                                                                ) \
+                                           for attrbt, mr in prpt.items()
+                                           )
                     if metric:
                         break
                 total_metric = total_metric and metric
@@ -256,14 +271,14 @@ def compare_table(result: str, expected: str, **options) -> float:
             # sheet_idx1: as sheet_idx0
             # props: list of str, see utils.load_rows_or_cols
 
-            rows1: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
-                                                     , obj="row"
-                                                     , **r
-                                                     )
-            rows2: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
-                                                     , obj="row"
-                                                     , **r
-                                                     )
+            rows1: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
+                                                      , obj="row"
+                                                      , **r
+                                                      )
+            rows2: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
+                                                      , obj="row"
+                                                      , **r
+                                                      )
             logger.debug("Rows1: %s", repr(rows1))
             logger.debug("Rows2: %s", repr(rows2))
             metric: bool = rows1 == rows2
@@ -276,14 +291,14 @@ def compare_table(result: str, expected: str, **options) -> float:
             # sheet_idx1: as sheet_idx0
             # props: list of str, see utils.load_rows_or_cols
 
-            cols1: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
-                                                     , obj="column"
-                                                     , **r
-                                                     )
-            cols2: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
-                                                     , obj="column"
-                                                     , **r
-                                                     )
+            cols1: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
+                                                      , obj="column"
+                                                      , **r
+                                                      )
+            cols2: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
+                                                      , obj="column"
+                                                      , **r
+                                                      )
             metric: bool = cols1 == cols2
             logger.debug("Assertion: %s[cols] == %s[cols] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
             #  }}} Check Row Properties # 
@@ -296,21 +311,21 @@ def compare_table(result: str, expected: str, **options) -> float:
             #   supported attributes: value & those supported by utils._read_cell_style
 
             sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
-            #data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
+            # data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
             cell: Cell = sheet[r["coordinate"]]
             metric: bool = True
             for prpt, rule in r["props"].items():
-                if prpt=="value":
+                if prpt == "value":
                     val = read_cell_value(*parse_idx(r["sheet_idx"], result, expected), r["coordinate"])
                 else:
                     val = _read_cell_style(prpt, cell)
 
                 metric = metric and _match_value_to_rule(val, rule)
 
-            logger.debug( "Assertion: %s[%s] :%s - %s"
-                        , r["sheet_idx"], r["coordinate"]
-                        , repr(r["props"]), metric
-                        )
+            logger.debug("Assertion: %s[%s] :%s - %s"
+                         , r["sheet_idx"], r["coordinate"]
+                         , repr(r["props"]), metric
+                         )
             #  }}} Check Cell Properties # 
 
         else:
@@ -323,6 +338,7 @@ def compare_table(result: str, expected: str, **options) -> float:
     return float(passes)
     #  }}} function compare_table # 
 
+
 def compare_csv(result: str, expected: str, **options) -> float:
     if result is None:
         return 0.
@@ -338,9 +354,10 @@ def compare_csv(result: str, expected: str, **options) -> float:
         result_lines = map(str.lower, result_lines)
         expected_lines = map(str.lower, expected_lines)
 
-    metric: bool = list(result_lines)==list(expected_lines)
+    metric: bool = list(result_lines) == list(expected_lines)
     return float(metric)
 
+
 if __name__ == '__main__':
     import datetime
     import sys
@@ -360,7 +377,8 @@ if __name__ == '__main__':
     stdout_handler.setLevel(logging.INFO)
     sdebug_handler.setLevel(logging.DEBUG)
 
-    formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
+    formatter = logging.Formatter(
+        fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
     file_handler.setFormatter(formatter)
     debug_handler.setFormatter(formatter)
     stdout_handler.setFormatter(formatter)
@@ -376,49 +394,49 @@ if __name__ == '__main__':
 
     path1 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number.xlsx"
     path2 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number_gold.xlsx"
-    rules = [ { "type": "check_cell"
-              , "sheet_idx": 0
-              , "coordinate": "E3"
-              , "props": { "value": { "method": "approx:0.001"
-                                    , "ref": 191.6667
-                                    }
-                         }
+    rules = [{"type": "check_cell"
+                 , "sheet_idx": 0
+                 , "coordinate": "E3"
+                 , "props": {"value": {"method": "approx:0.001"
+            , "ref": 191.6667
+                                       }
+                             }
               }
-            ]
-    print( compare_table( path1, path2
+             ]
+    print(compare_table(path1, path2
                         , rules=rules
                         )
-         )
-    print( compare_table( path2, path2
+          )
+    print(compare_table(path2, path2
                         , rules=rules
                         )
-         )
+          )
 
     # Row Properties
-    #path1 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA.xlsx"
-    #path2 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA_gold.xlsx"
-    #workbook: Workbook = openpyxl.load_workbook(filename=path1)
-    #worksheet: Worksheet = workbook.active
-    #for r_no, dms in worksheet.column_dimensions.items():
-        #print(r_no, type(r_no), type(dms), dms.hidden)
+    # path1 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA.xlsx"
+    # path2 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA_gold.xlsx"
+    # workbook: Workbook = openpyxl.load_workbook(filename=path1)
+    # worksheet: Worksheet = workbook.active
+    # for r_no, dms in worksheet.column_dimensions.items():
+    # print(r_no, type(r_no), type(dms), dms.hidden)
 
     # Conditional Formats
-    #import formulas
-    #path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx"
-    #path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx"
-    #path3 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold_test.xlsx"
-    #workbook: Workbook = openpyxl.load_workbook(filename=path2)
-    #worksheet: Worksheet = workbook.active
-    #print(worksheet.conditional_formatting)
-    #for itm in worksheet.conditional_formatting:
-        #print(itm.cells)
-        #for r in itm.rules:
-            #print( r.type, r.formula, r.dxf.font.color.rgb
-                 #, r.dxf.fill.fgColor.rgb, r.dxf.fill.bgColor.rgb
-                 #)
-            #condition = formulas.Parser().ast("=" + r.formula[0])[1].compile()
-            ##print(r.type, r.operator, r.dxfId, r.dxf)
-            #for r in itm.cells:
-                #for c in r.cells:
-                    #value = worksheet.cell(row=c[0], column=c[1]).value
-                    #print(value, condition(str(value)))
+    # import formulas
+    # path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx"
+    # path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx"
+    # path3 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold_test.xlsx"
+    # workbook: Workbook = openpyxl.load_workbook(filename=path2)
+    # worksheet: Worksheet = workbook.active
+    # print(worksheet.conditional_formatting)
+    # for itm in worksheet.conditional_formatting:
+    # print(itm.cells)
+    # for r in itm.rules:
+    # print( r.type, r.formula, r.dxf.font.color.rgb
+    # , r.dxf.fill.fgColor.rgb, r.dxf.fill.bgColor.rgb
+    # )
+    # condition = formulas.Parser().ast("=" + r.formula[0])[1].compile()
+    ##print(r.type, r.operator, r.dxfId, r.dxf)
+    # for r in itm.cells:
+    # for c in r.cells:
+    # value = worksheet.cell(row=c[0], column=c[1]).value
+    # print(value, condition(str(value)))
diff --git a/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json b/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json
index d0c5a47..0e8b6d8 100644
--- a/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json
+++ b/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json
@@ -92,8 +92,9 @@
 			],
 			"dest": [
 				"Padding_Decimals_In_Formular_gold.xlsx",
-				"Padding_Decimals_In_Formular_gold-Sheet1.xlsx"
-			]
+				"Padding_Decimals_In_Formular_gold-Sheet1.csv"
+			],
+			"multi": true
 		},
 		"options": {
 			"rules": [
diff --git a/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json b/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json
index 980aa57..c985a2e 100644
--- a/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json
+++ b/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json
@@ -88,7 +88,7 @@
 			"type": "vm_file",
 			"path": [
 				"/home/user/Keep_Two_decimal_points.xlsx",
-				"/home/user/Keep_Two_decimal_points-Sheet1.xlsx"
+				"/home/user/Keep_Two_decimal_points-Sheet1.csv"
 			],
 			"dest": [
 				"Keep_Two_decimal_points.xlsx",
diff --git a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json
index b19b14a..ce5b308 100644
--- a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json
+++ b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json
@@ -7,7 +7,7 @@
 		{
 			"type": "download",
 			"parameters": {
-				"file": [
+				"files": [
 					{
 						"url": "https://drive.usercontent.google.com/download?id=1uT0axjo9lwkKu6hYVnsAL2FCrdH0DLUv&export=download&authuser=0&confirm=t&uuid=e7da6304-9c7a-4862-8a30-9f2284b843da&at=APZUnTVNHThpAZJmF6IuPckFvslw:1704187618838",
 						"path": "/home/user/Set_Decimal_Separator_Dot.xlsx"
diff --git a/experiment_a11y_tree.py b/experiment_a11y_tree.py
index 5a8a5ad..40836da 100644
--- a/experiment_a11y_tree.py
+++ b/experiment_a11y_tree.py
@@ -186,24 +186,21 @@ if __name__ == '__main__':
     ]
 
     calc_list = [
-        # "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
-        # "0bf05a7d-b28b-44d2-955a-50b41e24012a",
-        # "7a4e4bc8-922c-4c84-865c-25ba34136be1",
-        # "2bd59342-0664-4ccb-ba87-79379096cc08",
-        # "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
-        # "7efeb4b1-3d19-4762-b163-63328d66303b",
-        # "4e6fcf72-daf3-439f-a232-c434ce416af6",
-        # "6054afcb-5bab-4702-90a0-b259b5d3217c",
-        # "abed40dc-063f-4598-8ba5-9fe749c0615d",
-        # "01b269ae-2111-4a07-81fd-3fcd711993b0",
-        # "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
-        # "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
+        "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
+        "0bf05a7d-b28b-44d2-955a-50b41e24012a",
+        "7a4e4bc8-922c-4c84-865c-25ba34136be1",
+        "2bd59342-0664-4ccb-ba87-79379096cc08",
+        "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
+        "7efeb4b1-3d19-4762-b163-63328d66303b",
+        "4e6fcf72-daf3-439f-a232-c434ce416af6",
+        "6054afcb-5bab-4702-90a0-b259b5d3217c",
+        "abed40dc-063f-4598-8ba5-9fe749c0615d",
+        "01b269ae-2111-4a07-81fd-3fcd711993b0",
+        "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
+        "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
         "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
         "51b11269-2ca8-4b2a-9163-f21758420e78",
         "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
-        "f5a90742-3fa2-40fc-a564-f29b054e0337",
-        "22df9241-f8d7-4509-b7f1-37e501a823f7",
-        "1434ca3e-f9e3-4db8-9ca7-b4c653be7d17",
         "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
         "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
         "3aaa4e37-dc91-482e-99af-132a612d40f3",
diff --git a/experiment_screenshot.py b/experiment_screenshot.py
index 28b0f1e..f490d69 100644
--- a/experiment_screenshot.py
+++ b/experiment_screenshot.py
@@ -167,5 +167,35 @@ if __name__ == '__main__':
         "480bcfea-d68f-4aaa-a0a9-2589ef319381",
         "af630914-714e-4a24-a7bb-f9af687d3b91"
     ]
-    for example_id in chrome_list:
-        main("chrome", example_id)
+    calc_list = [
+        "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
+        "0bf05a7d-b28b-44d2-955a-50b41e24012a",
+        "7a4e4bc8-922c-4c84-865c-25ba34136be1",
+        "2bd59342-0664-4ccb-ba87-79379096cc08",
+        "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
+        "7efeb4b1-3d19-4762-b163-63328d66303b",
+        "4e6fcf72-daf3-439f-a232-c434ce416af6",
+        "6054afcb-5bab-4702-90a0-b259b5d3217c",
+        "abed40dc-063f-4598-8ba5-9fe749c0615d",
+        "01b269ae-2111-4a07-81fd-3fcd711993b0",
+        "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
+        "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
+        "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
+        "51b11269-2ca8-4b2a-9163-f21758420e78",
+        "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
+        "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
+        "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
+        "3aaa4e37-dc91-482e-99af-132a612d40f3",
+        "37608790-6147-45d0-9f20-1137bb35703d",
+        "f9584479-3d0d-4c79-affa-9ad7afdd8850",
+        "d681960f-7bc3-4286-9913-a8812ba3261a",
+        "21df9241-f8d7-4509-b7f1-37e501a823f7",
+        "1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
+        "357ef137-7eeb-4c80-a3bb-0951f26a8aff",
+        "aa3a8974-2e85-438b-b29e-a64df44deb4b",
+        "a01fbce3-2793-461f-ab86-43680ccbae25",
+        "4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
+    ]
+
+    for example_id in calc_list:
+        main("libreoffice_calc", example_id)
diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py
index 7656a0a..ff8d0d0 100644
--- a/mm_agents/gpt_4v_agent.py
+++ b/mm_agents/gpt_4v_agent.py
@@ -505,6 +505,7 @@ class GPT4v_Agent:
     @backoff.on_exception(
         backoff.expo,
         (APIError, RateLimitError, APIConnectionError, ServiceUnavailableError, InvalidRequestError),
+        max_tries=3
     )
     def call_llm(self, payload):
         response = requests.post(
@@ -514,6 +515,18 @@ class GPT4v_Agent:
         )
 
         if response.status_code != 200:
+            if response.json()['error']['code'] == "context_length_exceeded":
+                print("Context length exceeded. Retrying with a smaller context.")
+                payload["messages"] = payload["messages"][-1:]
+                retry_response = requests.post(
+                    "https://api.openai.com/v1/chat/completions",
+                    headers=self.headers,
+                    json=payload
+                )
+                if retry_response.status_code != 200:
+                    print("Failed to call LLM: " + retry_response.text)
+                    return ""
+
             print("Failed to call LLM: " + response.text)
             return ""
         else:

From 532826835d3026d412289e5cb7418fc6d9a35834 Mon Sep 17 00:00:00 2001
From: rhythmcao <ruishengcao@gmail.com>
Date: Mon, 29 Jan 2024 11:16:27 +0800
Subject: [PATCH 11/13] chrome (google drive related) + X multi-app examples
 finished (leaving two emails and thunderbird-profile.tar.gz to be crafted)

---
 desktop_env/evaluators/metrics/__init__.py       |  1 +
 desktop_env/evaluators/metrics/gimp.py           | 16 ++++++++++++++++
 .../46407397-a7d5-4c6b-92c6-dbe038b1457b.json    | 10 +++++-----
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 82b889e..8bd1cdc 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -13,6 +13,7 @@ from .docs import is_first_line_centered, check_file_exists, compare_contains_im
 from .general import check_csv, check_accessibility_tree, run_sqlite3, check_json
 from .general import exact_match, fuzzy_match, check_include_exclude
 from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions
+from .gimp import compare_images
 from .libreoffice import check_libre_locale
 from .pdf import check_pdf_pages
 from .slides import check_presenter_console_disable, check_image_stretch_and_center, check_slide_numbers_color, \
diff --git a/desktop_env/evaluators/metrics/gimp.py b/desktop_env/evaluators/metrics/gimp.py
index 1e919ef..6d1a30e 100644
--- a/desktop_env/evaluators/metrics/gimp.py
+++ b/desktop_env/evaluators/metrics/gimp.py
@@ -1,6 +1,22 @@
 import os
+from typing import List, Union
 from PIL import Image, ImageChops, ImageStat
 
+def compare_images(pred_img_path_list: Union[str, List[str]], gold_img_path_list: Union[str, List[str]]) -> float:
+    """Return 1.0 only if both inputs (a path or a list of paths) pair up one-to-one with identical images, else 0.0."""
+    preds = pred_img_path_list if isinstance(pred_img_path_list, list) else [pred_img_path_list]
+    golds = gold_img_path_list if isinstance(gold_img_path_list, list) else [gold_img_path_list]
+    if len(preds) != len(golds):  # zip() would silently ignore the unmatched images
+        return 0.0
+    for pred_img_path, gold_img_path in zip(preds, golds):
+        with Image.open(pred_img_path) as pred_img, Image.open(gold_img_path) as gold_img:
+            # A differing size or mode is a mismatch; such pairs are never handed to ImageChops.difference().
+            if ((pred_img.size, pred_img.mode) != (gold_img.size, gold_img.mode)
+                    or ImageChops.difference(pred_img, gold_img).getbbox()):
+                return 0.0
+    return 1.0
+
+
 def get_gimp_export_path():
     # Path to GIMP's configuration file. This example assumes GIMP version 2.10.
     # You need to adjust the path according to the GIMP version and user's file system.
diff --git a/evaluation_examples/examples/multi_apps/46407397-a7d5-4c6b-92c6-dbe038b1457b.json b/evaluation_examples/examples/multi_apps/46407397-a7d5-4c6b-92c6-dbe038b1457b.json
index 5475c70..0bf4bcb 100644
--- a/evaluation_examples/examples/multi_apps/46407397-a7d5-4c6b-92c6-dbe038b1457b.json
+++ b/evaluation_examples/examples/multi_apps/46407397-a7d5-4c6b-92c6-dbe038b1457b.json
@@ -1,7 +1,7 @@
 {
     "id": "46407397-a7d5-4c6b-92c6-dbe038b1457b",
     "snapshot": "chrome",
-    "instruction": "Help me export charts, graph or other images from docx files received in email xxx in Thunderbird and upload them in the figures/ folder in Google Drive for later use (use pure numbers to name them).",
+    "instruction": "Help me export charts, graph or other images from docx files received in email xxx in Thunderbird and upload these png files to the figures/ folder in Google Drive for later use (use numbers to name them).",
     "source": "https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive",
     "config": [
         {
@@ -93,7 +93,7 @@
         "chrome"
     ],
     "evaluator": {
-        "func": "compare_figures",
+        "func": "compare_images",
         "result": {
             "type": "googledrive_file",
             "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
@@ -120,9 +120,9 @@
         "expected": {
             "type": "cloud_file",
             "path": [
-                "file1",
-                "file2",
-                "file3"
+                "https://drive.usercontent.google.com/download?id=19J5tzWjx9hdo-n0MC3upzAntVMa8WUgk&export=download&authuser=0&confirm=t&uuid=be790579-8db9-4bd2-a757-beb27af386af&at=APZUnTVM2PjNDXhlwFZ6WAFdNVsD:1706497547717",
+                "https://drive.usercontent.google.com/download?id=1S04RpR5dk80LylIYGvA4e3sAUBd6wdlQ&export=download&authuser=0&confirm=t&uuid=b302de03-04f7-455c-ab0c-b3cbbeb6929a&at=APZUnTVD8zMZGO1_GWaFUm1cNXul:1706497555463",
+                "https://drive.usercontent.google.com/download?id=11NRLh93RTzEd0Cy-cYwMyNJSFG7-vP9c&export=download&authuser=0&confirm=t&uuid=02500115-dea3-481a-af4f-a723d9a62169&at=APZUnTW9-gENlsyfdIPA4PTA0emh:1706497560874"
             ],
             "dest": [
                 "1_gold.png",

From 37e09a994efa4fe62b221e9cf0a36a33082df795 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Mon, 29 Jan 2024 13:23:06 +0800
Subject: [PATCH 12/13] Fix some errors found in impress and thunderbird
 examples

---
 desktop_env/evaluators/metrics/__init__.py                    | 2 +-
 desktop_env/evaluators/metrics/slides.py                      | 4 ++--
 .../8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json                 | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 82b889e..ae3c2dc 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -10,7 +10,7 @@ from .general import exact_match, fuzzy_match
 from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
     compare_insert_equation, compare_highlighted_text
 from .docs import is_first_line_centered, check_file_exists, compare_contains_image
-from .general import check_csv, check_accessibility_tree, run_sqlite3, check_json
+from .general import check_csv, check_accessibility_tree, run_sqlite3, check_json, check_list
 from .general import exact_match, fuzzy_match, check_include_exclude
 from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions
 from .libreoffice import check_libre_locale
diff --git a/desktop_env/evaluators/metrics/slides.py b/desktop_env/evaluators/metrics/slides.py
index 616e7bc..df5eb7e 100644
--- a/desktop_env/evaluators/metrics/slides.py
+++ b/desktop_env/evaluators/metrics/slides.py
@@ -57,9 +57,9 @@ def check_image_stretch_and_center(modified_ppt, original_ppt):
             abs(the_modified_image.height - original_pres.slide_height) > Inches(0.1) or
             abs(the_modified_image.left - (original_pres.slide_width - the_modified_image.width) / 2) > Inches(0.1) or
             abs(the_modified_image.top - (original_pres.slide_height - the_modified_image.height) / 2) > Inches(0.1)):
-        return False
+        return 0.
 
-    return True
+    return 1.
 
 
 def is_red_color(color):
diff --git a/evaluation_examples/examples/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json b/evaluation_examples/examples/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json
index 1016e37..2c115d0 100644
--- a/evaluation_examples/examples/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json
+++ b/evaluation_examples/examples/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json
@@ -74,7 +74,7 @@
 					"type": "style",
 					"sheet_idx0": 0,
 					"sheet_idx1": "EI0",
-					"props": "bgcolor"
+					"props": ["bgcolor"]
 				}
 			]
 		}

From dfc12430c032069ae6b90a87b310fb28a0a3bfd0 Mon Sep 17 00:00:00 2001
From: rhythmcao <ruishengcao@gmail.com>
Date: Mon, 29 Jan 2024 19:22:22 +0800
Subject: [PATCH 13/13] add multi-app example; drop dbt examples (examples in
 another project added by accident)

---
 desktop_env/evaluators/metrics/docs.py        |  28 ++--
 .../0aa56709-3293-5849-ad47-e377f49fd3a0.json | 119 -----------------
 .../492c2c87-b04a-544a-b5dd-eb808036bf85.json |  89 -------------
 .../49f981ee-f793-5e27-9a53-083d66934ea1.json | 126 ------------------
 .../8aa9e870-b0c9-5417-be80-03154e83c7a3.json | 102 --------------
 .../8ff98608-8e0e-526e-9413-d744554ba708.json |  86 ------------
 .../2b9493d7-49b8-493a-a71b-56cd1f4d6908.json | 101 ++++++++++++++
 .../2c9fc0de-3ee7-45e1-a5df-c86206ad78b5.json |  63 +++++++++
 .../51f5801c-18b3-4f25-b0c3-02f85507a078.json |  46 +++++++
 .../settings/googledrive/credentials.json     |   2 +-
 10 files changed, 228 insertions(+), 534 deletions(-)
 delete mode 100644 evaluation_examples/examples/dbt/0aa56709-3293-5849-ad47-e377f49fd3a0.json
 delete mode 100644 evaluation_examples/examples/dbt/492c2c87-b04a-544a-b5dd-eb808036bf85.json
 delete mode 100644 evaluation_examples/examples/dbt/49f981ee-f793-5e27-9a53-083d66934ea1.json
 delete mode 100644 evaluation_examples/examples/dbt/8aa9e870-b0c9-5417-be80-03154e83c7a3.json
 delete mode 100644 evaluation_examples/examples/dbt/8ff98608-8e0e-526e-9413-d744554ba708.json
 create mode 100644 evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json
 create mode 100644 evaluation_examples/examples/multi_apps/2c9fc0de-3ee7-45e1-a5df-c86206ad78b5.json
 create mode 100644 evaluation_examples/examples/multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json

diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py
index 3e82e7d..8360c60 100644
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -50,24 +50,30 @@ def contains_page_break(docx_file):
     return 0
 
 
-def compare_docx_files(file1, file2):
+def compare_docx_files(file1, file2, ignore_blanks=True):
     doc1 = Document(file1)
     doc2 = Document(file2)
 
     doc1_paragraphs = [p.text for p in doc1.paragraphs]
     doc2_paragraphs = [p.text for p in doc2.paragraphs]
 
-    if len(doc1_paragraphs) != len(doc2_paragraphs):
-        # print(len(doc1_paragraphs))
-        # print(len(doc2_paragraphs))
-        return 0
-
-    # Compare each paragraph
-    for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
-        if p1 != p2:
-            # print(p1)
-            # print(p2)
+    if ignore_blanks:
+        text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
+        text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
+        if text1 != text2:
             return 0
+    else:
+        if len(doc1_paragraphs) != len(doc2_paragraphs):
+            # print(len(doc1_paragraphs))
+            # print(len(doc2_paragraphs))
+            return 0
+
+        # Compare each paragraph
+        for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
+            if p1 != p2:
+                # print(p1)
+                # print(p2)
+                return 0
 
     return 1
 
diff --git a/evaluation_examples/examples/dbt/0aa56709-3293-5849-ad47-e377f49fd3a0.json b/evaluation_examples/examples/dbt/0aa56709-3293-5849-ad47-e377f49fd3a0.json
deleted file mode 100644
index 4e74142..0000000
--- a/evaluation_examples/examples/dbt/0aa56709-3293-5849-ad47-e377f49fd3a0.json
+++ /dev/null
@@ -1,119 +0,0 @@
-{
-    "id": "0aa56709-3293-5849-ad47-e377f49fd3a0",
-    "snapshot": "dbt",
-    "instruction": "Update models/schema.yml file to include some description fields. 1) The description fields include what each table is about. 2) For the primary key column of each table, add the description \"Primary key\". 3) Also insert one description for column customers.first_order_date that \"NULL when a customer has not yet placed an order.\". Then, use dbt docs command to generate the documentation for this dbt project and launch the documentation in a local website with port 8020.",
-    "source": [
-      "https://docs.getdbt.com/guides/manual-install?step=13"
-  ],
-    "config": [
-      {
-        "type": "download",
-        "parameters": {
-            "files": [
-                {
-                    "url": "https://drive.usercontent.google.com/download?id=1TZmmW7wYnWUQVMCH5JOBCxxdgm_QN-vz&export=download&authuser=0&confirm=t&uuid=43113538-c0e4-4e23-8a18-12b727b9f890&at=APZUnTWgSRXjNNZZIt8ni7rsTxoy:1705910001969",
-                    "path": "/home/user/projects/jaffle_shop.zip"
-                }
-            ]
-        }
-      },
-      {
-        "type": "execute",
-        "parameters": {
-            "command": ["/bin/bash", "-c", "unzip -oq /home/user/projects/jaffle_shop.zip -d /home/user/projects/ && rm /home/user/projects/jaffle_shop.zip && mkdir -p /home/user/.dbt"]
-        }
-      },
-      {
-        "type": "download",
-        "parameters": {
-            "files": [
-                {
-                    "url": "https://drive.usercontent.google.com/download?id=1xkXjiFhRNoiX_-0zikfdwzJ8UvdXEVAt&export=download&authuser=0&confirm=t&uuid=bc8d0eb8-99b3-4be7-b110-c0a56246d2d2&at=APZUnTUaPD_Z6ov6uMWDNf5rSy3-:1705978221799",
-                    "path": "/home/user/.dbt/profiles.yml"
-                }
-            ]
-        }
-      }
-    ],
-    "trajectory": "trajectories/",
-    "related_apps": [
-      "dbt",
-      "duckdb"
-    ],
-    "evaluator": {
-      "func": [
-        "check_yaml_file",
-        "check_dbt_command",
-        "check_dbt_command",
-        "check_dbt_command",
-        "check_dbt_command"
-      ],
-      "conj": "and",
-      "result": [
-        {
-          "type": "vm_file",
-          "path": "/home/user/projects/jaffle_shop/models/schema.yml",
-          "dest": "schema.yml"
-        },
-        {
-          "type": "vm_command_line",
-          "command": ["/bin/bash", "-c", "cd /home/user/projects/jaffle_shop; source ~/anaconda3/etc/profile.d/conda.sh && conda activate dbt; if [ -s target/catalog.json ] && [ -s target/index.html ] ; then echo \"dbt docs generate succeed\" ; else echo \"dbt docs generate failed\" ; fi"]
-        },
-        {
-          "type": "vm_command_line",
-          "command": ["/bin/bash", "-c", "cd /home/user/projects/jaffle_shop; source ~/anaconda3/etc/profile.d/conda.sh && conda activate dbt; curl -o index.html http://localhost:8020; if [ -s index.html ] ; then echo \"dbt docs serve succeed\" ; else echo \"dbt docs serve failed\" ; fi"]
-        },
-        {
-          "type": "vm_command_line",
-          "command": ["/bin/bash", "-c", "cd /home/user/projects/jaffle_shop; source ~/anaconda3/etc/profile.d/conda.sh && conda activate dbt; diff index.html target/index.html > diff.log; if [ -s diff.log ] ; then echo \"dbt docs serve failed\" ; else echo \"dbt docs serve succeed\" ; fi ; rm -rf diff.log index.html"]
-        },
-        {
-          "type": "vm_command_line",
-          "command": ["/bin/bash", "-c", "cd /home/user/projects/jaffle_shop; source ~/anaconda3/etc/profile.d/conda.sh && conda activate dbt; dbt docs generate"]
-        }
-      ],
-      "expected": [
-        {
-          "type": "rule",
-          "rules": [
-            ["not_null", ["models", ["name", "customers"], "description"], ""],
-            ["not_null", ["models", ["name", "stg_customers"], "description"], ""],
-            ["not_null", ["models", ["name", "stg_orders"], "description"], ""],
-            ["match", ["models", ["name", "customers"], "columns", ["name", "customer_id"], "description"], "Primary key"],
-            ["match", ["models", ["name", "stg_customers"], "columns", ["name", "customer_id"], "description"], "Primary key"],
-            ["match", ["models", ["name", "stg_orders"], "columns", ["name", "order_id"], "description"], "Primary key"],
-            ["match", ["models", ["name", "customers"], "columns", ["name", "first_order_date"], "description"], "NULL when a customer has not yet placed an order."]
-          ]
-        },
-        {
-          "type": "rule",
-          "rules": [
-            ["contains", "dbt docs generate succeed", ""],
-            ["excludes", "dbt docs generate failed", ""]
-          ]
-        },
-        {
-          "type": "rule",
-          "rules": [
-            ["contains", "dbt docs serve succeed", ""],
-            ["excludes", "dbt docs serve failed", ""]
-          ]
-        },
-        {
-          "type": "rule",
-          "rules": [
-            ["contains", "dbt docs serve succeed", ""],
-            ["excludes", "dbt docs serve failed", ""]
-          ]
-        },
-        {
-          "type": "rule",
-          "rules": [
-            ["excludes", "error", ""],
-            ["excludes", "Error", ""],
-            ["contains", "Catalog written to", ""]
-          ]
-        }
-      ]
-    }
-}
\ No newline at end of file
diff --git a/evaluation_examples/examples/dbt/492c2c87-b04a-544a-b5dd-eb808036bf85.json b/evaluation_examples/examples/dbt/492c2c87-b04a-544a-b5dd-eb808036bf85.json
deleted file mode 100644
index 9dbfe00..0000000
--- a/evaluation_examples/examples/dbt/492c2c87-b04a-544a-b5dd-eb808036bf85.json
+++ /dev/null
@@ -1,89 +0,0 @@
-{
-    "id": "492c2c87-b04a-544a-b5dd-eb808036bf85",
-    "snapshot": "dbt",
-    "instruction": "Separate the single dbt model customers into separate ones and use the ``ref`` function to build models on top of others. 1) Create a new SQL file, models/stg_customers.sql, with the SQL from the customers common table expressions (CTE) in the original query. 2) Create a second new SQL file, models/stg_orders.sql, with the SQL from the orders CTE in the original query. These two stg models are materialized as view. 3) Edit the SQL in models/customers.sql file to refer two these two staged models and execute dbt run. Please ensure the running succeeds.",
-    "source": [
-      "https://docs.getdbt.com/guides/manual-install?step=11"
-  ],
-    "config": [
-        {
-          "type": "download",
-          "parameters": {
-              "files": [
-                  {
-                      "url": "https://drive.usercontent.google.com/download?id=1-Nn0leqKdVnA1gQ6s50wkGSa2xZauEUe&export=download&authuser=0&confirm=t&uuid=d7596b15-ae39-43cb-a940-03bee8a0c961&at=APZUnTXwp5xMjHpi5AYVF3Z_bfw2:1705582080467",
-                      "path": "/home/user/projects/jaffle_shop.zip"
-                  }
-              ]
-          }
-        },
-        {
-          "type": "execute",
-          "parameters": {
-              "command": ["/bin/bash", "-c", "unzip -oq /home/user/projects/jaffle_shop.zip -d /home/user/projects/ && rm /home/user/projects/jaffle_shop.zip && mkdir -p /home/user/.dbt"]
-          }
-        },
-        {
-          "type": "download",
-          "parameters": {
-              "files": [
-                  {
-                      "url": "https://drive.usercontent.google.com/download?id=1xkXjiFhRNoiX_-0zikfdwzJ8UvdXEVAt&export=download&authuser=0&confirm=t&uuid=bc8d0eb8-99b3-4be7-b110-c0a56246d2d2&at=APZUnTUaPD_Z6ov6uMWDNf5rSy3-:1705978221799",
-                      "path": "/home/user/.dbt/profiles.yml"
-                  }
-              ]
-          }
-        }
-    ],
-    "trajectory": "trajectories/",
-    "related_apps": [
-      "dbt",
-      "duckdb"
-    ],
-    "evaluator": {
-      "func": [
-        "check_database",
-        "check_dbt_command"
-      ],
-      "conj": "and",
-      "result": [
-        {
-          "type": "vm_file",
-          "path": "/home/user/projects/jaffle_shop/jaffle_shop.duckdb",
-          "dest": "jaffle_shop.duckdb"
-        },
-        {
-          "type": "vm_command_line",
-          "command": ["/bin/bash", "-c", "cd /home/user/projects/jaffle_shop; source ~/anaconda3/etc/profile.d/conda.sh && conda activate dbt; dbt run"]
-        }
-      ],
-      "expected": [
-        {
-          "type": "cloud_file",
-          "path": "https://drive.usercontent.google.com/download?id=1rtRXtgKiwyWSVPCbDVkjYXh8AMni7t1W&export=download&authuser=0&confirm=t&uuid=6e4331f7-4e46-4894-910f-9af77450534d&at=APZUnTXySypK2OcEdOb8FVHQnbd7:1705583620817",
-          "dest": "gold_jaffle_shop.duckdb"
-        },
-        {
-          "type": "rule",
-          "rules": [
-            ["contains", "of 3 OK created sql view model .*\\.stg_customers", ""],
-            ["contains", "of 3 OK created sql view model .*\\.stg_orders", ""],
-            ["contains", "3 of 3 OK created sql table model .*\\.customers", ""],
-            ["contains", "Completed successfully", ""],
-            ["contains", "PASS=3", ""],
-            ["contains", "TOTAL=3", ""]
-          ]
-        }
-      ],
-      "options": [
-        {
-          "db_type": "duckdb", 
-          "check_type": [
-            "table-schema-content",
-            "view-schema-content"
-          ]
-        },
-        {}
-      ]
-    }
-}
\ No newline at end of file
diff --git a/evaluation_examples/examples/dbt/49f981ee-f793-5e27-9a53-083d66934ea1.json b/evaluation_examples/examples/dbt/49f981ee-f793-5e27-9a53-083d66934ea1.json
deleted file mode 100644
index 5b4a21e..0000000
--- a/evaluation_examples/examples/dbt/49f981ee-f793-5e27-9a53-083d66934ea1.json
+++ /dev/null
@@ -1,126 +0,0 @@
-{
-    "id": "49f981ee-f793-5e27-9a53-083d66934ea1",
-    "snapshot": "dbt",
-    "instruction": "Add dbt test for the three models in jaffle_shop project. 1) Create a YAML file in the models directory, named models/schema.yml. 2) Ensure that customer_id and order_id are unique and not empty in related tables or views. Column status must be value from placed, shipped, completed, return_pending, returned. And customer_id in stg_orders is not null and is a foreign key in the referenced stg_customers. 3) Run dbt test, and confirm that all tests passed.",
-    "source": [
-      "https://docs.getdbt.com/guides/manual-install?step=12"
-  ],
-    "config": [
-      {
-        "type": "download",
-        "parameters": {
-            "files": [
-                {
-                    "url": "https://drive.usercontent.google.com/download?id=1QhPSdctnfYk0O5Tuo2NzldMDw0cKZi16&export=download&authuser=0&confirm=t&uuid=d68dcd34-304d-4e66-a215-99abd3954c24&at=APZUnTXihMvZPodIOVVxg3S0tUs4:1705587177507",
-                    "path": "/home/user/projects/jaffle_shop.zip"
-                }
-            ]
-        }
-      },
-      {
-        "type": "execute",
-        "parameters": {
-            "command": ["/bin/bash", "-c", "unzip -oq /home/user/projects/jaffle_shop.zip -d /home/user/projects/ && rm /home/user/projects/jaffle_shop.zip && mkdir -p /home/user/.dbt"]
-        }
-      },
-      {
-        "type": "download",
-        "parameters": {
-            "files": [
-                {
-                    "url": "https://drive.usercontent.google.com/download?id=1xkXjiFhRNoiX_-0zikfdwzJ8UvdXEVAt&export=download&authuser=0&confirm=t&uuid=bc8d0eb8-99b3-4be7-b110-c0a56246d2d2&at=APZUnTUaPD_Z6ov6uMWDNf5rSy3-:1705978221799",
-                    "path": "/home/user/.dbt/profiles.yml"
-                }
-            ]
-        }
-      }
-    ],
-    "trajectory": "trajectories/",
-    "related_apps": [
-      "dbt",
-      "duckdb"
-    ],
-    "evaluator": {
-      "func": "check_dbt_command",
-      "result": {
-        "type": "dbt_test_result",
-        "pre-processing": ["mv", "/home/user/projects/jaffle_shop/jaffle_shop.duckdb", "/home/user/Desktop/jaffle_shop.duckdb"],
-        "path": [
-          "https://drive.usercontent.google.com/download?id=1AZ6hCtbyN8Ypzf0e2nkkxivxdAmKNHZQ&export=download&authuser=0&confirm=t&uuid=dffccf1e-a42b-45e7-966d-72d289a1062e&at=APZUnTUsGPahiUmvgXxji9x9Ii7o:1705668446168",
-          "https://drive.usercontent.google.com/download?id=1z0hApNSqvs2oUwJiBQmFXrsomAxa1RhJ&export=download&authuser=0&confirm=t&uuid=0a2ba1be-1e15-4e8d-8458-0a661eaeef6f&at=APZUnTVZMe3y1OpU6ipsm0U6Ryb6:1705668535249",
-          "https://drive.usercontent.google.com/download?id=1OYsLQSYAdaAyu0sa6Y8IstmWz7wXneUN&export=download&authuser=0&confirm=t&uuid=33818f3c-a125-44d4-b9ba-7c5465976250&at=APZUnTXWFrNUtydf460ZWA-2jJrg:1705668651799",
-          "https://drive.usercontent.google.com/download?id=1KgGuJMeCXpIG2_TAKIJsT-YoIbSWswYp&export=download&authuser=0&confirm=t&uuid=4f2aadaa-2bc0-4ecf-b725-19c73375e370&at=APZUnTVt5cFNMzCsRxekJUaBPfhw:1705668748132",
-          "https://drive.usercontent.google.com/download?id=1OJ0xgAF1KqhovIvkimp3J5ZM-crfZU6g&export=download&authuser=0&confirm=t&uuid=48bcfac9-d40c-40c5-ac07-da9288a4ebea&at=APZUnTVUl1FaKtXy4oVpkYnGKIdt:1705669212860",
-          "https://drive.usercontent.google.com/download?id=10mr1jhdA52_bKOgoDgG0mAal1nzXkG21&export=download&authuser=0&confirm=t&uuid=5adde4ce-59d5-4da0-ada8-9c7df3e7434c&at=APZUnTVwJ4WDHSnfUv8yB08jr1b7:1705669280449",
-          "https://drive.usercontent.google.com/download?id=1_Dh9lwVDo8TfB0jg8QsdemRZ14r9O-uY&export=download&authuser=0&confirm=t&uuid=a8445889-8d13-4b66-88e7-4b675a943bec&at=APZUnTWrfFiAMJVLNFuteOctpjzs:1705669328524",
-          "https://drive.usercontent.google.com/download?id=1n4fLzFWj9dEqRdLOSwSB5SB8O3kHyEBZ&export=download&authuser=0&confirm=t&uuid=78cd4358-cc9e-4d2c-b74f-93b7760de4d3&at=APZUnTXbEj7FEQL5DEU5g4wsDFHs:1705669396969",
-          "https://drive.usercontent.google.com/download?id=1sjwB1aoi5UzbHQI-mb0hph2qd4qRKZR_&export=download&authuser=0&confirm=t&uuid=2fe93a52-d820-462e-9cce-36daa3a5ce30&at=APZUnTXPsOgN81L1RkenHg__oXUB:1705669474430",
-          "https://drive.usercontent.google.com/download?id=1ujsyfS7F3A6YCgZJ7N9ixcRlgNdAAdXJ&export=download&authuser=0&confirm=t&uuid=7e1b73c9-a14b-4d8b-800c-77f16ac5f897&at=APZUnTWBVbznnKO-zYj7UI6qk-qz:1705669501449",
-          "https://drive.usercontent.google.com/download?id=1tujswM-r4GKav5CpmciF-H3zYAdEq-uL&export=download&authuser=0&confirm=t&uuid=32253ad7-d343-4961-bc14-1e4cf4fee244&at=APZUnTWiRpT1pHF6Qp17y1VqcT7Z:1705676183100"
-        ],
-        "dest": "/home/user/projects/jaffle_shop/jaffle_shop.duckdb",
-        "command": ["/bin/bash", "-c", "cd /home/user/projects/jaffle_shop; source ~/anaconda3/etc/profile.d/conda.sh && conda activate dbt; dbt test"],
-        "post-processing": ["/bin/bash", "-c", "mv /home/user/Desktop/jaffle_shop.duckdb /home/user/projects/jaffle_shop/jaffle_shop.duckdb"]
-      },
-      "expected": {
-          "type": "rule",
-          "rules": [
-            [
-              ["excludes", "Nothing to do", ""],
-              ["contains", "Completed successfully", ""],
-              ["contains", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["excludes", "Completed successfully", ""],
-              ["excludes", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["excludes", "Completed successfully", ""],
-              ["excludes", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["excludes", "Completed successfully", ""],
-              ["excludes", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["excludes", "Completed successfully", ""],
-              ["excludes", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["excludes", "Completed successfully", ""],
-              ["excludes", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["excludes", "Completed successfully", ""],
-              ["excludes", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["excludes", "Completed successfully", ""],
-              ["excludes", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["excludes", "Completed successfully", ""],
-              ["excludes", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["excludes", "Completed successfully", ""],
-              ["excludes", "ERROR=0", ""]
-            ],
-            [
-              ["excludes", "Nothing to do", ""],
-              ["contains", "Completed successfully", ""],
-              ["contains", "ERROR=0", ""]
-            ]
-          ]
-        }
-    }
-}
\ No newline at end of file
diff --git a/evaluation_examples/examples/dbt/8aa9e870-b0c9-5417-be80-03154e83c7a3.json b/evaluation_examples/examples/dbt/8aa9e870-b0c9-5417-be80-03154e83c7a3.json
deleted file mode 100644
index 042c2c5..0000000
--- a/evaluation_examples/examples/dbt/8aa9e870-b0c9-5417-be80-03154e83c7a3.json
+++ /dev/null
@@ -1,102 +0,0 @@
-{
-    "id": "8aa9e870-b0c9-5417-be80-03154e83c7a3",
-    "snapshot": "dbt",
-    "instruction": "Create a dbt project called jaffle_shop under ~/projects and configure the connection to duckdb with path jaffle_shop/jaffle_shop.duckdb for the target dev and prod. Load data from jaffle-shop-data.zip into that database using dbt.",
-    "source": [
-      "https://docs.getdbt.com/docs/core/connect-data-platform/duckdb-setup#",
-      "https://docs.getdbt.com/guides/manual-install?step=3",
-      "https://docs.getdbt.com/reference/commands/seed"
-  ],
-    "config": [
-      {
-        "type": "download",
-        "parameters": {
-            "files": [
-                {
-                    "url": "https://drive.usercontent.google.com/download?id=18pbzQOnAZEZ2psFLWJJQIyLlQBSNRxWs&export=download&authuser=0&confirm=t&uuid=d8045faa-0f74-4c34-ab98-55ab41bcc2c9&at=APZUnTUoi3u-jMpwO8osMEll4gDr:1705548224569",
-                    "path": "/home/user/projects/jaffle-shop-data.zip"
-                }
-            ]
-        }
-      }
-    ],
-    "trajectory": "trajectories/",
-    "related_apps": [
-      "dbt",
-      "duckdb"
-    ],
-    "evaluator": {
-      "func": [
-        "check_yaml_file",
-        "check_dbt_command",
-        "check_database",
-        "check_dbt_command"
-      ],
-      "conj": "and",
-      "result": [
-        {
-          "type": "dbt_profiles",
-          "paths": [
-            "/home/user/.dbt/profiles.yml",
-            "/home/user/projects/jaffle_shop/profiles.yml",
-            "$DBT_PROFILES_DIR"
-          ],
-          "dest": "profiles.yml"
-        },
-        {
-          "type": "vm_command_line",
-          "command": ["/bin/bash", "-c", "cd /home/user/projects/jaffle_shop; source ~/anaconda3/etc/profile.d/conda.sh && conda activate dbt; dbt debug"]
-        },
-        {
-          "type": "vm_file",
-          "path": "/home/user/projects/jaffle_shop/jaffle_shop.duckdb",
-          "dest": "jaffle_shop.duckdb"
-        },
-        {
-          "type": "vm_command_line",
-          "command": ["/bin/bash", "-c", "cd /home/user/projects/jaffle_shop; source ~/anaconda3/etc/profile.d/conda.sh && conda activate dbt; dbt seed"]
-        }
-      ],
-      "expected": [
-        {
-          "type": "rule",
-          "rules": [
-            ["match", ["jaffle_shop", "outputs", "dev", "type"], "duckdb"],
-            ["match", ["jaffle_shop", "outputs", "prod", "type"], "duckdb"],
-            ["in", ["jaffle_shop", "outputs", "dev", "path"], ["jaffle_shop.duckdb", "/home/user/projects/jaffle_shop/jaffle_shop.duckdb"]],
-            ["in", ["jaffle_shop", "outputs", "prod", "path"], ["jaffle_shop.duckdb", "/home/user/projects/jaffle_shop/jaffle_shop.duckdb"]]
-          ]
-        },
-        {
-          "type": "rule",
-          "rules": [
-            ["contains", "OK connection ok", ""],
-            ["contains", "All checks passed", ""]
-          ]
-        },
-        {
-          "type": "cloud_file",
-          "path": "https://drive.usercontent.google.com/download?id=12xJuEcBxGqPHjJriUPnkuGRkw9HsZniJ&export=download&authuser=0&confirm=t&uuid=f2b7147f-5114-461d-a939-486750c19029&at=APZUnTVBf4QJMyDPk5BmfZzTecU5:1705548962476",
-          "dest": "gold_jaffle_shop.duckdb"
-        },
-        {
-          "type": "rule",
-          "rules": [
-            ["excludes", "Nothing to do", ""],
-            ["contains", "Completed successfully", ""],
-            ["contains", "PASS=3", ""],
-            ["contains", "TOTAL=3", ""]
-          ]
-        }
-      ],
-      "options": [
-        {},
-        {},
-        {
-          "db_type": "duckdb",
-          "check_type": ["table-schema-content"]
-        },
-        {}
-      ]
-    }
-  }
\ No newline at end of file
diff --git a/evaluation_examples/examples/dbt/8ff98608-8e0e-526e-9413-d744554ba708.json b/evaluation_examples/examples/dbt/8ff98608-8e0e-526e-9413-d744554ba708.json
deleted file mode 100644
index 33117cb..0000000
--- a/evaluation_examples/examples/dbt/8ff98608-8e0e-526e-9413-d744554ba708.json
+++ /dev/null
@@ -1,86 +0,0 @@
-{
-    "id": "8ff98608-8e0e-526e-9413-d744554ba708",
-    "snapshot": "dbt",
-    "instruction": "Delete the dbt example models and write one dbt model \"customers\" from existing tables raw_customers and raw_orders. It contains columns customer_ids, first_name, last_name, first_order_date, most_recent_order_date and number_of_orders (default to 0). Set the materialization method as table and build it with dbt run to make it work.",
-    "source": [
-      "https://docs.getdbt.com/guides/manual-install?step=10",
-      "https://docs.getdbt.com/guides/manual-install?step=8",
-      "https://docs.getdbt.com/guides/manual-install?step=9"
-  ],
-    "config": [
-        {
-          "type": "download",
-          "parameters": {
-              "files": [
-                  {
-                      "url": "https://drive.usercontent.google.com/download?id=1iqimXoJUa2fd16I_qCZGHbVMHABJAh7D&export=download&authuser=0&confirm=t&uuid=65b73e29-0b30-415d-9928-d1ccf3be9880&at=APZUnTWCBNMro4gX8whSahDtx05S:1705563480661",
-                      "path": "/home/user/projects/jaffle_shop.zip"
-                  }
-              ]
-          }
-        },
-        {
-          "type": "execute",
-          "parameters": {
-              "command": ["/bin/bash", "-c", "unzip -oq /home/user/projects/jaffle_shop.zip -d /home/user/projects/ && rm /home/user/projects/jaffle_shop.zip && mkdir -p /home/user/.dbt"]
-          }
-        },
-        {
-          "type": "download",
-          "parameters": {
-              "files": [
-                  {
-                      "url": "https://drive.usercontent.google.com/download?id=1xkXjiFhRNoiX_-0zikfdwzJ8UvdXEVAt&export=download&authuser=0&confirm=t&uuid=bc8d0eb8-99b3-4be7-b110-c0a56246d2d2&at=APZUnTUaPD_Z6ov6uMWDNf5rSy3-:1705978221799",
-                      "path": "/home/user/.dbt/profiles.yml"
-                  }
-              ]
-          }
-        }
-    ],
-    "trajectory": "trajectories/",
-    "related_apps": [
-      "dbt",
-      "duckdb"
-    ],
-    "evaluator": {
-      "func": [
-        "check_database",
-        "check_dbt_command"
-      ],
-      "conj": "and",
-      "result": [
-        {
-          "type": "vm_file",
-          "path": "/home/user/projects/jaffle_shop/jaffle_shop.duckdb",
-          "dest": "jaffle_shop.duckdb"
-        },
-        {
-          "type": "vm_command_line",
-          "command": ["/bin/bash", "-c", "cd /home/user/projects/jaffle_shop; source ~/anaconda3/etc/profile.d/conda.sh && conda activate dbt; dbt run"]
-        }
-      ],
-      "expected": [
-        {
-          "type": "cloud_file",
-          "path": "https://drive.usercontent.google.com/download?id=1BnTQOiDIKriWGhLRj7C8MPgfk4JH-uf9&export=download&authuser=0&confirm=t&uuid=cee74971-d66b-4da0-8cdd-cb9f1b9da8e7&at=APZUnTWI7GqSBLnQ_g-NNsS7kDxq:1705567576749",
-          "dest": "gold_jaffle_shop.duckdb"
-        },
-        {
-          "type": "rule",
-          "rules": [
-            ["contains", "1 of 1 OK created sql table model .*\\.customers", ""],
-            ["contains", "Completed successfully", ""],
-            ["contains", "PASS=1", ""],
-            ["contains", "TOTAL=1", ""]
-          ]
-        }
-      ],
-      "options": [
-        {
-          "db_type": "duckdb",
-          "check_type": ["table-schema-content"]
-        },
-        {}
-      ]
-    }
-  }
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json b/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json
new file mode 100644
index 0000000..d83ed1b
--- /dev/null
+++ b/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json
@@ -0,0 +1,101 @@
+{
+    "id": "2b9493d7-49b8-493a-a71b-56cd1f4d6908",
+    "snapshot": "libreoffice_writer",
+    "instruction": "Hey, my LibreOffice Writer seems to have frozen and I can't get it to close normally. Can you help me force quit the application from the command line? I'm on Ubuntu and I don't want to restart my computer or lose any other work I have open.",
+    "source": "https://devicetests.com/kill-libreoffice-writer-command-line-ubuntu",
+    "config": [
+        {
+            "type": "download",
+            "parameters": {
+              "files": [
+                {
+                  "url": "https://drive.usercontent.google.com/download?id=104pg3yochKyH2Uvlp3BdvKmHgYmSIESu&export=download&authuser=0&confirm=t&uuid=d1926366-4e54-4a44-8dcd-fc49ed6524d7&at=APZUnTXcBFV9kcacsA0toU83lMKJ:1706505549057",
+                  "path": "/home/user/Desktop/15-MB-docx-file-download.docx"
+                }
+              ]
+            }
+        },
+        {
+            "type": "open",
+            "parameters": {
+                "path": "/home/user/Desktop/15-MB-docx-file-download.docx"
+            }
+        },
+        {
+            "type": "execute",
+            "parameters": {
+                "command": [
+                    "/bin/bash",
+                    "-c",
+                    "history -c && echo > ~/.bash_history && sleep 3"
+                ]
+            }
+        },
+        {
+            "type": "launch",
+            "parameters": {
+                "command": [
+                    "gnome-terminal",
+                    "--maximize"
+                ]
+            }
+        }
+    ],
+    "trajectory": "trajectories/",
+    "related_apps": [
+        "libreoffice_writer",
+        "terminal"
+    ],
+    "evaluator": {
+        "postconfig": [
+            {
+              "type": "execute",
+              "parameters": {
+                "command": [
+                    "/bin/bash",
+                    "-c",
+                    "killall gnome-terminal-server"
+                ]
+              }
+            }
+        ],
+        "func": ["check_include_exclude", "check_include_exclude"],
+        "conj": "and",
+        "result": [
+            {
+                "type": "vm_command_line",
+                "command": [
+                    "/bin/bash",
+                    "-c",
+                    "output=$(ps aux | grep \"[s]office\"); if [ -z \"$output\" ]; then echo \"true\"; else echo \"false\"; fi"
+                ]
+            },
+            {
+                "type": "vm_command_line",
+                "command": [
+                    "/bin/bash",
+                    "-c",
+                    "output=$(cat ~/.bash_history | grep \"[k]ill\"); if [ -z \"$output\" ]; then echo \"false\"; else echo \"true\"; fi"
+                ]
+            }
+        ],
+        "expected": [
+            {
+                "type": "rule",
+                "rules": {
+                    "include": [
+                        "true\n"
+                    ]
+                }
+            },
+            {
+                "type": "rule",
+                "rules": {
+                    "include": [
+                        "true\n"
+                    ]
+                }
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/2c9fc0de-3ee7-45e1-a5df-c86206ad78b5.json b/evaluation_examples/examples/multi_apps/2c9fc0de-3ee7-45e1-a5df-c86206ad78b5.json
new file mode 100644
index 0000000..3926120
--- /dev/null
+++ b/evaluation_examples/examples/multi_apps/2c9fc0de-3ee7-45e1-a5df-c86206ad78b5.json
@@ -0,0 +1,63 @@
+{
+    "id": "2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
+    "snapshot": "os",
+    "instruction": "Could you help me push the changes from commandline in current project to origin main, with the commit message \"daily update\"?",
+    "source": "https://nikki-ricks.medium.com/how-to-use-git-add-commit-and-push-in-vs-code-and-command-line-35c0e8c47b62",
+    "config": [
+        {
+            "type": "download",
+            "path": ""
+        },
+        {
+            "type": "execute",
+            "parameters": {
+                "command": [
+                    "/bin/bash",
+                    "-c",
+                    "git config --global user.name \"xlang\" && git config --global user.email \"xlang2024anonym@gmail.com\" && mkdir -p /home/user/projects/remote_project && cd /home/user/projects/remote_project && git init --initial-branch=main --bare .git"
+                ]
+            }
+        },
+        {
+            "type": "execute",
+            "parameters": {
+                "command": [
+                    "/bin/bash",
+                    "-c",
+                    "mkdir -p /home/user/projects/hello_world && cd /home/user/projects/hello_world && git init --initial-branch main && git remote add origin /home/user/projects/remote_project && echo \"Hello World!\" > README.md"
+                ]
+            }
+        },
+        {
+            "type": "launch",
+            "parameters": {
+                "command": [
+                    "gnome-terminal",
+                    "--maximize",
+                    "--working-directory=/home/user/projects/hello_world"
+                ]
+            }
+        }
+    ],
+    "trajectory": "trajectories/",
+    "related_apps": [
+        "os",
+        "terminal"
+    ],
+    "evaluator": {
+        "func": "compare_docx_files",
+        "result": {
+            "type": "vm_file",
+            "path": "/home/user/Desktop/notes.docx",
+            "dest": "notes.docx"
+        },
+        "expected": {
+            "type": "cloud_file",
+            "path": "https://drive.usercontent.google.com/download?id=1Xl6tgQ0K5qA1BDA2fKTK2xFLzXwbtkZ6&export=download",
+            "dest": "notes_gold.docx"
+        },
+        "options": {
+            "ignore_blanks": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json b/evaluation_examples/examples/multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json
new file mode 100644
index 0000000..3d32ee5
--- /dev/null
+++ b/evaluation_examples/examples/multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json
@@ -0,0 +1,46 @@
+{
+    "id": "51f5801c-18b3-4f25-b0c3-02f85507a078",
+    "snapshot": "libreoffice_impress",
+    "instruction": "I've been working on this presentation in LibreOffice Impress and I've added a bunch of speaker notes for my upcoming talk. I'd like to have those notes handy in a separate document when I rehearse. Could you assist me in extracting all the presenter notes from the Impress file and saving them as a Word document? Just keep the text of the notes, do not add any formatting or page number information. I'd like the file to be named 'notes.docx' and placed on my Desktop for easy access.",
+    "source": "https://github.com/danielrcollins1/ImpressExtractNotes",
+    "config": [
+        {
+            "type": "download",
+            "parameters": {
+              "files": [
+                {
+                  "url": "https://drive.usercontent.google.com/download?id=1e12nL_V7bffaLSocQ86EiGCdygzggWeu&export=download",
+                  "path": "/home/user/Desktop/Dickinson_Slides.pptx"
+                }
+              ]
+            }
+        },
+        {
+            "type": "open",
+            "parameters": {
+                "path": "/home/user/Desktop/Dickinson_Slides.pptx"
+            }
+        }
+    ],
+    "trajectory": "trajectories/",
+    "related_apps": [
+        "libreoffice_impress",
+        "libreoffice_writer"
+    ],
+    "evaluator": {
+        "func": "compare_docx_files",
+        "result": {
+            "type": "vm_file",
+            "path": "/home/user/Desktop/notes.docx",
+            "dest": "notes.docx"
+        },
+        "expected": {
+            "type": "cloud_file",
+            "path": "https://drive.usercontent.google.com/download?id=1Xl6tgQ0K5qA1BDA2fKTK2xFLzXwbtkZ6&export=download",
+            "dest": "notes_gold.docx"
+        },
+        "options": {
+            "ignore_blanks": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/evaluation_examples/settings/googledrive/credentials.json b/evaluation_examples/settings/googledrive/credentials.json
index 1174a00..ef8905d 100644
--- a/evaluation_examples/settings/googledrive/credentials.json
+++ b/evaluation_examples/settings/googledrive/credentials.json
@@ -1 +1 @@
-{"access_token": "ya29.a0AfB_byCwgV4hMbm-I9xpB2y3R5GGCfe63GrdDTE22Wv_upDecjO_Ey86lJtENTD_AWVgpB6iR4aidDsq44sJNOBVIMvZtPOfu1Hib3tiyXjS-P4ID0i0NHi21-ZGaewUe1RYBRp_1klpd5KAttaAZC8R-Tn-uSbhKXI0HwaCgYKAaASARISFQHGX2MiD2nlZt98Zb8FQIBcLc7n-A0173", "client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0e0qXy4xW1Ud5CgYIARAAGA4SNwF-L9IrWfaomed_CK0R7zZffcpT-GIXf3y2ZjqqAD0UP6UkbaMV9F_OEC6pBVaaX4TYnBKx3os", "token_expiry": "2024-01-25T16:47:34Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byCwgV4hMbm-I9xpB2y3R5GGCfe63GrdDTE22Wv_upDecjO_Ey86lJtENTD_AWVgpB6iR4aidDsq44sJNOBVIMvZtPOfu1Hib3tiyXjS-P4ID0i0NHi21-ZGaewUe1RYBRp_1klpd5KAttaAZC8R-Tn-uSbhKXI0HwaCgYKAaASARISFQHGX2MiD2nlZt98Zb8FQIBcLc7n-A0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
\ No newline at end of file
+{"access_token": "ya29.a0AfB_byB77Ran1kP3F1FKu9xL-zMeffAX-m3Z8JFvP2UD6iMM8_s4FQoNlOK2gstGSrW0G9seRlOmDG129Qq6XwI5BiwfxB1ZDGUKuikOYl6ZFgS69tzNXXzuKbLUivkQqoBZl28njdWUsVBFKjy_IvFzlDQAC6-YOrPkPAaCgYKAVwSARISFQHGX2Mi8GhWBz1GC2iqsEtbet6ETA0173", "client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0e0qXy4xW1Ud5CgYIARAAGA4SNwF-L9IrWfaomed_CK0R7zZffcpT-GIXf3y2ZjqqAD0UP6UkbaMV9F_OEC6pBVaaX4TYnBKx3os", "token_expiry": "2024-01-29T05:13:41Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byB77Ran1kP3F1FKu9xL-zMeffAX-m3Z8JFvP2UD6iMM8_s4FQoNlOK2gstGSrW0G9seRlOmDG129Qq6XwI5BiwfxB1ZDGUKuikOYl6ZFgS69tzNXXzuKbLUivkQqoBZl28njdWUsVBFKjy_IvFzlDQAC6-YOrPkPAaCgYKAVwSARISFQHGX2Mi8GhWBz1GC2iqsEtbet6ETA0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
\ No newline at end of file