From 13e60401480f73679d5e33fa6f7773f475b426e0 Mon Sep 17 00:00:00 2001 From: David Chang Date: Fri, 15 Mar 2024 22:50:22 +0800 Subject: [PATCH 01/40] ver Mar15thv2 fixed a bug --- mm_agents/accessibility_tree_wrap/heuristic_retrieve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py index 191eaa7..e37f614 100644 --- a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py +++ b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py @@ -69,11 +69,11 @@ def judge_node(node: ET, platform="ubuntu", check_image=False) -> bool: keeps = keeps and coordinates[0]>0 and coordinates[1]>0 and sizes[0]>0 and sizes[1]>0 return keeps -def filter_nodes(root: ET, platform="ubuntu"): +def filter_nodes(root: ET, platform="ubuntu", check_image=False): filtered_nodes = [] for node in root.iter(): - if judge_node(node, platform): + if judge_node(node, platform, check_image): filtered_nodes.append(node) #print(ET.tostring(node, encoding="unicode")) From 7feeab8f6b067fb131459afae73564e07106a592 Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Sun, 17 Mar 2024 01:42:43 +0800 Subject: [PATCH 02/40] add missing file --- .../multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json b/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json index 99e148b..fd85e1b 100644 --- a/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json +++ b/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=104pg3yochKyH2Uvlp3BdvKmHgYmSIESu&export=download&authuser=0&confirm=t&uuid=d1926366-4e54-4a44-8dcd-fc49ed6524d7&at=APZUnTXcBFV9kcacsA0toU83lMKJ:1706505549057d", + "url": "https://drive.usercontent.google.com/download?id=1gqqY56robX1tb4YPa3Yk1d72T_k-Rgz3&export=download&authuser=0&confirm=t", "path": "/home/user/Desktop/15-MB-docx-file-download.docx" } ] From e156a20e3dd47c969df35a13fa6a0d0d29a9c1c2 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Sun, 17 Mar 2024 22:25:13 +0800 Subject: [PATCH 03/40] Update new func --- mm_agents/agent.py | 22 +++++++++++----------- run.py | 42 ++++++++++++++++++++++++++++++------------ 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/mm_agents/agent.py b/mm_agents/agent.py index 7599b02..744ee9c 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -5,19 +5,20 @@ import os import re import time import uuid -import openai import xml.etree.ElementTree as ET from http import HTTPStatus from io import BytesIO from typing import Dict, List -from google.api_core.exceptions import InvalidArgument + import backoff import dashscope import google.generativeai as genai +import openai import requests from PIL import Image +from google.api_core.exceptions import InvalidArgument -from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes, draw_bounding_boxes +from mm_agents.accessibility_tree_wrap.heuristic_retrieve import filter_nodes, draw_bounding_boxes from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION, \ SYS_PROMPT_IN_A11Y_OUT_CODE, SYS_PROMPT_IN_A11Y_OUT_ACTION, \ SYS_PROMPT_IN_BOTH_OUT_CODE, SYS_PROMPT_IN_BOTH_OUT_ACTION, \ @@ 
-422,7 +423,6 @@ class PromptAgent: # with open("messages.json", "w") as f: # f.write(json.dumps(messages, indent=4)) - logger.info("Generating content with GPT model: %s", self.model) response = self.call_llm({ "model": self.model, "messages": messages, @@ -461,7 +461,7 @@ class PromptAgent: "Content-Type": "application/json", "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}" } - # logger.info("Generating content with GPT model: %s", self.model) + logger.info("Generating content with GPT model: %s", self.model) response = requests.post( "https://api.openai.com/v1/chat/completions", headers=headers, @@ -495,7 +495,7 @@ class PromptAgent: temperature = payload["temperature"] claude_messages = [] - + for i, message in enumerate(messages): claude_message = { "role": message["role"], @@ -503,17 +503,17 @@ class PromptAgent: } assert len(message["content"]) in [1, 2], "One text, or one text with one image" for part in message["content"]: - + if part['type'] == "image_url": image_source = {} image_source["type"] = "base64" image_source["media_type"] = "image/png" image_source["data"] = part['image_url']['url'].replace("data:image/png;base64,", "") claude_message['content'].append({"type": "image", "source": image_source}) - + if part['type'] == "text": claude_message['content'].append({"type": "text", "text": part['text']}) - + claude_messages.append(claude_message) # the claude not support system message in our endpoint, so we concatenate it at the first user message @@ -522,7 +522,6 @@ class PromptAgent: claude_messages[1]['content'].insert(0, claude_system_message_item) claude_messages.pop(0) - headers = { "x-api-key": os.environ["ANTHROPIC_API_KEY"], "anthropic-version": "2023-06-01", @@ -540,7 +539,7 @@ class PromptAgent: headers=headers, json=payload ) - + if response.status_code != 200: logger.error("Failed to call LLM: " + response.text) @@ -674,6 +673,7 @@ class PromptAgent: try: return response.text except Exception as e: + logger.error("Meet exception when calling Gemini API, " + str(e)) return "" elif self.model.startswith("qwen"): messages = payload["messages"] diff --git a/run.py b/run.py index 3014e87..5e8e664 100644 --- a/run.py +++ b/run.py @@ -6,6 +6,7 @@ import datetime import json import logging import os +import random import sys from tqdm import tqdm @@ -69,7 +70,7 @@ def config() -> argparse.Namespace: "screenshot_a11y_tree", "som" ], - default="som", + default="a11y_tree", help="Observation type", ) parser.add_argument("--screen_width", type=int, default=1920) @@ -82,7 +83,7 @@ def config() -> argparse.Namespace: parser.add_argument("--test_config_base_dir", type=str, default="evaluation_examples") # lm config - parser.add_argument("--model", type=str, default="gpt-4-vision-preview") + parser.add_argument("--model", type=str, default="gpt-4-0125-preview") parser.add_argument("--temperature", type=float, default=1.0) parser.add_argument("--top_p", type=float, default=0.9) parser.add_argument("--max_tokens", type=int, default=1500) @@ -147,7 +148,7 @@ def test( try: lib_run_single.run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores) - except Exception as e: + except TimeoutError as e: env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) logger.error(f"Time limit exceeded in {domain}/{example_id}") with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: @@ -155,6 +156,14 @@ def test( "Error": f"Time limit exceeded in {domain}/{example_id}" })) f.write("\n") + except Exception as e: + 
env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) + logger.error(f"Exception in {domain}/{example_id}" + str(e)) + with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: + f.write(json.dumps({ + "Error": f"Exception in {domain}/{example_id}" + str(e) + })) + f.write("\n") env.close() logger.info(f"Average score: {sum(scores) / len(scores)}") @@ -193,15 +202,13 @@ def get_unfinished(action_space, use_model, observation_type, result_dir, total_ def get_result(action_space, use_model, observation_type, result_dir, total_file_json): target_dir = os.path.join(result_dir, action_space, observation_type, use_model) + if not os.path.exists(target_dir): + print("New experiment, no result yet.") + return None all_result = [] - if not os.path.exists(target_dir): - return total_file_json - - finished = {} for domain in os.listdir(target_dir): - finished[domain] = [] domain_path = os.path.join(target_dir, domain) if os.path.isdir(domain_path): for example_id in os.listdir(domain_path): @@ -209,10 +216,17 @@ def get_result(action_space, use_model, observation_type, result_dir, total_file if os.path.isdir(example_path): if "result.txt" in os.listdir(example_path): # empty all files under example_id - all_result.append(float(open(os.path.join(example_path, "result.txt"), "r").read())) + try: + all_result.append(float(open(os.path.join(example_path, "result.txt"), "r").read())) + except: + all_result.append(0.0) - print("Success Rate:", sum(all_result) / len(all_result) * 100, "%") - return all_result + if not all_result: + print("New experiment, no result yet.") + return None + else: + print("Current Success Rate:", sum(all_result) / len(all_result) * 100, "%") + return all_result if __name__ == '__main__': @@ -242,4 +256,8 @@ if __name__ == '__main__': test_all_meta ) - # test(args, test_all_meta) + # make the order of key random in test_all_meta + for domain in test_all_meta: + random.shuffle(test_all_meta[domain]) + + test(args, test_all_meta) From acc2d41bdb8c0eed1bc7cc095fece0bee052b888 Mon Sep 17 00:00:00 2001 From: lfy79001 <843265183@qq.com> Date: Sun, 17 Mar 2024 22:27:59 +0800 Subject: [PATCH 04/40] add mixtral cogagent --- mm_agents/agent.py | 144 ++++++++++++++++++++++++++++++--------------- 1 file changed, 95 insertions(+), 49 deletions(-) diff --git a/mm_agents/agent.py b/mm_agents/agent.py index 744ee9c..cf140d8 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -549,55 +549,101 @@ class PromptAgent: return response.json()['content'][0]['text'] - # elif self.model.startswith("mistral"): - # print("Call mistral") - # messages = payload["messages"] - # max_tokens = payload["max_tokens"] - # - # misrtal_messages = [] - # - # for i, message in enumerate(messages): - # mistral_message = { - # "role": message["role"], - # "content": [] - # } - # - # for part in message["content"]: - # mistral_message['content'] = part['text'] if part['type'] == "text" else None - # - # misrtal_messages.append(mistral_message) - # - # # the mistral not support system message in our endpoint, so we concatenate it at the first user message - # if misrtal_messages[0]['role'] == "system": - # misrtal_messages[1]['content'] = misrtal_messages[0]['content'] + "\n" + misrtal_messages[1]['content'] - # misrtal_messages.pop(0) - # - # # openai.api_base = "http://localhost:8000/v1" - # # openai.api_key = "test" - # # response = openai.ChatCompletion.create( - # # messages=misrtal_messages, - # # model="Mixtral-8x7B-Instruct-v0.1" - # # ) - # - # from openai import OpenAI 
- # TOGETHER_API_KEY = "d011650e7537797148fb6170ec1e0be7ae75160375686fae02277136078e90d2" - # - # client = OpenAI(api_key=TOGETHER_API_KEY, - # base_url='https://api.together.xyz', - # ) - # logger.info("Generating content with Mistral model: %s", self.model) - # response = client.chat.completions.create( - # messages=misrtal_messages, - # model="mistralai/Mixtral-8x7B-Instruct-v0.1", - # max_tokens=1024 - # ) - # - # try: - # # return response['choices'][0]['message']['content'] - # return response.choices[0].message.content - # except Exception as e: - # print("Failed to call LLM: " + str(e)) - # return "" + elif self.model.startswith("mistral"): + print("Call mistral") + messages = payload["messages"] + max_tokens = payload["max_tokens"] + top_p = payload["top_p"] + temperature = payload["temperature"] + + misrtal_messages = [] + + for i, message in enumerate(messages): + mistral_message = { + "role": message["role"], + "content": "" + } + + for part in message["content"]: + mistral_message['content'] = part['text'] if part['type'] == "text" else "" + + + misrtal_messages.append(mistral_message) + + + # openai.api_base = "http://localhost:8000/v1" + # response = openai.ChatCompletion.create( + # messages=misrtal_messages, + # model="Mixtral-8x7B-Instruct-v0.1" + # ) + + from openai import OpenAI + + client = OpenAI(api_key=os.environ["TOGETHER_API_KEY"], + base_url='https://api.together.xyz', + ) + logger.info("Generating content with Mistral model: %s", self.model) + + response = client.chat.completions.create( + messages=misrtal_messages, + model=self.model, + max_tokens=max_tokens + ) + + try: + return response.choices[0].message.content + except Exception as e: + print("Failed to call LLM: " + str(e)) + return "" + + elif self.model.startswith("THUDM"): + # THUDM/cogagent-chat-hf + print("Call CogAgent") + messages = payload["messages"] + max_tokens = payload["max_tokens"] + top_p = payload["top_p"] + temperature = payload["temperature"] + + cog_messages = [] + + for i, message in enumerate(messages): + cog_message = { + "role": message["role"], + "content": [] + } + + for part in message["content"]: + if part['type'] == "image_url": + cog_message['content'].append({"type": "image_url", "image_url": {"url": part['image_url']['url'] } }) + + if part['type'] == "text": + cog_message['content'].append({"type": "text", "text": part['text']}) + + cog_messages.append(cog_message) + + # the cogagent not support system message in our endpoint, so we concatenate it at the first user message + if cog_messages[0]['role'] == "system": + cog_system_message_item = cog_messages[0]['content'][0] + cog_messages[1]['content'].insert(0, cog_system_message_item) + cog_messages.pop(0) + + payload = { + "model": self.model, + "max_tokens": max_tokens, + "messages": cog_messages + } + + base_url = "http://127.0.0.1:8000" + + response = requests.post(f"{base_url}/v1/chat/completions", json=payload, stream=False) + if response.status_code == 200: + decoded_line = response.json() + content = decoded_line.get("choices", [{}])[0].get("message", "").get("content", "") + return content + else: + print("Failed to call LLM: ", response.status_code) + return "" + elif self.model.startswith("gemini"): def encoded_img_to_pil_img(data_str): From 48aedb09a788731d1b08957a4e1a4fd333798cb7 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Sun, 17 Mar 2024 22:30:29 +0800 Subject: [PATCH 05/40] add wandb settings, remember to set WANDB_KEY --- desktop_env/controllers/python.py | 23 +++++++------ lib_run_single.py | 19 
++++++----- mm_agents/agent.py | 3 +- run.py | 55 ++++++++++++++++++++++++++----- settings.json | 2 +- 5 files changed, 73 insertions(+), 29 deletions(-) diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py index 60a4bb4..4159cde 100644 --- a/desktop_env/controllers/python.py +++ b/desktop_env/controllers/python.py @@ -263,16 +263,19 @@ class PythonController: """ Ends recording the screen. """ - response = requests.post(self.http_server + "/end_recording") - if response.status_code == 200: - logger.info("Recording stopped successfully") - with open(dest, 'wb') as f: - for chunk in response.iter_content(chunk_size=8192): - if chunk: - f.write(chunk) - else: - logger.error("Failed to stop recording. Status code: %d", response.status_code) - return None + try: + response = requests.post(self.http_server + "/end_recording") + if response.status_code == 200: + logger.info("Recording stopped successfully") + with open(dest, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + else: + logger.error("Failed to stop recording. Status code: %d", response.status_code) + return None + except Exception as e: + logger.error("An error occurred while trying to download the recording: %s", e) # Additional info def get_vm_platform(self): diff --git a/lib_run_single.py b/lib_run_single.py index e492736..ff9972d 100644 --- a/lib_run_single.py +++ b/lib_run_single.py @@ -2,6 +2,7 @@ import datetime import json import logging import os +import wandb from wrapt_timeout_decorator import * @@ -13,7 +14,6 @@ with open("./settings.json", "r") as file: data = json.load(file) time_limit = data["time_limit"] - @timeout(time_limit, use_signals=False) def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores): agent.reset() @@ -21,9 +21,9 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl done = False step_idx = 0 env.controller.start_recording() - + str_table = wandb.Table(columns=["Screenshot", "A11T", "Modle Response", "Action", "Action timestamp", "Done"]) while not done and step_idx < max_steps: - actions = agent.predict( + response, actions = agent.predict( instruction, obs ) @@ -31,20 +31,22 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl # Capture the timestamp before executing the action action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") logger.info("Step %d: %s", step_idx + 1, action) - obs, reward, done, info = env.step(action, args.sleep_after_execution) logger.info("Reward: %.2f", reward) logger.info("Done: %s", done) - logger.info("Info: %s", info) - # Save screenshot and trajectory information with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), "wb") as _f: with open(obs['screenshot'], "rb") as __f: screenshot = __f.read() _f.write(screenshot) - + # get a11tree and save to wandb + thisrun_a11tree = env.controller.get_accessibility_tree() + str_table.add_data(wandb.Image(data_or_path=os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), caption=f"step_{step_idx + 1}_{action_timestamp}"), + thisrun_a11tree, + response, action, action_timestamp, done) + wandb.log({"Reward": reward}) with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: f.write(json.dumps({ "step_num": step_idx + 1, @@ -56,14 +58,15 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl "screenshot_file": f"step_{step_idx + 
1}_{action_timestamp}.png" })) f.write("\n") - if done: logger.info("The episode is done.") break step_idx += 1 + wandb.log({"str_trajectory": str_table}) result = env.evaluate() logger.info("Result: %.2f", result) scores.append(result) with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f: f.write(f"{result}\n") env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) + wandb.log({"Result": result}) diff --git a/mm_agents/agent.py b/mm_agents/agent.py index 7599b02..cb0ba85 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -15,6 +15,7 @@ import backoff import dashscope import google.generativeai as genai import requests +import wandb from PIL import Image from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes, draw_bounding_boxes @@ -441,7 +442,7 @@ class PromptAgent: actions = None self.thoughts.append("") - return actions + return response, actions @backoff.on_exception( backoff.expo, diff --git a/run.py b/run.py index 3014e87..28563c8 100644 --- a/run.py +++ b/run.py @@ -7,6 +7,7 @@ import json import logging import os import sys +import wandb from tqdm import tqdm @@ -48,6 +49,11 @@ logger.addHandler(sdebug_handler) logger = logging.getLogger("desktopenv.experiment") +# wandb config +### set your wandb api key here +os.environ["WANDB_API_KEY"] = "" +wandb.login(key=os.environ["WANDB_API_KEY"]) + def config() -> argparse.Namespace: parser = argparse.ArgumentParser( @@ -104,6 +110,25 @@ def test( # log args logger.info("Args: %s", args) + # set wandb project + cfg_args = \ + { + "path_to_vm": args.path_to_vm, + "headless": args.headless, + "action_space": args.action_space, + "observation_type": args.observation_type, + "screen_width": args.screen_width, + "screen_height": args.screen_height, + "sleep_after_execution": args.sleep_after_execution, + "max_steps": args.max_steps, + "max_trajectory_length": args.max_trajectory_length, + "model": args.model, + "temperature": args.temperature, + "top_p": args.top_p, + "max_tokens": args.max_tokens, + "stop_token": args.stop_token, + "result_dir": args.result_dir + } agent = PromptAgent( model=args.model, @@ -122,6 +147,8 @@ def test( for domain in tqdm(test_all_meta, desc="Domain"): for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False): + wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}", + name=f"{example_id}") # example setting config_file = os.path.join(args.test_config_base_dir, f"examples/{domain}/{example_id}.json") with open(config_file, "r", encoding="utf-8") as f: @@ -133,6 +160,10 @@ def test( instruction = example["instruction"] logger.info(f"[Instruction]: {instruction}") + # wandb each example config settings + cfg_args["instruction"] = instruction + cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S") + wandb.config.update(cfg_args) example_result_dir = os.path.join( args.result_dir, @@ -148,13 +179,20 @@ def test( lib_run_single.run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores) except Exception as e: + logger.error(f"Exception in {domain}/{example_id}: {e}") + wandb.log({"Exception": wandb.Table(data=[[f"Exception in {domain}/{example_id}: {e}"]], columns=["Error"])}) env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) - logger.error(f"Time limit exceeded in {domain}/{example_id}") with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: 
f.write(json.dumps({ "Error": f"Time limit exceeded in {domain}/{example_id}" })) f.write("\n") + # wandb settings + os.mkdir(os.path.join(wandb.run.dir, "results/")) + for file in os.listdir(example_result_dir): + # move file to just under the root dir + os.rename(os.path.join(example_result_dir, file), os.path.join(wandb.run.dir, f"./results/{file}")) + wandb.finish() env.close() logger.info(f"Average score: {sum(scores) / len(scores)}") @@ -235,11 +273,10 @@ if __name__ == '__main__': left_info += f"{domain}: {len(test_file_list[domain])}\n" logger.info(f"Left tasks:\n{left_info}") - get_result(args.action_space, - args.model, - args.observation_type, - args.result_dir, - test_all_meta - ) - - # test(args, test_all_meta) + # get_result(args.action_space, + # args.model, + # args.observation_type, + # args.result_dir, + # test_all_meta + # ) + test(args, test_file_list) diff --git a/settings.json b/settings.json index 469579c..23bab77 100644 --- a/settings.json +++ b/settings.json @@ -1,3 +1,3 @@ { - "time_limit": "1200" + "time_limit": "10" } \ No newline at end of file From 9bafe093724c2a3ecd976b96afba0bc41b50207f Mon Sep 17 00:00:00 2001 From: David Chang Date: Sun, 17 Mar 2024 23:01:50 +0800 Subject: [PATCH 06/40] ver Mar17th fixed an error in task config --- branch-config/filelist | 5 ----- .../b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 4 ++-- main.py | 15 +++++++-------- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/branch-config/filelist b/branch-config/filelist index 513343c..a34a418 100644 --- a/branch-config/filelist +++ b/branch-config/filelist @@ -5,10 +5,5 @@ evaluation_examples logs mm_agents -experiment_a11y_tree.py -experiment_screenshot.py -experiment_screenshot_a11y_tree.py -experiment_screenshot_seeact.py -experiment_screenshot_som.py quick_evaluate.py diff --git a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json index 7b7a0d5..c869428 100644 --- a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json +++ b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json @@ -38,7 +38,7 @@ } }, { - "type": "execute", + "type": "launch", "parameters": { "command": [ "nautilus", @@ -109,4 +109,4 @@ ] } } -} \ No newline at end of file +} diff --git a/main.py b/main.py index 93282ec..bdb2e6a 100644 --- a/main.py +++ b/main.py @@ -47,17 +47,16 @@ def human_agent(): Runs the Gym environment with human input. 
""" - with open("evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json", "r", encoding="utf-8") as f: + with open("evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json", "r", encoding="utf-8") as f: example = json.load(f) - example["snapshot"] = "exp_v5" + example["snapshot"] = "Snapshot 35" - env = DesktopEnv( - path_to_vm=r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu3\Ubuntu3.vmx", - action_space="computer_13", - task_config=example - ) + env = DesktopEnv( path_to_vm=r"/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx" + , snapshot_name="Snapshot 35" + , action_space="computer_13" + ) # reset the environment to certain snapshot - observation = env.reset() + observation = env.reset(task_config=example) done = False trajectory = [ From 8080828a84e945297cfdd92b3591cdfc409044ba Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Mon, 18 Mar 2024 00:02:41 +0800 Subject: [PATCH 07/40] update wandb settings --- lib_run_single.py | 12 +++++++----- run.py | 6 +++--- settings.json | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/lib_run_single.py b/lib_run_single.py index ff9972d..ab9816e 100644 --- a/lib_run_single.py +++ b/lib_run_single.py @@ -14,8 +14,8 @@ with open("./settings.json", "r") as file: data = json.load(file) time_limit = data["time_limit"] -@timeout(time_limit, use_signals=False) -def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores): +# @timeout(time_limit, use_signals=False) +def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores, run): agent.reset() obs = env.reset(task_config=example) done = False @@ -46,7 +46,7 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl str_table.add_data(wandb.Image(data_or_path=os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), caption=f"step_{step_idx + 1}_{action_timestamp}"), thisrun_a11tree, response, action, action_timestamp, done) - wandb.log({"Reward": reward}) + run.log({"Reward": reward}) with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: f.write(json.dumps({ "step_num": step_idx + 1, @@ -62,11 +62,13 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl logger.info("The episode is done.") break step_idx += 1 - wandb.log({"str_trajectory": str_table}) + # wandb.log({"str_trajectory": str_table}) + run.log({"str_trajectory": str_table}) result = env.evaluate() logger.info("Result: %.2f", result) scores.append(result) with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f: f.write(f"{result}\n") env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) - wandb.log({"Result": result}) + run.log({"Result": result}) + # wandb.log({"Result": result}) diff --git a/run.py b/run.py index 4284169..505ae54 100644 --- a/run.py +++ b/run.py @@ -148,7 +148,7 @@ def test( for domain in tqdm(test_all_meta, desc="Domain"): for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False): - wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}", + run = wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}", name=f"{example_id}") # example setting config_file = os.path.join(args.test_config_base_dir, f"examples/{domain}/{example_id}.json") @@ -164,7 +164,7 @@ def test( # wandb each example config settings 
cfg_args["instruction"] = instruction cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S") - wandb.config.update(cfg_args) + run.config.update(cfg_args) example_result_dir = os.path.join( args.result_dir, @@ -178,7 +178,7 @@ def test( # example start running try: lib_run_single.run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, - scores) + scores, run) except Exception as e: logger.error(f"Exception in {domain}/{example_id}: {e}") wandb.log({"Exception": wandb.Table(data=[[f"Exception in {domain}/{example_id}: {e}"]], columns=["Error"])}) diff --git a/settings.json b/settings.json index 23bab77..7ee7a21 100644 --- a/settings.json +++ b/settings.json @@ -1,3 +1,3 @@ { - "time_limit": "10" + "time_limit": "600" } \ No newline at end of file From b067d5a840501dedb658c91fe297bb61f5087362 Mon Sep 17 00:00:00 2001 From: lfy79001 <843265183@qq.com> Date: Mon, 18 Mar 2024 00:22:57 +0800 Subject: [PATCH 08/40] add cogagent server --- mm_agents/llm_server/CogAgent/CogAgent.py | 405 ++++++++++++++++++++++ mm_agents/llm_server/CogAgent/README.md | 7 + 2 files changed, 412 insertions(+) create mode 100644 mm_agents/llm_server/CogAgent/CogAgent.py create mode 100644 mm_agents/llm_server/CogAgent/README.md diff --git a/mm_agents/llm_server/CogAgent/CogAgent.py b/mm_agents/llm_server/CogAgent/CogAgent.py new file mode 100644 index 0000000..1b4cd53 --- /dev/null +++ b/mm_agents/llm_server/CogAgent/CogAgent.py @@ -0,0 +1,405 @@ +import os +import gc +import time +import base64 + +from contextlib import asynccontextmanager +from typing import List, Literal, Union, Tuple, Optional +import torch +import uvicorn +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from loguru import logger +from pydantic import BaseModel, Field +from sse_starlette.sse import EventSourceResponse +from transformers import AutoModelForCausalLM, LlamaTokenizer, PreTrainedModel, PreTrainedTokenizer, \ + TextIteratorStreamer +from PIL import Image +from io import BytesIO + +MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/cogvlm-chat-hf') +TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", 'lmsys/vicuna-7b-v1.5') +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +if os.environ.get('QUANT_ENABLED'): + QUANT_ENABLED = True +else: + with torch.cuda.device(DEVICE): + __, total_bytes = torch.cuda.mem_get_info() + total_gb = total_bytes / (1 << 30) + if total_gb < 40: + QUANT_ENABLED = True + else: + QUANT_ENABLED = False + +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + An asynchronous context manager for managing the lifecycle of the FastAPI app. + It ensures that GPU memory is cleared after the app's lifecycle ends, which is essential for efficient resource management in GPU environments. + """ + yield + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + + +app = FastAPI(lifespan=lifespan) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +class ModelCard(BaseModel): + """ + A Pydantic model representing a model card, which provides metadata about a machine learning model. + It includes fields like model ID, owner, and creation time. 
+ """ + id: str + object: str = "model" + created: int = Field(default_factory=lambda: int(time.time())) + owned_by: str = "owner" + root: Optional[str] = None + parent: Optional[str] = None + permission: Optional[list] = None + + +class ModelList(BaseModel): + object: str = "list" + data: List[ModelCard] = [] + + +class ImageUrl(BaseModel): + url: str + + +class TextContent(BaseModel): + type: Literal["text"] + text: str + + +class ImageUrlContent(BaseModel): + type: Literal["image_url"] + image_url: ImageUrl + + +ContentItem = Union[TextContent, ImageUrlContent] + + +class ChatMessageInput(BaseModel): + role: Literal["user", "assistant", "system"] + content: Union[str, List[ContentItem]] + name: Optional[str] = None + + +class ChatMessageResponse(BaseModel): + role: Literal["assistant"] + content: str = None + name: Optional[str] = None + + +class DeltaMessage(BaseModel): + role: Optional[Literal["user", "assistant", "system"]] = None + content: Optional[str] = None + + +class ChatCompletionRequest(BaseModel): + model: str + messages: List[ChatMessageInput] + temperature: Optional[float] = 0.8 + top_p: Optional[float] = 0.8 + max_tokens: Optional[int] = None + stream: Optional[bool] = False + # Additional parameters + repetition_penalty: Optional[float] = 1.0 + + +class ChatCompletionResponseChoice(BaseModel): + index: int + message: ChatMessageResponse + + +class ChatCompletionResponseStreamChoice(BaseModel): + index: int + delta: DeltaMessage + + +class UsageInfo(BaseModel): + prompt_tokens: int = 0 + total_tokens: int = 0 + completion_tokens: Optional[int] = 0 + + +class ChatCompletionResponse(BaseModel): + model: str + object: Literal["chat.completion", "chat.completion.chunk"] + choices: List[Union[ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice]] + created: Optional[int] = Field(default_factory=lambda: int(time.time())) + usage: Optional[UsageInfo] = None + + +@app.get("/v1/models", response_model=ModelList) +async def list_models(): + """ + An endpoint to list available models. It returns a list of model cards. + This is useful for clients to query and understand what models are available for use. 
+ """ + model_card = ModelCard(id="cogvlm-chat-17b") # can be replaced by your model id like cogagent-chat-18b + return ModelList(data=[model_card]) + + +@app.post("/v1/chat/completions", response_model=ChatCompletionResponse) +async def create_chat_completion(request: ChatCompletionRequest): + global model, tokenizer + + if len(request.messages) < 1 or request.messages[-1].role == "assistant": + raise HTTPException(status_code=400, detail="Invalid request") + + gen_params = dict( + messages=request.messages, + temperature=request.temperature, + top_p=request.top_p, + max_tokens=request.max_tokens or 1024, + echo=False, + stream=request.stream, + ) + + if request.stream: + generate = predict(request.model, gen_params) + return EventSourceResponse(generate, media_type="text/event-stream") + response = generate_cogvlm(model, tokenizer, gen_params) + + usage = UsageInfo() + + message = ChatMessageResponse( + role="assistant", + content=response["text"], + ) + logger.debug(f"==== message ====\n{message}") + choice_data = ChatCompletionResponseChoice( + index=0, + message=message, + ) + task_usage = UsageInfo.model_validate(response["usage"]) + for usage_key, usage_value in task_usage.model_dump().items(): + setattr(usage, usage_key, getattr(usage, usage_key) + usage_value) + return ChatCompletionResponse(model=request.model, choices=[choice_data], object="chat.completion", usage=usage) + + +async def predict(model_id: str, params: dict): + """ + Handle streaming predictions. It continuously generates responses for a given input stream. + This is particularly useful for real-time, continuous interactions with the model. + """ + + global model, tokenizer + + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=DeltaMessage(role="assistant"), + finish_reason=None + ) + chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk") + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + + previous_text = "" + for new_response in generate_stream_cogvlm(model, tokenizer, params): + decoded_unicode = new_response["text"] + delta_text = decoded_unicode[len(previous_text):] + previous_text = decoded_unicode + delta = DeltaMessage( + content=delta_text, + role="assistant", + ) + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=delta, + ) + chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk") + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=DeltaMessage(), + ) + chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk") + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + + +def generate_cogvlm(model: PreTrainedModel, tokenizer: PreTrainedTokenizer, params: dict): + """ + Generates a response using the CogVLM model. It processes the chat history and image data, if any, + and then invokes the model to generate a response. + """ + + for response in generate_stream_cogvlm(model, tokenizer, params): + pass + return response + + +def process_history_and_images(messages: List[ChatMessageInput]) -> Tuple[ + Optional[str], Optional[List[Tuple[str, str]]], Optional[List[Image.Image]]]: + """ + Process history messages to extract text, identify the last user query, + and convert base64 encoded image URLs to PIL images. + + Args: + messages(List[ChatMessageInput]): List of ChatMessageInput objects. 
+    return: A tuple of three elements:
+        - The last user query as a string.
+        - Text history formatted as a list of tuples for the model.
+        - List of PIL Image objects extracted from the messages.
+    """
+    formatted_history = []
+    image_list = []
+    last_user_query = ''
+
+    for i, message in enumerate(messages):
+        role = message.role
+        content = message.content
+
+        if isinstance(content, list):  # text
+            text_content = ' '.join(item.text for item in content if isinstance(item, TextContent))
+        else:
+            text_content = content
+
+        if isinstance(content, list):  # image
+            for item in content:
+                if isinstance(item, ImageUrlContent):
+                    image_url = item.image_url.url
+                    if image_url.startswith("data:image/jpeg;base64,"):
+                        base64_encoded_image = image_url.split("data:image/jpeg;base64,")[1]
+                        image_data = base64.b64decode(base64_encoded_image)
+                        image = Image.open(BytesIO(image_data)).convert('RGB')
+                        image_list.append(image)
+                    elif image_url.startswith("data:image/png;base64,"):
+                        base64_encoded_image = image_url.split("data:image/png;base64,")[1]
+                        image_data = base64.b64decode(base64_encoded_image)
+                        image = Image.open(BytesIO(image_data)).convert('RGB')
+                        image_list.append(image)
+
+        if role == 'user':
+            if i == len(messages) - 1:  # the last user message
+                last_user_query = text_content
+            else:
+                formatted_history.append((text_content, ''))
+        elif role == 'assistant':
+            if formatted_history:
+                if formatted_history[-1][1] != '':
+                    assert False, f"the last query is answered. answer again. {formatted_history[-1][0]}, {formatted_history[-1][1]}, {text_content}"
+                formatted_history[-1] = (formatted_history[-1][0], text_content)
+            else:
+                assert False, f"assistant reply before user"
+        else:
+            assert False, f"unrecognized role: {role}"
+
+    return last_user_query, formatted_history, image_list
+
+
+@torch.inference_mode()
+def generate_stream_cogvlm(model: PreTrainedModel, tokenizer: PreTrainedTokenizer, params: dict):
+    """
+    Generates a stream of responses using the CogVLM model in inference mode.
+    It's optimized to handle continuous input-output interactions with the model in a streaming manner.
+
+ """ + messages = params["messages"] + temperature = float(params.get("temperature", 1.0)) + repetition_penalty = float(params.get("repetition_penalty", 1.0)) + top_p = float(params.get("top_p", 1.0)) + max_new_tokens = int(params.get("max_tokens", 256)) + query, history, image_list = process_history_and_images(messages) + + logger.debug(f"==== request ====\n{query}") + + input_by_model = model.build_conversation_input_ids(tokenizer, query=query, history=history, + images=[image_list[-1]]) + inputs = { + 'input_ids': input_by_model['input_ids'].unsqueeze(0).to(DEVICE), + 'token_type_ids': input_by_model['token_type_ids'].unsqueeze(0).to(DEVICE), + 'attention_mask': input_by_model['attention_mask'].unsqueeze(0).to(DEVICE), + 'images': [[input_by_model['images'][0].to(DEVICE).to(torch_type)]], + } + if 'cross_images' in input_by_model and input_by_model['cross_images']: + inputs['cross_images'] = [[input_by_model['cross_images'][0].to(DEVICE).to(torch_type)]] + + input_echo_len = len(inputs["input_ids"][0]) + streamer = TextIteratorStreamer( + tokenizer=tokenizer, + timeout=60.0, + skip_prompt=True, + skip_special_tokens=True +) + gen_kwargs = { + "repetition_penalty": repetition_penalty, + "max_new_tokens": max_new_tokens, + "do_sample": True if temperature > 1e-5 else False, + "top_p": top_p if temperature > 1e-5 else 0, + 'streamer': streamer, + } + if temperature > 1e-5: + gen_kwargs["temperature"] = temperature + + total_len = 0 + generated_text = "" + with torch.no_grad(): + model.generate(**inputs, **gen_kwargs) + for next_text in streamer: + generated_text += next_text + yield { + "text": generated_text, + "usage": { + "prompt_tokens": input_echo_len, + "completion_tokens": total_len - input_echo_len, + "total_tokens": total_len, + }, + } + ret = { + "text": generated_text, + "usage": { + "prompt_tokens": input_echo_len, + "completion_tokens": total_len - input_echo_len, + "total_tokens": total_len, + }, + } + yield ret + + +gc.collect() +torch.cuda.empty_cache() + +if __name__ == "__main__": + tokenizer = LlamaTokenizer.from_pretrained( + TOKENIZER_PATH, + trust_remote_code=True) + + if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8: + torch_type = torch.bfloat16 + else: + torch_type = torch.float16 + + print("========Use torch type as:{} with device:{}========\n\n".format(torch_type, DEVICE)) + + if 'cuda' in DEVICE: + if QUANT_ENABLED: + model = AutoModelForCausalLM.from_pretrained( + MODEL_PATH, + load_in_4bit=True, + trust_remote_code=True, + torch_dtype=torch_type, + low_cpu_mem_usage=True + ).eval() + else: + model = AutoModelForCausalLM.from_pretrained( + MODEL_PATH, + load_in_4bit=False, + trust_remote_code=True, + torch_dtype=torch_type, + low_cpu_mem_usage=True + ).to(DEVICE).eval() + + else: + model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, trust_remote_code=True).float().to(DEVICE).eval() + uvicorn.run(app, host='0.0.0.0', port=8000, workers=1) diff --git a/mm_agents/llm_server/CogAgent/README.md b/mm_agents/llm_server/CogAgent/README.md new file mode 100644 index 0000000..b6f61d2 --- /dev/null +++ b/mm_agents/llm_server/CogAgent/README.md @@ -0,0 +1,7 @@ +## Deploy CogAgent as server + +``` +python CogAgent.py +``` + +The CogAgent LLM will be deployed on http://127.0.0.1:8000 \ No newline at end of file From 576248ae182b26b7f2dd30af93395308e965d7ac Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Mon, 18 Mar 2024 12:02:34 +0800 Subject: [PATCH 09/40] uncomment timer --- lib_run_single.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/lib_run_single.py b/lib_run_single.py index ab9816e..bcf2496 100644 --- a/lib_run_single.py +++ b/lib_run_single.py @@ -14,7 +14,7 @@ with open("./settings.json", "r") as file: data = json.load(file) time_limit = data["time_limit"] -# @timeout(time_limit, use_signals=False) +@timeout(time_limit, use_signals=False) def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores, run): agent.reset() obs = env.reset(task_config=example) From 204a2b949fc5f3930e187c62b3b0762cee29bdeb Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Mon, 18 Mar 2024 14:56:23 +0800 Subject: [PATCH 10/40] Update claude endpoint --- mm_agents/agent.py | 56 +++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/mm_agents/agent.py b/mm_agents/agent.py index 263e5ee..ff92673 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -15,7 +15,6 @@ import dashscope import google.generativeai as genai import openai import requests -import wandb from PIL import Image from google.api_core.exceptions import InvalidArgument @@ -523,20 +522,30 @@ class PromptAgent: claude_messages[1]['content'].insert(0, claude_system_message_item) claude_messages.pop(0) + # headers = { + # "x-api-key": os.environ["ANTHROPIC_API_KEY"], + # "anthropic-version": "2023-06-01", + # "content-type": "application/json" + # } + headers = { - "x-api-key": os.environ["ANTHROPIC_API_KEY"], - "anthropic-version": "2023-06-01", - "content-type": "application/json" + "Accept": "application / json", + "Authorization": "Bearer " + os.environ["ANTHROPIC_API_KEY"], + "User-Agent": "Apifox/1.0.0 (https://apifox.com)", + "Content-Type": "application/json" } payload = { "model": self.model, "max_tokens": max_tokens, - "messages": claude_messages + "messages": claude_messages, + "temperature": temperature, + "top_p": top_p } response = requests.post( - "https://api.anthropic.com/v1/messages", + # "https://chat.claude.com/v1/chat/completions", + "https://api.aigcbest.top/v1/chat/completions", headers=headers, json=payload ) @@ -546,8 +555,10 @@ class PromptAgent: logger.error("Failed to call LLM: " + response.text) time.sleep(5) return "" + # else: + # return response.json()['content'][0]['text'] else: - return response.json()['content'][0]['text'] + return response.json()['choices'][0]['message']['content'] elif self.model.startswith("mistral"): @@ -556,7 +567,7 @@ class PromptAgent: max_tokens = payload["max_tokens"] top_p = payload["top_p"] temperature = payload["temperature"] - + misrtal_messages = [] for i, message in enumerate(messages): @@ -567,36 +578,34 @@ class PromptAgent: for part in message["content"]: mistral_message['content'] = part['text'] if part['type'] == "text" else "" - - + misrtal_messages.append(mistral_message) - - + # openai.api_base = "http://localhost:8000/v1" # response = openai.ChatCompletion.create( # messages=misrtal_messages, # model="Mixtral-8x7B-Instruct-v0.1" # ) - + from openai import OpenAI client = OpenAI(api_key=os.environ["TOGETHER_API_KEY"], base_url='https://api.together.xyz', ) logger.info("Generating content with Mistral model: %s", self.model) - + response = client.chat.completions.create( messages=misrtal_messages, model=self.model, max_tokens=max_tokens ) - + try: return response.choices[0].message.content except Exception as e: print("Failed to call LLM: " + str(e)) return "" - + elif self.model.startswith("THUDM"): # THUDM/cogagent-chat-hf print("Call CogAgent") @@ -604,9 +613,9 @@ 
class PromptAgent: max_tokens = payload["max_tokens"] top_p = payload["top_p"] temperature = payload["temperature"] - + cog_messages = [] - + for i, message in enumerate(messages): cog_message = { "role": message["role"], @@ -615,11 +624,12 @@ class PromptAgent: for part in message["content"]: if part['type'] == "image_url": - cog_message['content'].append({"type": "image_url", "image_url": {"url": part['image_url']['url'] } }) - + cog_message['content'].append( + {"type": "image_url", "image_url": {"url": part['image_url']['url']}}) + if part['type'] == "text": cog_message['content'].append({"type": "text", "text": part['text']}) - + cog_messages.append(cog_message) # the cogagent not support system message in our endpoint, so we concatenate it at the first user message @@ -627,7 +637,7 @@ class PromptAgent: cog_system_message_item = cog_messages[0]['content'][0] cog_messages[1]['content'].insert(0, cog_system_message_item) cog_messages.pop(0) - + payload = { "model": self.model, "max_tokens": max_tokens, @@ -635,7 +645,7 @@ class PromptAgent: } base_url = "http://127.0.0.1:8000" - + response = requests.post(f"{base_url}/v1/chat/completions", json=payload, stream=False) if response.status_code == 200: decoded_line = response.json() From c1c7ac298f56beb3a5661666a40453d1a564c511 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Mon, 18 Mar 2024 14:59:02 +0800 Subject: [PATCH 11/40] Update claude endpoint --- lib_run_single.py | 4 +--- run.py | 19 +++++++++---------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/lib_run_single.py b/lib_run_single.py index bcf2496..d60fd7a 100644 --- a/lib_run_single.py +++ b/lib_run_single.py @@ -46,7 +46,7 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl str_table.add_data(wandb.Image(data_or_path=os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), caption=f"step_{step_idx + 1}_{action_timestamp}"), thisrun_a11tree, response, action, action_timestamp, done) - run.log({"Reward": reward}) + run.log({"Reward": reward}) with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: f.write(json.dumps({ "step_num": step_idx + 1, @@ -62,7 +62,6 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl logger.info("The episode is done.") break step_idx += 1 - # wandb.log({"str_trajectory": str_table}) run.log({"str_trajectory": str_table}) result = env.evaluate() logger.info("Result: %.2f", result) @@ -71,4 +70,3 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl f.write(f"{result}\n") env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) run.log({"Result": result}) - # wandb.log({"Result": result}) diff --git a/run.py b/run.py index 505ae54..728bea4 100644 --- a/run.py +++ b/run.py @@ -52,8 +52,7 @@ logger = logging.getLogger("desktopenv.experiment") # wandb config ### set your wandb api key here -os.environ["WANDB_API_KEY"] = "" -wandb.login(key=os.environ["WANDB_API_KEY"]) +wandb.login(key=os.get("WANDB_API_KEY", None)) def config() -> argparse.Namespace: @@ -148,7 +147,7 @@ def test( for domain in tqdm(test_all_meta, desc="Domain"): for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False): - run = wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}", + run = wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}", name=f"{example_id}") # example setting 
config_file = os.path.join(args.test_config_base_dir, f"examples/{domain}/{example_id}.json") @@ -164,7 +163,7 @@ def test( # wandb each example config settings cfg_args["instruction"] = instruction cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S") - run.config.update(cfg_args) + run.config.update(cfg_args) example_result_dir = os.path.join( args.result_dir, @@ -279,10 +278,10 @@ if __name__ == '__main__': left_info += f"{domain}: {len(test_file_list[domain])}\n" logger.info(f"Left tasks:\n{left_info}") - # get_result(args.action_space, - # args.model, - # args.observation_type, - # args.result_dir, - # test_all_meta - # ) + get_result(args.action_space, + args.model, + args.observation_type, + args.result_dir, + test_all_meta + ) test(args, test_file_list) From a145b97bd038b419df24763d6f4668c6eaef0cfe Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Mon, 18 Mar 2024 15:02:22 +0800 Subject: [PATCH 12/40] Minor fix --- run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.py b/run.py index 728bea4..5212bc0 100644 --- a/run.py +++ b/run.py @@ -52,7 +52,7 @@ logger = logging.getLogger("desktopenv.experiment") # wandb config ### set your wandb api key here -wandb.login(key=os.get("WANDB_API_KEY", None)) +wandb.login(key=os.environ.get("WANDB_API_KEY", None)) def config() -> argparse.Namespace: From 3db0591868321688685f400e45cf5d26de6cf55a Mon Sep 17 00:00:00 2001 From: David Chang Date: Mon, 18 Mar 2024 17:42:13 +0800 Subject: [PATCH 13/40] ver Mar18th checked Claude agent --- branch-config/filelist | 3 +++ branch_flag | 2 +- mm_agents/agent.py | 7 ++++++- run.py | 1 + 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/branch-config/filelist b/branch-config/filelist index a34a418..c6aac47 100644 --- a/branch-config/filelist +++ b/branch-config/filelist @@ -5,5 +5,8 @@ evaluation_examples logs mm_agents +run.py +lib_run_single.py +settings.json quick_evaluate.py diff --git a/branch_flag b/branch_flag index 9daeafb..cb4898e 100644 --- a/branch_flag +++ b/branch_flag @@ -1 +1 @@ -test +claude diff --git a/mm_agents/agent.py b/mm_agents/agent.py index ff92673..7a454cc 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -256,7 +256,6 @@ class PromptAgent: if self.observation_type == "screenshot_a11y_tree": _screenshot = previous_obs["screenshot"] _linearized_accessibility_tree = previous_obs["accessibility_tree"] - logger.debug("LINEAR AT: %s", _linearized_accessibility_tree) messages.append({ "role": "user", @@ -343,6 +342,7 @@ class PromptAgent: if self.observation_type in ["screenshot", "screenshot_a11y_tree"]: base64_image = encode_image(obs["screenshot"]) linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + logger.debug("LINEAR AT: %s", linearized_accessibility_tree) if self.observation_type == "screenshot_a11y_tree": self.observations.append({ @@ -376,6 +376,7 @@ class PromptAgent: }) elif self.observation_type == "a11y_tree": linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + logger.debug("LINEAR AT: %s", linearized_accessibility_tree) self.observations.append({ "screenshot": None, @@ -423,6 +424,8 @@ class PromptAgent: # with open("messages.json", "w") as f: # f.write(json.dumps(messages, indent=4)) + #logger.info("PROMPT: %s", messages) + response = self.call_llm({ "model": self.model, "messages": messages, @@ -522,6 +525,8 @@ class PromptAgent: claude_messages[1]['content'].insert(0, 
claude_system_message_item) claude_messages.pop(0) + logger.debug("CLAUDE MESSAGE: %s", repr(claude_messages)) + # headers = { # "x-api-key": os.environ["ANTHROPIC_API_KEY"], # "anthropic-version": "2023-06-01", diff --git a/run.py b/run.py index 5212bc0..2277d11 100644 --- a/run.py +++ b/run.py @@ -140,6 +140,7 @@ def test( env = DesktopEnv( path_to_vm=args.path_to_vm, + snapshot_name="Snapshot 35", action_space=agent.action_space, screen_size=(args.screen_width, args.screen_height), headless=args.headless, From eeae1442cd209cb7fb7937fd94d6c1daf873346c Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Mon, 18 Mar 2024 20:42:57 +0800 Subject: [PATCH 14/40] Add execute timeout to server; Fix error examples --- desktop_env/evaluators/metrics/vscode.py | 6 ++++++ desktop_env/server/main.py | 2 +- .../ac9bb6cb-1888-43ab-81e4-a98a547918cd.json | 6 ++++++ .../4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json | 4 ++-- .../examples/multi_apps/demo.py | 19 ------------------- 5 files changed, 15 insertions(+), 22 deletions(-) delete mode 100644 evaluation_examples/examples/multi_apps/demo.py diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index 61976f1..d207aae 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -236,6 +236,9 @@ def check_html_background_image(src_path: str, rule: Dict = None) -> float: Check if the background image is correctly set. multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108 """ + if not src_path: + return 0.0 + from bs4 import BeautifulSoup with open(src_path, 'r') as f: html_content = f.read() @@ -252,6 +255,9 @@ def compare_result_files(src_path, tgt_path): Compare whether the content of two files are the same. multi-app:7f35355e-02a6-45b5-b140-f0be698bcf85 """ + if not src_path or not tgt_path: + return 0.0 + with open(src_path, 'r') as f: src_content = f.read().strip() with open(tgt_path, 'r') as f: diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index efa62c7..d53232e 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -63,7 +63,7 @@ def execute_command(): # Execute the command without any safety checks. 
try: - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True) + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, timeout=120) return jsonify({ 'status': 'success', 'output': result.stdout, diff --git a/evaluation_examples/examples/libreoffice_impress/ac9bb6cb-1888-43ab-81e4-a98a547918cd.json b/evaluation_examples/examples/libreoffice_impress/ac9bb6cb-1888-43ab-81e4-a98a547918cd.json index c0d6ba0..053421c 100644 --- a/evaluation_examples/examples/libreoffice_impress/ac9bb6cb-1888-43ab-81e4-a98a547918cd.json +++ b/evaluation_examples/examples/libreoffice_impress/ac9bb6cb-1888-43ab-81e4-a98a547918cd.json @@ -63,6 +63,12 @@ "type": "vm_file", "path": "/home/user/Desktop/saa-format-guide.pptx", "dest": "saa-format-guide.pptx" + }, + "expected": { + "type": "rule", + "rules": { + "color": "red" + } } } } diff --git a/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json b/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json index 0a70b11..447a862 100644 --- a/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json +++ b/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json @@ -30,12 +30,12 @@ ], "evaluator": { "func": "check_brightness_decrease_and_structure_sim", - "expected": { + "result": { "type": "vm_file", "path": "/home/user/Desktop/background.png", "dest": "background.png" }, - "result": { + "expected": { "type": "cloud_file", "path": "https://drive.usercontent.google.com/download?id=13if1UwZ5ay6ADAVW2jp3rcyvAEBse6MJ&export=download&authuser=0&confirm=t&uuid=2ea03068-1874-4240-baa1-f8bb2f917a99&at=APZUnTXq6dVlASg819jCaI1A-rm2:1710136385956", "dest": "image_original.png" diff --git a/evaluation_examples/examples/multi_apps/demo.py b/evaluation_examples/examples/multi_apps/demo.py deleted file mode 100644 index ffa2b85..0000000 --- a/evaluation_examples/examples/multi_apps/demo.py +++ /dev/null @@ -1,19 +0,0 @@ -import pandas as pd - -file_path = "/Users/lxc/Downloads/Speedtest.csv" -# 找到csv第二行的第二个数据格里的值 -# with open(file_path, "r") as f: -# for i, line in enumerate(f): -# if i == 1: -# data = line.split(",")[1] -# break -# print(data) - -with open(file_path, "r") as f: - reader = pd.read_csv(f, sep=',', header=None) - # for column in reader.columns: - # if column.startswith("TEST_DATE"): - # data_col = column - # break - for data in reader['TEST_DATE']: - print(data) \ No newline at end of file From 1c9c5fd2ad8fdbe5d124b2a12818ff8770ef0cb4 Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Mon, 18 Mar 2024 20:51:53 +0800 Subject: [PATCH 15/40] fix multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json missing file problems: who delete it on googledrive??? 
--- .../multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation_examples/examples/multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json b/evaluation_examples/examples/multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json index 3d32ee5..0f1c8ac 100644 --- a/evaluation_examples/examples/multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json +++ b/evaluation_examples/examples/multi_apps/51f5801c-18b3-4f25-b0c3-02f85507a078.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1e12nL_V7bffaLSocQ86EiGCdygzggWeu&export=download", + "url": "https://drive.usercontent.google.com/download?id=1epTcblcYh8j_wFtA-aiXPIF2Oo1IVw8A&export=download", "path": "/home/user/Desktop/Dickinson_Slides.pptx" } ] @@ -36,7 +36,7 @@ }, "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1Xl6tgQ0K5qA1BDA2fKTK2xFLzXwbtkZ6&export=download", + "path": "https://drive.usercontent.google.com/download?id=1vUvaQLJUtFgbZi7lSzl0y0TS_WecFczm&export=download", "dest": "notes_gold.docx" }, "options": { From f992d1f694540b9f2ccdc8f5adcf21262803a194 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Mon, 18 Mar 2024 21:43:35 +0800 Subject: [PATCH 16/40] Disable a11y tree temporarily --- lib_run_single.py | 18 +++++++++--------- run.py | 25 +++++++++++++------------ 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/lib_run_single.py b/lib_run_single.py index d60fd7a..82b2dd3 100644 --- a/lib_run_single.py +++ b/lib_run_single.py @@ -2,7 +2,7 @@ import datetime import json import logging import os -import wandb +# import wandb from wrapt_timeout_decorator import * @@ -15,13 +15,13 @@ with open("./settings.json", "r") as file: time_limit = data["time_limit"] @timeout(time_limit, use_signals=False) -def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores, run): +def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores): agent.reset() obs = env.reset(task_config=example) done = False step_idx = 0 env.controller.start_recording() - str_table = wandb.Table(columns=["Screenshot", "A11T", "Modle Response", "Action", "Action timestamp", "Done"]) + # str_table = wandb.Table(columns=["Screenshot", "A11T", "Modle Response", "Action", "Action timestamp", "Done"]) while not done and step_idx < max_steps: response, actions = agent.predict( instruction, @@ -43,10 +43,10 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl _f.write(screenshot) # get a11tree and save to wandb thisrun_a11tree = env.controller.get_accessibility_tree() - str_table.add_data(wandb.Image(data_or_path=os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), caption=f"step_{step_idx + 1}_{action_timestamp}"), - thisrun_a11tree, - response, action, action_timestamp, done) - run.log({"Reward": reward}) + # str_table.add_data(wandb.Image(data_or_path=os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), caption=f"step_{step_idx + 1}_{action_timestamp}"), + # thisrun_a11tree, + # response, action, action_timestamp, done) + # run.log({"Reward": reward}) with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: f.write(json.dumps({ "step_num": step_idx + 1, @@ -62,11 +62,11 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl logger.info("The episode is done.") break 
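The per-example time limit in the `lib_run_single.py` hunk above comes from `wrapt_timeout_decorator` together with the `time_limit` value loaded from `settings.json`. The following is a self-contained sketch of that pattern under simplifying assumptions — the stub function body, the `int(...)` conversion, and the call-site handling are illustrative, not code from the repository:

```python
# Sketch of the @timeout(...) pattern from lib_run_single.py; the stub body
# and the surrounding handling are assumptions for illustration only.
import json
import time

from wrapt_timeout_decorator import timeout

with open("./settings.json", "r") as f:
    time_limit = int(json.load(f)["time_limit"])  # settings.json stores the value as a string

@timeout(time_limit, use_signals=False)  # use_signals=False avoids POSIX signals (works off the main thread)
def run_single_example_stub():
    time.sleep(2)          # stand-in for the real agent/environment loop
    return "done"

if __name__ == "__main__":
    try:
        print(run_single_example_stub())
    except TimeoutError:
        # wrapt_timeout_decorator raises the built-in TimeoutError once the
        # limit is exceeded, so the caller can log a time-limit failure.
        print("Time limit exceeded")
```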
step_idx += 1 - run.log({"str_trajectory": str_table}) + # run.log({"str_trajectory": str_table}) result = env.evaluate() logger.info("Result: %.2f", result) scores.append(result) with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f: f.write(f"{result}\n") env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) - run.log({"Result": result}) + # run.log({"Result": result}) diff --git a/run.py b/run.py index 5212bc0..92e989a 100644 --- a/run.py +++ b/run.py @@ -8,7 +8,7 @@ import logging import os import random import sys -import wandb +# import wandb from tqdm import tqdm @@ -52,7 +52,8 @@ logger = logging.getLogger("desktopenv.experiment") # wandb config ### set your wandb api key here -wandb.login(key=os.environ.get("WANDB_API_KEY", None)) +# os.environ["WANDB_API_KEY"] = "48ec18fb4da7087238c6d6833eab9907565adbf3" +# wandb.login(key=os.environ.get("WANDB_API_KEY", None)) def config() -> argparse.Namespace: @@ -147,8 +148,8 @@ def test( for domain in tqdm(test_all_meta, desc="Domain"): for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False): - run = wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}", - name=f"{example_id}") + # run = wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}", + # name=f"{example_id}") # example setting config_file = os.path.join(args.test_config_base_dir, f"examples/{domain}/{example_id}.json") with open(config_file, "r", encoding="utf-8") as f: @@ -163,7 +164,7 @@ def test( # wandb each example config settings cfg_args["instruction"] = instruction cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S") - run.config.update(cfg_args) + # run.config.update(cfg_args) example_result_dir = os.path.join( args.result_dir, @@ -177,10 +178,10 @@ def test( # example start running try: lib_run_single.run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, - scores, run) + scores) except Exception as e: logger.error(f"Exception in {domain}/{example_id}: {e}") - wandb.log({"Exception": wandb.Table(data=[[f"Exception in {domain}/{example_id}: {e}"]], columns=["Error"])}) + # wandb.log({"Exception": wandb.Table(data=[[f"Exception in {domain}/{example_id}: {e}"]], columns=["Error"])}) env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: f.write(json.dumps({ @@ -188,11 +189,11 @@ def test( })) f.write("\n") # wandb settings - os.mkdir(os.path.join(wandb.run.dir, "results/")) - for file in os.listdir(example_result_dir): - # move file to just under the root dir - os.rename(os.path.join(example_result_dir, file), os.path.join(wandb.run.dir, f"./results/{file}")) - wandb.finish() + # os.mkdir(os.path.join(wandb.run.dir, "results/")) + # for file in os.listdir(example_result_dir): + # # move file to just under the root dir + # os.rename(os.path.join(example_result_dir, file), os.path.join(wandb.run.dir, f"./results/{file}")) + # wandb.finish() env.close() logger.info(f"Average score: {sum(scores) / len(scores)}") From 866ac3fbd9c6ce9255c503a9a1f52e7023511f5c Mon Sep 17 00:00:00 2001 From: Fangyu Lei <55661995+lfy79001@users.noreply.github.com> Date: Mon, 18 Mar 2024 21:43:59 +0800 Subject: [PATCH 17/40] Update requirements.txt add wandb and wrapt_timeout_decorator --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/requirements.txt b/requirements.txt index 2c595b9..9faae48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,4 +48,5 @@ easyocr borb pypdf2 pdfplumber - +wandb +wrapt_timeout_decorator From 4671455b567c438522704d9f09ae74213dd73789 Mon Sep 17 00:00:00 2001 From: BlankCheng <913501223@qq.com> Date: Mon, 18 Mar 2024 22:16:04 +0800 Subject: [PATCH 18/40] Fix eval func --- desktop_env/evaluators/metrics/gimp.py | 16 ++++++++++++---- .../d16c99dc-2a1e-46f2-b350-d97c86c85c15.json | 16 ++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/desktop_env/evaluators/metrics/gimp.py b/desktop_env/evaluators/metrics/gimp.py index e46f5d5..c1208af 100644 --- a/desktop_env/evaluators/metrics/gimp.py +++ b/desktop_env/evaluators/metrics/gimp.py @@ -199,7 +199,7 @@ def structure_check_by_mse(img1, img2, threshold=0.03): def structure_check_by_ssim(img1, img2, threshold=0.9): """Check if two images are approximately the same by SSIM""" - similarity = ssim(np.array(img1), np.array(img2), multichannel=True) + similarity = ssim(np.array(img1), np.array(img2), multichannel=True, channel_axis=-1) print("SSIM: ", similarity) return similarity >= threshold @@ -430,11 +430,11 @@ def check_image_size(src_path, rule): img = Image.open(src_path) # Check the size - if rule["height"] is not None: + if rule.get("height", None) is not None: height_same = img.size[1] == rule["height"] else: height_same = True - if rule["width"] is not None: + if rule.get("width", None) is not None: width_same = img.size[0] == rule["width"] else: width_same = True @@ -607,4 +607,12 @@ if __name__ == "__main__": rule = { "max_size": 500000 } - print(check_image_file_size(src_path, rule)) \ No newline at end of file + print(check_image_file_size(src_path, rule)) + + src_path = "../../../cache/d16c99dc-2a1e-46f2-b350-d97c86c85c15/resized.png" + tgt_path = "../../../cache/d16c99dc-2a1e-46f2-b350-d97c86c85c15/dog_with_background.png" + rule = { + "height": 512 + } + print(check_image_size(src_path, rule)) + print(check_structure_sim_resized(src_path, tgt_path)) \ No newline at end of file diff --git a/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json b/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json index 3029c0c..ca22630 100644 --- a/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json +++ b/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json @@ -86,13 +86,14 @@ ], "func": [ "check_image_size", - "check_structure_sim" + "check_structure_sim_resized" ], "expected": [ { - "type": "vm_file", - "path": "/home/user/Desktop/dog_with_background.png", - "dest": "dog_with_background.png" + "type": "rule", + "rules": { + "height": 512 + } }, { "type": "vm_file", @@ -102,10 +103,9 @@ ], "result": [ { - "type": "rule", - "rules": { - "height": 512 - } + "type": "vm_file", + "path": "/home/user/Desktop/dog_with_background.png", + "dest": "dog_with_background.png" }, { "type": "vm_file", From 8e760fd45045d9556a9a46fafcf2b995e5ff8006 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Tue, 19 Mar 2024 08:57:05 +0800 Subject: [PATCH 19/40] Disable wandb temporarily, speedup the environment step speed by remove useless a11y tree re-get and terminal output --- desktop_env/envs/desktop_env.py | 2 +- lib_run_single.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index fee3f37..7dd70b6 100644 --- 
a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -285,7 +285,7 @@ class DesktopEnv(gym.Env): observation = { "screenshot": self._get_obs(), "accessibility_tree": self.controller.get_accessibility_tree(), - "terminal": self.controller.get_terminal_output(), + # "terminal": self.controller.get_terminal_output(), "instruction": self.instruction } diff --git a/lib_run_single.py b/lib_run_single.py index 82b2dd3..daa374e 100644 --- a/lib_run_single.py +++ b/lib_run_single.py @@ -42,7 +42,7 @@ def run_single_example(agent, env, example, max_steps, instruction, args, exampl screenshot = __f.read() _f.write(screenshot) # get a11tree and save to wandb - thisrun_a11tree = env.controller.get_accessibility_tree() + # thisrun_a11tree = env.controller.get_accessibility_tree() # str_table.add_data(wandb.Image(data_or_path=os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), caption=f"step_{step_idx + 1}_{action_timestamp}"), # thisrun_a11tree, # response, action, action_timestamp, done) From 41db4b44e78596351e99fb76f0af17fdf1abd9ed Mon Sep 17 00:00:00 2001 From: Fangyu Lei <55661995+lfy79001@users.noreply.github.com> Date: Tue, 19 Mar 2024 12:06:33 +0800 Subject: [PATCH 20/40] Update agent.py mixtral --- mm_agents/agent.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/mm_agents/agent.py b/mm_agents/agent.py index ff92673..c769827 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -568,7 +568,7 @@ class PromptAgent: top_p = payload["top_p"] temperature = payload["temperature"] - misrtal_messages = [] + mistral_messages = [] for i, message in enumerate(messages): mistral_message = { @@ -579,13 +579,8 @@ class PromptAgent: for part in message["content"]: mistral_message['content'] = part['text'] if part['type'] == "text" else "" - misrtal_messages.append(mistral_message) + mistral_messages.append(mistral_message) - # openai.api_base = "http://localhost:8000/v1" - # response = openai.ChatCompletion.create( - # messages=misrtal_messages, - # model="Mixtral-8x7B-Instruct-v0.1" - # ) from openai import OpenAI @@ -593,12 +588,23 @@ class PromptAgent: base_url='https://api.together.xyz', ) logger.info("Generating content with Mistral model: %s", self.model) - - response = client.chat.completions.create( - messages=misrtal_messages, - model=self.model, - max_tokens=max_tokens - ) + + flag = 0 + while True: + try: + if flag > 20: break + response = client.chat.completions.create( + messages=mistral_messages, + model=self.model, + max_tokens=max_tokens + ) + break + except: + if flag == 0: + mistral_messages = [mistral_messages[0]] + mistral_messages[-1:] + else: + mistral_messages[-1]["content"] = ' '.join(mistral_messages[-1]["content"].split()[:-500]) + flag = flag + 1 try: return response.choices[0].message.content From b5d58b8ecd12cf873b240dcbeb86f65a3f3fa80b Mon Sep 17 00:00:00 2001 From: David Chang Date: Tue, 19 Mar 2024 17:43:34 +0800 Subject: [PATCH 21/40] ver Mar19th a tiny fix --- mm_agents/accessibility_tree_wrap/heuristic_retrieve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py index e37f614..9611ea3 100644 --- a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py +++ b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py @@ -40,7 +40,7 @@ def judge_node(node: ET, platform="ubuntu", check_image=False) -> bool: or 
node.tag.endswith("textfield")\ or node.tag.endswith("textarea")\ or node.tag.endswith("menu")\ - or node.tag in [ "alert", "canvas", "check-box" + or node.tag in { "alert", "canvas", "check-box" , "combo-box", "entry", "icon" , "image", "paragraph", "scroll-bar" , "section", "slider", "static" @@ -48,7 +48,7 @@ def judge_node(node: ET, platform="ubuntu", check_image=False) -> bool: , "netuiribbontab", "start", "trayclockwclass" , "traydummysearchcontrol", "uiimage", "uiproperty" , "uiribboncommandbar" - ] + } keeps = keeps and ( platform=="ubuntu"\ and node.get("{{{:}}}showing".format(state_ns), "false")=="true"\ and node.get("{{{:}}}visible".format(state_ns), "false")=="true"\ From 4df088e2ad982f857fc5276b08ac3d57eca971bc Mon Sep 17 00:00:00 2001 From: David Chang Date: Tue, 19 Mar 2024 18:41:55 +0800 Subject: [PATCH 22/40] ver Mar19thv2 supplemented at info back for som setting --- .../heuristic_retrieve.py | 35 +++++++++++--- mm_agents/agent.py | 48 ++++++++++++------- mm_agents/prompts.py | 2 +- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py index 9611ea3..934d8fd 100644 --- a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py +++ b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py @@ -2,7 +2,7 @@ import xml.etree.ElementTree as ET from PIL import Image, ImageDraw, ImageFont -from typing import Tuple +from typing import Tuple, List def find_leaf_nodes(xlm_file_str): if not xlm_file_str: @@ -66,7 +66,7 @@ def judge_node(node: ET, platform="ubuntu", check_image=False) -> bool: coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format(component_ns), "(-1, -1)")) sizes: Tuple[int, int] = eval(node.get("{{{:}}}size".format(component_ns), "(-1, -1)")) - keeps = keeps and coordinates[0]>0 and coordinates[1]>0 and sizes[0]>0 and sizes[1]>0 + keeps = keeps and coordinates[0]>=0 and coordinates[1]>=0 and sizes[0]>0 and sizes[1]>0 return keeps def filter_nodes(root: ET, platform="ubuntu", check_image=False): @@ -86,6 +86,7 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path): draw = ImageDraw.Draw(image) marks = [] drew_nodes = [] + text_informations: List[str] = ["index\ttag\tname\ttext"] try: # Adjust the path to the font file you have or use a default one @@ -135,18 +136,38 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path): #draw.rectangle([text_position, (text_position[0] + 25, text_position[1] + 18)], fill='black') draw.rectangle(text_bbox, fill='black') draw.text(text_position, str(index), font=font, anchor="lb", fill="white") - index += 1 # each mark is an x, y, w, h tuple marks.append([coords[0], coords[1], size[0], size[1]]) drew_nodes.append(_node) + if _node.text: + node_text = ( _node.text if '"' not in _node.text\ + else '"{:}"'.format(_node.text.replace('"', '""')) + ) + elif _node.get("{uri:deskat:uia.windows.microsoft.org}class", "").endswith("EditWrapper") \ + and _node.get("{uri:deskat:value.at-spi.gnome.org}value"): + node_text: str = _node.get("{uri:deskat:value.at-spi.gnome.org}value") + node_text = (node_text if '"' not in node_text\ + else '"{:}"'.format(node_text.replace('"', '""')) + ) + else: + node_text = '""' + text_information: str = "{:d}\t{:}\t{:}\t{:}"\ + .format( index, _node.tag + , _node.get("name", "") + , node_text + ) + text_informations.append(text_information) + + index += 1 + except ValueError: pass # Save the result image.save(output_image_file_path) - 
return marks, drew_nodes + return marks, drew_nodes, "\n".join(text_informations) def print_nodes_with_indent(nodes, indent=0): @@ -157,12 +178,12 @@ def print_nodes_with_indent(nodes, indent=0): if __name__ == '__main__': import json - with open('selection_sorted(imaged).xml', 'r', encoding='utf-8') as f: + with open('3.xml', 'r', encoding='utf-8') as f: xml_file_str = f.read() filtered_nodes = filter_nodes(ET.fromstring(xml_file_str)) print(len(filtered_nodes)) - masks = draw_bounding_boxes( filtered_nodes, 'selection_sorted(imaged).png' - , 'selection_sorted(imaged).ai.png' + masks = draw_bounding_boxes( filtered_nodes, '3.a.png' + , '3.png' ) # print(masks) diff --git a/mm_agents/agent.py b/mm_agents/agent.py index 9613f44..f2d4b5c 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -37,27 +37,36 @@ def linearize_accessibility_tree(accessibility_tree): # leaf_nodes = find_leaf_nodes(accessibility_tree) filtered_nodes = filter_nodes(ET.fromstring(accessibility_tree)) - linearized_accessibility_tree = "tag\tname\ttext\tposition (top-left x&y)\tsize (w&h)\n" + linearized_accessibility_tree = ["tag\tname\ttext\tposition (top-left x&y)\tsize (w&h)"] # Linearize the accessibility tree nodes into a table format for node in filtered_nodes: - linearized_accessibility_tree += node.tag + "\t" - linearized_accessibility_tree += node.attrib.get('name') + "\t" + #linearized_accessibility_tree += node.tag + "\t" + #linearized_accessibility_tree += node.attrib.get('name') + "\t" if node.text: - linearized_accessibility_tree += (node.text if '"' not in node.text else '"{:}"'.format( - node.text.replace('"', '""'))) + "\t" + text = ( node.text if '"' not in node.text\ + else '"{:}"'.format(node.text.replace('"', '""')) + ) elif node.get("{uri:deskat:uia.windows.microsoft.org}class", "").endswith("EditWrapper") \ and node.get("{uri:deskat:value.at-spi.gnome.org}value"): text: str = node.get("{uri:deskat:value.at-spi.gnome.org}value") - linearized_accessibility_tree += (text if '"' not in text else '"{:}"'.format( - text.replace('"', '""'))) + "\t" + text = (text if '"' not in text\ + else '"{:}"'.format(text.replace('"', '""')) + ) else: - linearized_accessibility_tree += '""\t' - linearized_accessibility_tree += node.attrib.get( - '{uri:deskat:component.at-spi.gnome.org}screencoord', "") + "\t" - linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size', "") + "\n" + text = '""' + #linearized_accessibility_tree += node.attrib.get( + #, "") + "\t" + #linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size', "") + "\n" + linearized_accessibility_tree.append( + "{:}\t{:}\t{:}\t{:}\t{:}".format( + node.tag, node.get("name", ""), text + , node.get('{uri:deskat:component.at-spi.gnome.org}screencoord', "") + , node.get('{uri:deskat:component.at-spi.gnome.org}size', "") + ) + ) - return linearized_accessibility_tree + return "\n".join(linearized_accessibility_tree) def tag_screenshot(screenshot, accessibility_tree): @@ -68,9 +77,9 @@ def tag_screenshot(screenshot, accessibility_tree): # nodes = filter_nodes(find_leaf_nodes(accessibility_tree)) nodes = filter_nodes(ET.fromstring(accessibility_tree), check_image=True) # Make tag screenshot - marks, drew_nodes = draw_bounding_boxes(nodes, screenshot, tagged_screenshot_file_path) + marks, drew_nodes, element_list = draw_bounding_boxes(nodes, screenshot, tagged_screenshot_file_path) - return marks, drew_nodes, tagged_screenshot_file_path + return marks, drew_nodes, 
tagged_screenshot_file_path, element_list def parse_actions_from_string(input_string): @@ -395,11 +404,13 @@ class PromptAgent: }) elif self.observation_type == "som": # Add som to the screenshot - masks, drew_nodes, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"]) + masks, drew_nodes, tagged_screenshot, linearized_accessibility_tree = tag_screenshot(obs["screenshot"], obs["accessibility_tree"]) base64_image = encode_image(tagged_screenshot) + logger.debug("LINEAR AT: %s", linearized_accessibility_tree) self.observations.append({ - "screenshot": base64_image + "screenshot": base64_image, + "accessibility_tree": linearized_accessibility_tree }) messages.append({ @@ -407,7 +418,8 @@ class PromptAgent: "content": [ { "type": "text", - "text": "Given the tagged screenshot as below. What's the next step that you will do to help with the task?" + "text": "Given the tagged screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format( + linearized_accessibility_tree) }, { "type": "image_url", @@ -774,7 +786,7 @@ class PromptAgent: if response.status_code == HTTPStatus.OK: try: return response.json()['output']['choices'][0]['message']['content'] - except Exception as e: + except Exception: return "" else: print(response.code) # The error code. diff --git a/mm_agents/prompts.py b/mm_agents/prompts.py index 462aac7..c609a66 100644 --- a/mm_agents/prompts.py +++ b/mm_agents/prompts.py @@ -801,7 +801,7 @@ You CAN predict multiple actions at one step, but you should only return one act SYS_PROMPT_IN_SOM_OUT_TAG = """ You are an agent which follow my instruction and perform desktop computer tasks as instructed. You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. -For each step, you will get an observation of the desktop by a screenshot with interact-able elements marked with numerical tags. And you will predict the action of the computer based on the image. +For each step, you will get an observation of the desktop by 1) a screenshot with interact-able elements marked with numerical tags; and 2) accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the image and test information. You are required to use `pyautogui` to perform the action grounded to the observation, but DONOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. DONOT USE `pyautogui.screenshot()` to make screenshot. You can replace x, y in the code with the tag of the element you want to operate with. 
such as: From ace5842505fa836214d7451712d4a7c71923ee2c Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Tue, 19 Mar 2024 18:57:47 +0800 Subject: [PATCH 23/40] Fix typo --- mm_agents/download_ckpt.sh | 3 --- mm_agents/prompts.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 mm_agents/download_ckpt.sh diff --git a/mm_agents/download_ckpt.sh b/mm_agents/download_ckpt.sh deleted file mode 100644 index 146fcea..0000000 --- a/mm_agents/download_ckpt.sh +++ /dev/null @@ -1,3 +0,0 @@ -wget https://github.com/UX-Decoder/Semantic-SAM/releases/download/checkpoint/swinl_only_sam_many2many.pth -wget https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v1.pt -wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth \ No newline at end of file diff --git a/mm_agents/prompts.py b/mm_agents/prompts.py index c609a66..3a916d9 100644 --- a/mm_agents/prompts.py +++ b/mm_agents/prompts.py @@ -801,7 +801,7 @@ You CAN predict multiple actions at one step, but you should only return one act SYS_PROMPT_IN_SOM_OUT_TAG = """ You are an agent which follow my instruction and perform desktop computer tasks as instructed. You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. -For each step, you will get an observation of the desktop by 1) a screenshot with interact-able elements marked with numerical tags; and 2) accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the image and test information. +For each step, you will get an observation of the desktop by 1) a screenshot with interact-able elements marked with numerical tags; and 2) accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the image and text information. You are required to use `pyautogui` to perform the action grounded to the observation, but DONOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. DONOT USE `pyautogui.screenshot()` to make screenshot. You can replace x, y in the code with the tag of the element you want to operate with. such as: From 080035f2100a3442390bfd1ed7b2a1a26f19db6c Mon Sep 17 00:00:00 2001 From: Tianbao Xie <47296835+Timothyxxx@users.noreply.github.com> Date: Tue, 19 Mar 2024 19:49:02 +0800 Subject: [PATCH 24/40] Update README.md --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6262044..948c13b 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,10 @@ Please refer to [guidance](https://docs.google.com/document/d/1KBdeZwmZs2Vi_Wsnn 2. Install the environment package, download the examples and the virtual machine image. 
For x86_64 Linux or Windows, you can install the environment package and download the examples and the virtual machine image by running the following commands: ```bash -pip install desktop-env -gdown xxxx +git clone https://github.com/xlang-ai/DesktopEnv +cd DesktopEnv +pip install -r requirements.txt +gdown --id 1HX5gcf7UeyR-2UmiA15Q9U-Wr6E6Gio8 --file vmrun -T ws start "Ubuntu/Ubuntu.vmx" nogui vmrun -T ws snapshot "Ubuntu/Ubuntu.vmx" "init_state" ``` @@ -89,4 +91,4 @@ If you find this environment useful, please consider citing our work: journal={arXiv preprint arXiv:xxxx.xxxx}, year={2024} } -``` \ No newline at end of file +``` From 25dae64fa6c22bc672b48bb0bdaa1b3f58742f3c Mon Sep 17 00:00:00 2001 From: David Chang Date: Tue, 19 Mar 2024 22:31:11 +0800 Subject: [PATCH 25/40] ver Mar19th partial windows task configs --- .../46407397-a7d5-4c6b-92c6-dbe038b1457b.json | 132 +++++++++++++ .../6d72aad6-187a-4392-a4c4-ed87269c51cf.json | 18 ++ .../74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json | 174 ++++++++++++++++++ .../897e3b53-5d4d-444b-85cb-2cdc8a97d903.json | 97 ++++++++++ .../b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 112 +++++++++++ .../c867c42d-a52d-4a24-8ae3-f75d256b5618.json | 98 ++++++++++ .../da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json | 73 ++++++++ .../f918266a-b3e0-4914-865d-4faa564f1aef.json | 47 +++++ 8 files changed, 751 insertions(+) create mode 100644 evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json create mode 100644 evaluation_examples/examples/Windows/multi_app/6d72aad6-187a-4392-a4c4-ed87269c51cf.json create mode 100644 evaluation_examples/examples/Windows/multi_app/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json create mode 100644 evaluation_examples/examples/Windows/multi_app/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json create mode 100644 evaluation_examples/examples/Windows/multi_app/b5062e3e-641c-4e3a-907b-ac864d2e7652.json create mode 100644 evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json create mode 100644 evaluation_examples/examples/Windows/multi_app/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json create mode 100644 evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json diff --git a/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json b/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json new file mode 100644 index 0000000..567d642 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json @@ -0,0 +1,132 @@ +{ + "id": "46407397-a7d5-4c6b-92c6-dbe038b1457b", + "snapshot": "chrome", + "instruction": "Help me export charts, graph or other images from docx files received in email \"Lecture Document\" in Notes folder and upload these png files to the figures/ folder in Google Drive for later use (use numbers to name them).", + "source": "https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive", + "config": [ + { + "type": "googledrive", + "parameters": { + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "operation": ["delete"], + "args": [ + { + "query": "title = 'figures' and 'root' in parents and mimeType = 'application/vnd.google-apps.folder'", + "trash": false + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": "nc -l -p 9222 |nc 
127.0.0.1 1337", + "shell": true + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://news.google.com", + "https://x.com" + ] + } + }, + { + "type": "login", + "parameters": { + "settings_file": "evaluation_examples/settings/google/settings.json", + "platform": "googledrive" + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=18jdi0OanMtAQenm4ODTivsxTSzdj4HUV&export=download&authuser=0&confirm=t&uuid=e858d3cc-4535-4419-a651-8856ac517d19&at=APZUnTW7g4ygfrkKTPBWCO13twRj:1706611460571", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "-xz", + "--recursive-unlink", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "chrome" + ], + "evaluator": { + "func": "compare_image_list", + "result": { + "type": "googledrive_file", + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "query_list": [ + [ + "title = 'figures' and trashed = false and 'root' in parents and mimeType = 'application/vnd.google-apps.folder'", + "title = '1.png' and trashed = false" + ], + [ + "title = 'figures' and trashed = false and 'root' in parents and mimeType = 'application/vnd.google-apps.folder'", + "title = '2.png' and trashed = false" + ], + [ + "title = 'figures' and trashed = false and 'root' in parents and mimeType = 'application/vnd.google-apps.folder'", + "title = '3.png' and trashed = false" + ] + ], + "dest": [ + "1.png", + "2.png", + "3.png" + ] + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.usercontent.google.com/download?id=19J5tzWjx9hdo-n0MC3upzAntVMa8WUgk&export=download&authuser=0&confirm=t&uuid=be790579-8db9-4bd2-a757-beb27af386af&at=APZUnTVM2PjNDXhlwFZ6WAFdNVsD:1706497547717", + "https://drive.usercontent.google.com/download?id=1S04RpR5dk80LylIYGvA4e3sAUBd6wdlQ&export=download&authuser=0&confirm=t&uuid=b302de03-04f7-455c-ab0c-b3cbbeb6929a&at=APZUnTVD8zMZGO1_GWaFUm1cNXul:1706497555463", + "https://drive.usercontent.google.com/download?id=11NRLh93RTzEd0Cy-cYwMyNJSFG7-vP9c&export=download&authuser=0&confirm=t&uuid=02500115-dea3-481a-af4f-a723d9a62169&at=APZUnTW9-gENlsyfdIPA4PTA0emh:1706497560874" + ], + "dest": [ + "1_gold.png", + "2_gold.png", + "3_gold.png" + ], + "multi": true, + "gives": [0, 1, 2] + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/6d72aad6-187a-4392-a4c4-ed87269c51cf.json b/evaluation_examples/examples/Windows/multi_app/6d72aad6-187a-4392-a4c4-ed87269c51cf.json new file mode 100644 index 0000000..991909a --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/6d72aad6-187a-4392-a4c4-ed87269c51cf.json @@ -0,0 +1,18 @@ +{ + "id": "6d72aad6-187a-4392-a4c4-ed87269c51cf", + "snapshot": "libreoffice_calc", + "instruction": "Could you please converting MS Office PowerPoint presentation to video and play it with VLC?", + "source": "https://superuser.com/questions/923171/converting-openoffice-impress-presentation-to-video-without-screen-recording", + "config": [ + ], + "trajectory": "trajectories/6d72aad6-187a-4392-a4c4-ed87269c51cf", + "related_apps": [ + "excel", + "powerpoint", + "word", + "vlc" + ], + "evaluator": { + "func": "infeasible" + } +} diff --git 
a/evaluation_examples/examples/Windows/multi_app/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json b/evaluation_examples/examples/Windows/multi_app/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json new file mode 100644 index 0000000..abe1425 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json @@ -0,0 +1,174 @@ +{ + "id": "74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "snapshot": "chrome", + "instruction": "Help me to set up an initial web extension project with help of the web tool, tagging it \"happy-extension v0.0.1\". Leave description blank for now. Include a background script and browser action, while other features are not required. Remember to unzip the auto-generated folder into \"~/Projects\".", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://webext.eu" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Projects" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "nautilus", + "/home/user/Projects" + ] + } + } + ], + "trajectory": "trajectories/74d5859f-ed66-4d3e-aa0e-93d7a592ce41", + "related_apps": [ + "chrome", + "os" + ], + "evaluator": { + "func": [ + "check_json", + "diff_text_file", + "diff_text_file", + "diff_text_file", + "diff_text_file" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/manifest.json", + "dest": "manifest.json" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/background_script.js", + "dest": "background_script.js" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/index.html", + "dest": "index.html" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/style.css", + "dest": "style.css" + }, + { + "type": "vm_file", + "path": "/home/user/Projects/happy-extension/browserAction/script.js", + "dest": "script.js" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "expect": [ + { + "key": [ + "name" + ], + "method": "eq", + "ref": "happy-extension" + }, + { + "key": [ + "version" + ], + "method": "eq", + "ref": "0.0.1" + }, + { + "key": [ + "background", + "scripts" + ], + "method": "eq", + "ref": [ + "background_script.js" + ] + }, + { + "key": [ + "browser_action", + "default_icon" + ], + "method": "eq", + "ref": { + "64": "icons/icon.png" + } + }, + { + "key": [ + "browser_action", + "default_popup" + ], + "method": "eq", + "ref": "browserAction/index.html" + }, + { + "key": [ + "browser_action", + "default_title" + ], + "method": "eq", + "ref": "happy-extension" + } + ] + } + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1t5Llhn6seDUXVs-eILu6CjwFEQL9Z5Qm&export=download", + "dest": "background_script.js" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=19fMAsWd6q4ElLdOceJ-otHbxRJA_pc_U&export=download", + "dest": "index.html" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1fwfiRPjdug8uh6z23RFO1JtlGH_L_Hl_&export=download", + "dest": "style.css" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=14YYnhCfRtHQNk8M4fBPaUQeteoFMGBsA&export=download", + "dest": "script.js" + } + 
] + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json b/evaluation_examples/examples/Windows/multi_app/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json new file mode 100644 index 0000000..184049d --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json @@ -0,0 +1,97 @@ +{ + "id": "897e3b53-5d4d-444b-85cb-2cdc8a97d903", + "snapshot": "chrome", + "instruction": "I have a LibreOffice Writer file form.docx on the desktop. Help me convert it to PDF format and store the PDF in the forms/ folder in my Google Drive.", + "source": "https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive", + "config": [ + { + "type": "googledrive", + "parameters": { + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "operation": ["delete"], + "args": [ + { + "query": "title = 'form.pdf' or title = 'form.docx' or title = 'form.docx.pdf' or title = 'forms'", + "trash": false + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.zhihu.com/", + "https://www.coursera.org/", + "https://www.deepl.com", + "https://www.wikidata.org/wiki/Wikidata:Main_Page" + ] + } + }, + { + "type": "login", + "parameters": { + "settings_file": "evaluation_examples/settings/google/settings.json", + "platform": "googledrive" + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=18TvzE8jnULU2g9XJsT-TaPEKcLGNVfu0&export=download&authuser=0&confirm=t&uuid=d914e031-9aa6-431b-81c0-73fcb87af027&at=APZUnTUx56WM_I3gnhHo-eZX__kx:1706158167271", + "path": "/home/user/Desktop/form.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/form.docx" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_writer", + "chrome" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "googledrive_file", + "settings_file": "evaluation_examples/settings/googledrive/settings.yml", + "query": [ + "title = 'forms' and mimeType = 'application/vnd.google-apps.folder' and trashed = false", + "( title = 'form.pdf' or title = 'form.docx.pdf' ) and trashed = false" + ], + "dest": "form.pdf" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=118wb7zmG8yP7DS1cImP9-GcOeKib3fLp&export=download&authuser=0&confirm=t&uuid=b82542fa-7731-4014-8ebc-d940f0fb83fe&at=APZUnTVkmL9rk3EpA0Ak5JLPEnJZ:1706101389421", + "dest": "form_gold.pdf" + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/b5062e3e-641c-4e3a-907b-ac864d2e7652.json b/evaluation_examples/examples/Windows/multi_app/b5062e3e-641c-4e3a-907b-ac864d2e7652.json new file mode 100644 index 0000000..c869428 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/b5062e3e-641c-4e3a-907b-ac864d2e7652.json @@ -0,0 +1,112 @@ +{ + "id": "b5062e3e-641c-4e3a-907b-ac864d2e7652", + "snapshot": "libreoffice_calc", + "instruction": "Please help me to extract the name, e-mail, and affiliation of the first author from each paper in the folder and organize them in an Excel table. Include headers for each field. 
Sort the authors by their full names alphabetically and save the table as \"~/authors.xlsx\".", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Documents/Papers" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Documents/Papers/zhang_appagent.pdf", + "url": "https://arxiv.org/pdf/2312.13771.pdf" + }, + { + "path": "/home/user/Documents/Papers/niu_screenagent.pdf", + "url": "https://arxiv.org/pdf/2402.07945.pdf" + }, + { + "path": "/home/user/Documents/Papers/koh_visualwebarena.pdf", + "url": "https://arxiv.org/pdf/2401.13649.pdf" + }, + { + "path": "/home/user/Documents/Papers/deng_mind2web.pdf", + "url": "https://papers.nips.cc/paper_files/paper/2023/file/5950bf290a1570ea401bf98882128160-Paper-Datasets_and_Benchmarks.pdf" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "nautilus", + "/home/user/Documents/Papers" + ] + } + } + ], + "trajectory": "trajectories/b5062e3e-641c-4e3a-907b-ac864d2e7652", + "related_apps": [ + "libreoffice_calc", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "libreoffice", + "--convert-to", + "csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1", + "--outdir", + "/home/user", + "/home/user/authors.xlsx" + ] + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/authors.xlsx", + "dest": "authors.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1fttbvfHuoQfsQUk3fVXkJsCu231jhnQj&export=download", + "dest": "authors-gt.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": ["A1:C1"], + "type": "includes", + "ignore_case": true + }, + { + "range": ["A2:B5"], + "type": "exact_match", + "trim_leadings": " ", + "trim_trailings": " " + }, + { + "range": ["C2:C5"], + "type": "exact_match", + "trim_leadings": " ", + "trim_trailings": " ", + "ignore_case": true + } + ] + } + ] + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json b/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json new file mode 100644 index 0000000..2a8162d --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json @@ -0,0 +1,98 @@ +{ + "id": "c867c42d-a52d-4a24-8ae3-f75d256b5618", + "snapshot": "thunderbird", + "instruction": "Please assist me in exporting my contacts of Personal Address Book from Thunderbird into contacts.csv file in the desktop and convert it to .xlsx with Libreoffice Calc.", + "source": "https://www.sync.blue/en/sync/mozilla-thunderbird/google-sheets/", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--calc" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1wKXmJ14dnxSzdy9ZF_ePWU7zpevY6Dry&export=download&authuser=0&confirm=t&uuid=9b476c95-8eee-4a9a-8cee-c3620d5ce250&at=APZUnTUzDeeeMNr34DB1vEnBK6N7:1706719624132", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "--recursive-unlink", + "-xz", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": 
{ + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "libreoffice_calc" + ], + "evaluator": { + "func": [ + "compare_csv", + "compare_table" + ], + "conj": "and", + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/contacts.csv", + "dest": "contacts.csv" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/contacts.xlsx", + "dest": "contacts.xlsx" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1StwASpAR2ALq2Y1vugGsdUJptg6FwjEm&export=download&authuser=0&confirm=t&uuid=56339e19-b889-4da1-ab72-5e0b90f13fff&at=APZUnTVWFF2pBrtWU_hXgzfbrWP2:1706719668676", + "dest": "contacts_gold.csv" + }, + { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1s25eUpvkMzSm6p_WA7O13t6mVqmkxr2C&export=download&authuser=0&confirm=t&uuid=901cbd32-6026-4391-a5cc-989e1047cf7c&at=APZUnTUs27mZceDshB_f9Tx4PFyz:1706719610831", + "dest": "contacts_gold.xlsx" + } + ], + "options": [ + {}, + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RI0", + "sheet_idx1": "EI0" + } + ] + } + ] + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json b/evaluation_examples/examples/Windows/multi_app/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json new file mode 100644 index 0000000..ee3fa37 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json @@ -0,0 +1,73 @@ +{ + "id": "da52d699-e8d2-4dc5-9191-a2199e0b6a9b", + "snapshot": "libreoffice_calc", + "instruction": "Examine the spreadsheet on the desktop, which contains a record of books read in 2022. Take the website https://howlongtoread.com/ as a reference to identify the book with the slowest reading pace, measured in words per day. 
I have an empty document named 'book_list_result.docx' on the desktop; please open it and record the title there.", + "source": "GAIA", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1JGZNCShtmpu7A8Z8lkjc8hdFEAMXZVvh&export=download&authuser=0&confirm=t&uuid=67063da6-2a72-4ed2-92b2-ade508439ce4&at=APZUnTUgS17YjX-D0oSvALwnPosB:1709368886960", + "path": "/home/user/Desktop/2023_validation_Book_Reading_Rate.xlsx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1iySmK8zvTzgmERH7KQuESP05NBsMunhV&export=download&authuser=0&confirm=t&uuid=130f6cee-0f9a-4f2e-a84d-89a3b302f350&at=APZUnTXugQOTOApe1_zxUbafo2Sp:1709369519349", + "path": "/home/user/Desktop/book_list_result.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/2023_validation_Book_Reading_Rate.xlsx" + } + } + ], + "trajectory": "trajectories/da52d699-e8d2-4dc5-9191-a2199e0b6a9b", + "related_apps": [ + "libreoffice_calc", + "chrome", + "libreoffice_writer" + ], + "evaluator": { + "func": "compare_docx_files", + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "book_list_result.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + ] + } + } + ], + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1rpvOlHZO0AqC85od8pJtx8YcDPljcejN&export=download&authuser=0&confirm=t&uuid=24a3a5e3-a188-4a41-ad01-a4709dc1c0b6&at=APZUnTWx56rr8-iTuXkfV5poOK-I:1709369145446", + "dest": "book_list_result_Gold.docx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/book_list_result.docx", + "dest": "book_list_result.docx" + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json b/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json new file mode 100644 index 0000000..27adcf8 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json @@ -0,0 +1,47 @@ +{ + "id": "f918266a-b3e0-4914-865d-4faa564f1aef", + "snapshot": "vscode", + "instruction": "Please complete the code and retrieve the output from the Python script 'calculator.py' located on the desktop and save it as 'log.txt' in the same directory as the Python file.", + "source": "GAIA", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1l09TnSiXo-qOK2UazcIdrT_M6JwTfzq7&export=download&authuser=0&confirm=t&uuid=80bd550f-f3a6-4b69-ae0f-221c12b11fd9&at=APZUnTWgUlKuIDJZmkr0Q9Bze3w_:1709784652645", + "path": "/home/user/Desktop/calculator.zip" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "unzip /home/user/Desktop/calculator.zip -d /home/user/Desktop/ && rm -rf /home/user/Desktop/calculator.zip" + ] + } + } + ], + "trajectory": "trajectories/f918266a-b3e0-4914-865d-4faa564f1aef", + "related_apps": [ + "vscode", + "os" + ], + "evaluator": { + "func": "compare_text_file", + "expected": { + "type": "cloud_file", + "path": 
"https://drive.usercontent.google.com/download?id=1-14AgA1nHNL22VD_3QtRzWaMjIBa3RvJ&export=download&authuser=0&confirm=t&uuid=6aa05bf1-4964-4f7b-8983-d28540b4053b&at=APZUnTXuJgDHIYA2FZl3A_OQJEOF:1709881263131", + "dest": "log_Gold.txt" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/log.txt", + "dest": "log.txt" + } + } +} From 7041240d0a1eca736e6d329e0bd7b77893a2ff6b Mon Sep 17 00:00:00 2001 From: Yiheng Xu Date: Tue, 19 Mar 2024 22:58:47 +0800 Subject: [PATCH 26/40] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 948c13b..44d8517 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,8 @@ For x86_64 Linux or Windows, you can install the environment package and downloa git clone https://github.com/xlang-ai/DesktopEnv cd DesktopEnv pip install -r requirements.txt -gdown --id 1HX5gcf7UeyR-2UmiA15Q9U-Wr6E6Gio8 --file +gdown https://drive.google.com/drive/folders/1HX5gcf7UeyR-2UmiA15Q9U- +Wr6E6Gio8 -O Ubuntu --folder vmrun -T ws start "Ubuntu/Ubuntu.vmx" nogui vmrun -T ws snapshot "Ubuntu/Ubuntu.vmx" "init_state" ``` From 6b39c0da1870384efc3169f809f2342f9500d774 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 20 Mar 2024 13:13:58 +0800 Subject: [PATCH 27/40] Remove examples that cannot be runned --- evaluation_examples/test_all.json | 1 - evaluation_examples/test_small.json | 1 - settings.json | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/evaluation_examples/test_all.json b/evaluation_examples/test_all.json index 7153d86..e530435 100644 --- a/evaluation_examples/test_all.json +++ b/evaluation_examples/test_all.json @@ -286,7 +286,6 @@ "788b3701-3ec9-4b67-b679-418bfa726c22", "48c46dc7-fe04-4505-ade7-723cba1aa6f6", "42d25c08-fb87-4927-8b65-93631280a26f", - "bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108", "e8172110-ec08-421b-a6f5-842e6451911f", "42f4d1c7-4521-4161-b646-0a8934e36081", "3c8f201a-009d-4bbe-8b65-a6f8b35bb57f", diff --git a/evaluation_examples/test_small.json b/evaluation_examples/test_small.json index 4c1feb7..aec99fc 100644 --- a/evaluation_examples/test_small.json +++ b/evaluation_examples/test_small.json @@ -70,7 +70,6 @@ "c2751594-0cd5-4088-be1b-b5f2f9ec97c4", "48c46dc7-fe04-4505-ade7-723cba1aa6f6", "42d25c08-fb87-4927-8b65-93631280a26f", - "bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108", "3c8f201a-009d-4bbe-8b65-a6f8b35bb57f", "d68204bf-11c1-4b13-b48b-d303c73d4bf6", "91190194-f406-4cd6-b3f9-c43fac942b22", diff --git a/settings.json b/settings.json index 7ee7a21..75ddfa3 100644 --- a/settings.json +++ b/settings.json @@ -1,3 +1,3 @@ { - "time_limit": "600" + "time_limit": "1800" } \ No newline at end of file From 614906162109dbd033ad39016fc2220a6c3cf78e Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 20 Mar 2024 14:25:09 +0800 Subject: [PATCH 28/40] ver Mar20th fixed a bug in _create_pywinauto_node --- desktop_env/server/main.py | 4 +++- .../multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index d53232e..e9ab99f 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -545,12 +545,14 @@ def _create_pywinauto_node(node: BaseWrapper, depth: int = 0, flag: Optional[str node_role_name = "".join( map( lambda ch: ch if ch.isidentifier()\ or ch in {"-"}\ or ch.isalnum() - else "-" + else "-" , node_role_name ) ) if node_role_name.strip() == "": node_role_name = "unknown" + if not node_role_name[0].isalpha(): + 
node_role_name = "tag" + node_role_name xml_node = lxml.etree.Element( node_role_name, diff --git a/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json b/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json index 567d642..6e179ca 100644 --- a/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json +++ b/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json @@ -21,7 +21,7 @@ "type": "launch", "parameters": { "command": [ - "google-chrome", + "C:\Program Files\Google\Chrome\Application\chrome.exe", "--remote-debugging-port=1337" ] } From 2746bcfe24ec07ebd77749bc5179aad61310781e Mon Sep 17 00:00:00 2001 From: tsuky_chen <91684733+chenjix@users.noreply.github.com> Date: Wed, 20 Mar 2024 20:15:04 +0800 Subject: [PATCH 29/40] Update c6bf789c-ba3a-4209-971d-b63abf0ab733.json --- .../vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation_examples/examples/vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json b/evaluation_examples/examples/vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json index e011be9..a292449 100644 --- a/evaluation_examples/examples/vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json +++ b/evaluation_examples/examples/vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/94gda7383revq68jl0c4fu852bb4a375/1709010000000/09ed1496-6945-4b34-b938-8e5f67e64d8f/108888117743638485671/ADt3v-NkzdbsoW3-0iDvDvlUAcCN3jRyAtBZH4ork--gAhv8JfYhMxiHDU7hr0GB-M8y8BSEArd4Z6becXlgNGuv7k50hOVsShmmQ22KgJkGimg6urK1fzkCG0VS_5cfdxRsjKQByRZmmvX675Zw5NQoRMgcJpTqcjIBr01BsSljkvtPU1wu_iVz_w1i2lk6TlTsNNIa3MRCK1zG4Fd7qySq5Tg6TzWhga1uewXlYGfQVwxyBlVX7rPuJBud2CB7UfZFQbd-2DftnZRA3zSYpDmfwc9NutAdmmuvGr6Fj9395yItzi5Vt6sUWHZfSykXy8DdHtsONn32?j=09ed1496-6945-4b34-b938-8e5f67e64d8f&user=6816948370&i=0&authuser=0", + "url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/rkf1cuke8058emd04p56h64o2t7s379c/1710936000000/d1078779-8571-4d29-801c-0f0086482fbe/108888117743638485671/ADt3v-NC72fh-yQHWPMJ3fX_ktwFdkqv26g5iaT3C3PjmnbxMl9ceLf1UsJMTGgTOV0EN5qtk9ssBob5WBXX2MAteQAnzk84OgaxghFpZ_sv2Le8MJECZ8wXMB-EbhadIhpp5oz6hA-C9cjeTqjar2CFIo7M430D6PH7e0whvm7eR-do76riBtK8ZqqUdPXAYKbN4uHd8E3snQ4eGdQf7SlKZaJVFqVlxkTxyGdIqpaZi1ov60Pe3WymQw6LTd8N6D5h0mbFRLiNbmby_JN342rTvg7BxOLvwA3wTAvo6dlfeOeERGfLi4oNy-3d423YP0MnJbvdvV5c?j=d1078779-8571-4d29-801c-0f0086482fbe&user=6816948370&i=0&authuser=0", "path": "/home/user/Downloads/hw_python_oop-master.zip" } ] @@ -63,4 +63,4 @@ "dest": "settings.json" } } -} \ No newline at end of file +} From 21e3ce5cbae96df72c7234b72fae542a831a173b Mon Sep 17 00:00:00 2001 From: tsuky_chen <91684733+chenjix@users.noreply.github.com> Date: Wed, 20 Mar 2024 20:17:18 +0800 Subject: [PATCH 30/40] Update 70745df8-f2f5-42bd-8074-fbc10334fcc5.json --- .../vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation_examples/examples/vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json b/evaluation_examples/examples/vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json index 4d1c86b..b9dba79 100644 --- a/evaluation_examples/examples/vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json +++ b/evaluation_examples/examples/vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json @@ -9,7 
+9,7 @@ "parameters": { "files": [ { - "url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/tvf25tcbo8jda5qvbhlr5et7mb3l00qr/1709005500000/767b5ea4-1bdf-4b49-9fa0-c17b53e21f8a/108888117743638485671/ADt3v-PaaonAsKLjIRGjHf-MSjw2YlZrA_AiqizGDB9kBc9aOX8OpnU4AjTlh83sB2TPylr28DyOIJhAt4Wpnvm3DK8bVGBOM7JyLSyFtO_hXXbDtrF2DyWDuYy-9PqaxJuwgPfpXVnTuwOwYbZh5kebA99822_ymo383VWrpSaga6MjZXZFtGdl5r87fxwi5G7KgL_bQFo3QUWadawJzldqrwe6KRIIo0Zru0oIVazeM7LtjFV4WWLozAJ7ZJ3lS6qCKJltKN0wpg6Sdw1rS1VzDq_tYo0n2uR4zDll5cMMA8fW5AhU44PNxnWmGmivzJszfXA4Fn7I?j=767b5ea4-1bdf-4b49-9fa0-c17b53e21f8a&user=6816948370&i=0&authuser=0", + "url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/cnup6b6flpoegsolrangjudv2r8eu535/1710936000000/82b28ffd-6625-4027-86f1-f04cc95198a6/108888117743638485671/ADt3v-Po3UMopGHyM5lsm1tV432fKFWzgTlameSobbZNOXnKb8UebE9byN9l2XJCE87VxHTOC3JDZE6Jbh8L45AENAuh2X9VlZjQlThlBU4VWWNxAH8HaRvA0C0A05_LtgKl5qu0PVms54D3Vx2c-RE-wS0_6O3UWNiFQUbNCD07yjic8wqWw8ny817is4-JKflFoyUHkjU7go0NvAChbo_al1djyrZYsb1N594lNMGK5C7NDrUpYr4rZxx4nKCJHblPA_MNqbMSrftEA1JgoFqjesxeA1tXclICqzxLwsFtZrDKdP0ilPIyE66d5g0IN6CT2vb74Dnp?j=82b28ffd-6625-4027-86f1-f04cc95198a6&user=6816948370&i=0&authuser=0", "path": "/home/user/Downloads/HW-8-main-20240207T164539Z-001.zip" } ] @@ -62,4 +62,4 @@ "dest": "settings.json" } } -} \ No newline at end of file +} From 966339dee0f8c0182ca814b12ed857db59926783 Mon Sep 17 00:00:00 2001 From: tsuky_chen <91684733+chenjix@users.noreply.github.com> Date: Wed, 20 Mar 2024 20:38:27 +0800 Subject: [PATCH 31/40] Update 70745df8-f2f5-42bd-8074-fbc10334fcc5.json --- .../examples/vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_examples/examples/vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json b/evaluation_examples/examples/vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json index b9dba79..3c41b6c 100644 --- a/evaluation_examples/examples/vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json +++ b/evaluation_examples/examples/vs_code/70745df8-f2f5-42bd-8074-fbc10334fcc5.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/cnup6b6flpoegsolrangjudv2r8eu535/1710936000000/82b28ffd-6625-4027-86f1-f04cc95198a6/108888117743638485671/ADt3v-Po3UMopGHyM5lsm1tV432fKFWzgTlameSobbZNOXnKb8UebE9byN9l2XJCE87VxHTOC3JDZE6Jbh8L45AENAuh2X9VlZjQlThlBU4VWWNxAH8HaRvA0C0A05_LtgKl5qu0PVms54D3Vx2c-RE-wS0_6O3UWNiFQUbNCD07yjic8wqWw8ny817is4-JKflFoyUHkjU7go0NvAChbo_al1djyrZYsb1N594lNMGK5C7NDrUpYr4rZxx4nKCJHblPA_MNqbMSrftEA1JgoFqjesxeA1tXclICqzxLwsFtZrDKdP0ilPIyE66d5g0IN6CT2vb74Dnp?j=82b28ffd-6625-4027-86f1-f04cc95198a6&user=6816948370&i=0&authuser=0", + "url": "https://drive.usercontent.google.com/download?id=1k1d2UbXvp05gDdV669gNDnbdEv9SsAtN&export=download&authuser=0&confirm=t&uuid=c3d51b38-e061-4198-80cd-3cd251de8dae&at=APZUnTXaiHViMYwtweYPykye7N5u:1710938272734", "path": "/home/user/Downloads/HW-8-main-20240207T164539Z-001.zip" } ] From d2d4a54a3f7552db9a35f075f052adbb58c9edc1 Mon Sep 17 00:00:00 2001 From: tsuky_chen <91684733+chenjix@users.noreply.github.com> Date: Wed, 20 Mar 2024 20:45:45 +0800 Subject: [PATCH 32/40] Update c6bf789c-ba3a-4209-971d-b63abf0ab733.json --- .../examples/vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_examples/examples/vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json 
b/evaluation_examples/examples/vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json index a292449..ce94baf 100644 --- a/evaluation_examples/examples/vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json +++ b/evaluation_examples/examples/vs_code/c6bf789c-ba3a-4209-971d-b63abf0ab733.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/rkf1cuke8058emd04p56h64o2t7s379c/1710936000000/d1078779-8571-4d29-801c-0f0086482fbe/108888117743638485671/ADt3v-NC72fh-yQHWPMJ3fX_ktwFdkqv26g5iaT3C3PjmnbxMl9ceLf1UsJMTGgTOV0EN5qtk9ssBob5WBXX2MAteQAnzk84OgaxghFpZ_sv2Le8MJECZ8wXMB-EbhadIhpp5oz6hA-C9cjeTqjar2CFIo7M430D6PH7e0whvm7eR-do76riBtK8ZqqUdPXAYKbN4uHd8E3snQ4eGdQf7SlKZaJVFqVlxkTxyGdIqpaZi1ov60Pe3WymQw6LTd8N6D5h0mbFRLiNbmby_JN342rTvg7BxOLvwA3wTAvo6dlfeOeERGfLi4oNy-3d423YP0MnJbvdvV5c?j=d1078779-8571-4d29-801c-0f0086482fbe&user=6816948370&i=0&authuser=0", + "url": "https://drive.usercontent.google.com/download?id=1ITuXkSbTF0BcbTQ3v4A1qnSzbTPrP5ax&export=download&authuser=0&confirm=t&uuid=c6c45cbf-63bc-4cb0-b76c-5a663c0ed820&at=APZUnTVrE-pn_e6HGTp_Eg4ziQhi:1710938673095", "path": "/home/user/Downloads/hw_python_oop-master.zip" } ] From 15e01e7ccc218f65b1f21a3b61c55e25e0e0c229 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 20 Mar 2024 22:22:57 +0800 Subject: [PATCH 33/40] ver Mar20thv2 fixed bugs in server/main.py (_create_pywinauto_node and get_screen_size) finished migration of a few task configs to Windows fixed bug in python.py --- branch_flag | 2 +- desktop_env/controllers/python.py | 4 +- desktop_env/server/main.py | 26 ++++++++++--- .../46407397-a7d5-4c6b-92c6-dbe038b1457b.json | 39 ++++++++++++------- .../897e3b53-5d4d-444b-85cb-2cdc8a97d903.json | 11 +++--- .../c867c42d-a52d-4a24-8ae3-f75d256b5618.json | 36 ++++++++++------- .../da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json | 20 ++++++---- .../f918266a-b3e0-4914-865d-4faa564f1aef.json | 28 +++++++------ main.py | 32 --------------- 9 files changed, 105 insertions(+), 93 deletions(-) diff --git a/branch_flag b/branch_flag index cb4898e..9daeafb 100644 --- a/branch_flag +++ b/branch_flag @@ -1 +1 @@ -claude +test diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py index 4159cde..ea11644 100644 --- a/desktop_env/controllers/python.py +++ b/desktop_env/controllers/python.py @@ -64,7 +64,7 @@ class PythonController: It can be used to execute the pyautogui commands, or... any other python command. who knows? """ # command_list = ["python", "-c", self.pkgs_prefix.format(command=command)] - command_list = ["python3", "-c", self.pkgs_prefix.format(command=command)] + command_list = ["python", "-c", self.pkgs_prefix.format(command=command)] payload = json.dumps({"command": command_list, "shell": False}) headers = { 'Content-Type': 'application/json' @@ -344,4 +344,4 @@ class PythonController: return response.json()["directory_tree"] else: logger.error("Failed to get directory tree. 
Status code: %d", response.status_code)
-            return None
\ No newline at end of file
+            return None
diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py
index e9ab99f..8e900a3 100644
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -414,9 +414,18 @@ def _create_pywinauto_node(node: BaseWrapper, depth: int = 0, flag: Optional[str
     attribute_dict: Dict[str, Any] = {"name": node.element_info.name}

     # States {{{ #
-    attribute_dict["{{{:}}}enabled".format(_accessibility_ns_map["st"])] = str(node.is_enabled()).lower()
-    attribute_dict["{{{:}}}visible".format(_accessibility_ns_map["st"])] = str(node.is_visible()).lower()
-    attribute_dict["{{{:}}}active".format(_accessibility_ns_map["st"])] = str(node.is_active()).lower()
+    try:
+        attribute_dict["{{{:}}}enabled".format(_accessibility_ns_map["st"])] = str(node.is_enabled()).lower()
+    except:
+        pass
+    try:
+        attribute_dict["{{{:}}}visible".format(_accessibility_ns_map["st"])] = str(node.is_visible()).lower()
+    except:
+        pass
+    try:
+        attribute_dict["{{{:}}}active".format(_accessibility_ns_map["st"])] = str(node.is_active()).lower()
+    except:
+        pass

     if hasattr(node, "is_minimized"):
         try:
@@ -603,9 +612,14 @@ def get_accessibility_tree():

 @app.route('/screen_size', methods=['POST'])
 def get_screen_size():
-    d = display.Display()
-    screen_width = d.screen().width_in_pixels
-    screen_height = d.screen().height_in_pixels
+    if platform_name=="Linux":
+        d = display.Display()
+        screen_width = d.screen().width_in_pixels
+        screen_height = d.screen().height_in_pixels
+    elif platform_name=="Windows":
+        user32 = ctypes.windll.user32
+        screen_width: int = user32.GetSystemMetrics(0)
+        screen_height: int = user32.GetSystemMetrics(1)
     return jsonify(
         {
             "width": screen_width,
diff --git a/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json b/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json
index 6e179ca..2106e9e 100644
--- a/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json
+++ b/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json
@@ -21,16 +21,18 @@
             "type": "launch",
             "parameters": {
                 "command": [
-                    "C:\Program Files\Google\Chrome\Application\chrome.exe",
-                    "--remote-debugging-port=1337"
+                    "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
+                    "--remote-debugging-port=1337"
                 ]
             }
         },
         {
             "type": "launch",
             "parameters": {
-                "command": "nc -l -p 9222 |nc 127.0.0.1 1337",
-                "shell": true
+                "command": [
+                    "ncat.exe", "-k", "-l", "0.0.0.0", "9222",
+                    "--sh-exec", "ncat.exe 127.0.0.1 1337"
+                ]
             }
         },
         {
@@ -54,8 +56,8 @@
             "parameters": {
                 "files": [
                     {
-                        "url": "https://drive.usercontent.google.com/download?id=18jdi0OanMtAQenm4ODTivsxTSzdj4HUV&export=download&authuser=0&confirm=t&uuid=e858d3cc-4535-4419-a651-8856ac517d19&at=APZUnTW7g4ygfrkKTPBWCO13twRj:1706611460571",
-                        "path": "/home/user/thunderbird-profile.tar.gz"
+                        "url": "https://drive.google.com/uc?id=1Yy-ZrkMq4pIQq1Y75bD2WVJXxHMTaMqE&export=download",
+                        "path": "C:\\Users\\chenj\\thunderbird-profile.7z"
                     }
                 ]
             }
@@ -64,21 +66,30 @@
             "type": "execute",
             "parameters": {
                 "command": [
-                    "tar",
-                    "-xz",
-                    "--recursive-unlink",
-                    "-f",
-                    "/home/user/thunderbird-profile.tar.gz",
-                    "-C",
-                    "/home/user/"
+                    "C:\\Program Files\\7-Zip\\7z.exe",
+                    "x", "C:\\Users\\chenj\\thunderbird-profile.7z"
                 ]
             }
         },
+        {
+            "type": "execute",
+            "parameters": {
+                "command": "rm -r C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird",
+                "shell": true
+            }
+        },
+        {
+            "type":
"execute", + "parameters": { + "command": "mv C:\\Users\\chenj\\Thunderbird C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", + "shell": true + } + }, { "type": "launch", "parameters": { "command": [ - "/usr/bin/thunderbird" + "C:\\Program Files\\Mozilla Thunderbird\\thunderbird.exe" ] } } diff --git a/evaluation_examples/examples/Windows/multi_app/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json b/evaluation_examples/examples/Windows/multi_app/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json index 184049d..948d662 100644 --- a/evaluation_examples/examples/Windows/multi_app/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json +++ b/evaluation_examples/examples/Windows/multi_app/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json @@ -21,7 +21,7 @@ "type": "launch", "parameters": { "command": [ - "google-chrome", + "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", "--remote-debugging-port=1337" ] } @@ -30,9 +30,8 @@ "type": "launch", "parameters": { "command": [ - "socat", - "tcp-listen:9222,fork", - "tcp:localhost:1337" + "ncat.exe", "-k", "-l", "0.0.0.0", "9222", + "--sh-exec", "ncat.exe 127.0.0.1 1337" ] } }, @@ -60,7 +59,7 @@ "files": [ { "url": "https://drive.usercontent.google.com/download?id=18TvzE8jnULU2g9XJsT-TaPEKcLGNVfu0&export=download&authuser=0&confirm=t&uuid=d914e031-9aa6-431b-81c0-73fcb87af027&at=APZUnTUx56WM_I3gnhHo-eZX__kx:1706158167271", - "path": "/home/user/Desktop/form.docx" + "path": "C:\\Users\\chenj\\Desktop\\form.docx" } ] } @@ -68,7 +67,7 @@ { "type": "open", "parameters": { - "path": "/home/user/Desktop/form.docx" + "path": "C:\\Users\\chenj\\Desktop\\form.docx" } } ], diff --git a/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json b/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json index 2a8162d..72d43a0 100644 --- a/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json +++ b/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json @@ -8,8 +8,7 @@ "type": "launch", "parameters": { "command": [ - "libreoffice", - "--calc" + "C:\\Program Files\\Microsoft Office\\root\\Office16\\EXCEL.EXE" ] } }, @@ -18,8 +17,8 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1wKXmJ14dnxSzdy9ZF_ePWU7zpevY6Dry&export=download&authuser=0&confirm=t&uuid=9b476c95-8eee-4a9a-8cee-c3620d5ce250&at=APZUnTUzDeeeMNr34DB1vEnBK6N7:1706719624132", - "path": "/home/user/thunderbird-profile.tar.gz" + "url": "https://drive.google.com/uc?id=1njAaNiujlh1DZzGK7nL5iZsppsNAMkH7&export=download", + "path": "C:\\Users\\chenj\\thunderbird-profile.7z" } ] } @@ -28,21 +27,30 @@ "type": "execute", "parameters": { "command": [ - "tar", - "--recursive-unlink", - "-xz", - "-f", - "/home/user/thunderbird-profile.tar.gz", - "-C", - "/home/user/" + "C:\\Program Files\\7-Zip\\7z.exe", + "x", "C:\\Users\\chenj\\thunderbird-profile.7z" ] } }, + { + "type": "execute", + "parameters": { + "command": "rm -r C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", + "shell": true + } + }, + { + "type": "execute", + "parameters": { + "command": "mv C:\\Users\\chenj\\Thunderbird C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", + "shell": true + } + }, { "type": "launch", "parameters": { "command": [ - "/usr/bin/thunderbird" + "C:\\Program Files\\Mozilla Thunderbird\\thunderbird.exe" ] } } @@ -61,12 +69,12 @@ "result": [ { "type": "vm_file", - "path": "/home/user/Desktop/contacts.csv", + "path": "C:\\Users\\chenj\\Desktop\\contacts.csv", "dest": "contacts.csv" }, { 
"type": "vm_file", - "path": "/home/user/Desktop/contacts.xlsx", + "path": "C:\\Users\\chenj\\Desktop\\contacts.xlsx", "dest": "contacts.xlsx" } ], diff --git a/evaluation_examples/examples/Windows/multi_app/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json b/evaluation_examples/examples/Windows/multi_app/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json index ee3fa37..9138805 100644 --- a/evaluation_examples/examples/Windows/multi_app/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json +++ b/evaluation_examples/examples/Windows/multi_app/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json @@ -10,11 +10,11 @@ "files": [ { "url": "https://drive.usercontent.google.com/download?id=1JGZNCShtmpu7A8Z8lkjc8hdFEAMXZVvh&export=download&authuser=0&confirm=t&uuid=67063da6-2a72-4ed2-92b2-ade508439ce4&at=APZUnTUgS17YjX-D0oSvALwnPosB:1709368886960", - "path": "/home/user/Desktop/2023_validation_Book_Reading_Rate.xlsx" + "path": "C:\\Users\\chenj\\Desktop\\2023_validation_Book_Reading_Rate.xlsx" }, { "url": "https://drive.usercontent.google.com/download?id=1iySmK8zvTzgmERH7KQuESP05NBsMunhV&export=download&authuser=0&confirm=t&uuid=130f6cee-0f9a-4f2e-a84d-89a3b302f350&at=APZUnTXugQOTOApe1_zxUbafo2Sp:1709369519349", - "path": "/home/user/Desktop/book_list_result.docx" + "path": "C:\\Users\\chenj\\Desktop\\book_list_result.docx" } ] } @@ -22,7 +22,7 @@ { "type": "open", "parameters": { - "path": "/home/user/Desktop/2023_validation_Book_Reading_Rate.xlsx" + "path": "C:\\Users\\chenj\\Desktop\\2023_validation_Book_Reading_Rate.xlsx" } } ], @@ -38,7 +38,7 @@ { "type": "activate_window", "parameters": { - "window_name": "book_list_result.docx - LibreOffice Writer", + "window_name": "book_list_result - Word", "strict": true } }, @@ -54,10 +54,16 @@ "command": [ "python", "-c", - "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" ] } - } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } ], "expected": { "type": "cloud_file", @@ -66,7 +72,7 @@ }, "result": { "type": "vm_file", - "path": "/home/user/Desktop/book_list_result.docx", + "path": "C:\\Users\\chenj\\Desktop\\book_list_result.docx", "dest": "book_list_result.docx" } } diff --git a/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json b/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json index 27adcf8..09c8983 100644 --- a/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json +++ b/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json @@ -10,21 +10,27 @@ "files": [ { "url": "https://drive.usercontent.google.com/download?id=1l09TnSiXo-qOK2UazcIdrT_M6JwTfzq7&export=download&authuser=0&confirm=t&uuid=80bd550f-f3a6-4b69-ae0f-221c12b11fd9&at=APZUnTWgUlKuIDJZmkr0Q9Bze3w_:1709784652645", - "path": "/home/user/Desktop/calculator.zip" + "path": "C:\\Users\\chenj\\Desktop\\calculator.zip" } ] } }, - { - "type": "execute", - "parameters": { - "command": [ - "/bin/bash", - "-c", - "unzip /home/user/Desktop/calculator.zip -d /home/user/Desktop/ && rm -rf /home/user/Desktop/calculator.zip" - ] - } - } + { + "type": "execute", + "parameters": { + "command": [ + "C:\\Program Files\\7-Zip\\7z.exe", + "C:\\Users\\chenj\\Desktop\\calculator.zip" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "rm C:\\Users\\chenj\\Desktop\\calculator.zip", + "shell": true + } + } ], "trajectory": 
"trajectories/f918266a-b3e0-4914-865d-4faa564f1aef", "related_apps": [ diff --git a/main.py b/main.py index 06debec..b6ae310 100644 --- a/main.py +++ b/main.py @@ -70,38 +70,6 @@ def human_agent(): done = False logger.info('\x1b[32m[TASK INSTRUCTION]: \x1b[32;3m%s\x1b[0m', example["instruction"]) - trajectory = [ - { - "action_type": "MOVE_TO", # - "parameters": { - "x": 754, - "y": 1057 - } - }, - {"action_type": "CLICK", "parameters": {"button": "right", "num_clicks": 1}} - ] - - for i in range(len(trajectory)): - # action = get_human_action() - - # action = { - # "action_type": 0, - # "click_type": 3, - # } - logger.info(trajectory[i]) - - observation, reward, done, info = env.step(trajectory[i]) - observation.pop("accessibility_tree") - logger.info("Observation: %s", observation) - logger.info("Reward: %.2f", reward) - logger.info("Info: %s", info) - - logger.info("================================\n") - - if done: - logger.info("The episode is done.") - break - input("Press Enter to start human operation...") human_start_time = time.time() input("Press Enter to finish human operation.") From d1e2b12b41c6efd70881e481a4dd8afe3bcb4eb4 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 20 Mar 2024 22:22:59 +0800 Subject: [PATCH 34/40] Fix GIMP bug; Speedup the environment, when there is not a11y tree needed, we can do no controller.get --- desktop_env/envs/desktop_env.py | 8 +++++--- desktop_env/evaluators/metrics/__init__.py | 1 + mm_agents/agent.py | 2 +- run.py | 10 +++++++++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index 7dd70b6..b443a4a 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -58,7 +58,8 @@ class DesktopEnv(gym.Env): tmp_dir: str = "tmp", cache_dir: str = "cache", screen_size: Tuple[int] = (1920, 1080), - headless: bool = False + headless: bool = False, + require_a11y_tree: bool = True, ): """ Args: @@ -77,6 +78,7 @@ class DesktopEnv(gym.Env): self.cache_dir_base: str = cache_dir self.vm_screen_size = screen_size # todo: add the logic to get the screen size from the VM self.headless = headless + self.require_a11y_tree = require_a11y_tree os.makedirs(self.tmp_dir_base, exist_ok=True) @@ -248,7 +250,7 @@ class DesktopEnv(gym.Env): observation = { "screenshot": self._get_obs(), - "accessibility_tree": self.controller.get_accessibility_tree(), + "accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None, } return observation @@ -284,7 +286,7 @@ class DesktopEnv(gym.Env): observation = { "screenshot": self._get_obs(), - "accessibility_tree": self.controller.get_accessibility_tree(), + "accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None, # "terminal": self.controller.get_terminal_output(), "instruction": self.instruction } diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 61bb025..341e138 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -77,6 +77,7 @@ from .general import ( literal_match ) from .gimp import ( + check_structure_sim_resized, check_brightness_decrease_and_structure_sim, check_contrast_increase_and_structure_sim, check_saturation_increase_and_structure_sim, diff --git a/mm_agents/agent.py b/mm_agents/agent.py index f2d4b5c..e9f1147 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -350,7 +350,7 @@ class PromptAgent: 
# {{{1 if self.observation_type in ["screenshot", "screenshot_a11y_tree"]: base64_image = encode_image(obs["screenshot"]) - linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) if self.observation_type == "screenshot_a11y_tree" else None logger.debug("LINEAR AT: %s", linearized_accessibility_tree) if self.observation_type == "screenshot_a11y_tree": diff --git a/run.py b/run.py index 92e989a..e6f67f9 100644 --- a/run.py +++ b/run.py @@ -95,6 +95,10 @@ def config() -> argparse.Namespace: parser.add_argument("--max_tokens", type=int, default=1500) parser.add_argument("--stop_token", type=str, default=None) + # example config + parser.add_argument("--domain", type=str, default="all") + parser.add_argument("--test_all_meta_path", type=str, default="evaluation_examples/test_all.json") + # logging related parser.add_argument("--result_dir", type=str, default="./results") args = parser.parse_args() @@ -144,6 +148,7 @@ def test( action_space=agent.action_space, screen_size=(args.screen_width, args.screen_height), headless=args.headless, + require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"], ) for domain in tqdm(test_all_meta, desc="Domain"): @@ -264,9 +269,12 @@ if __name__ == '__main__': os.environ["TOKENIZERS_PARALLELISM"] = "false" args = config() - with open("evaluation_examples/test_all.json", "r", encoding="utf-8") as f: + with open(args.test_all_meta_path, "r", encoding="utf-8") as f: test_all_meta = json.load(f) + if args.domain != "all": + test_all_meta = {args.domain: test_all_meta[args.domain]} + test_file_list = get_unfinished( args.action_space, args.model, From 1f91777f3cafea5f2b76668bb1ffeacb94ac3180 Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Wed, 20 Mar 2024 22:24:35 +0800 Subject: [PATCH 35/40] remove googledrive credentials; update google account keys --- .gitignore | 3 +++ evaluation_examples/settings/google/settings.json | 4 ++-- evaluation_examples/settings/googledrive/credentials.json | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) delete mode 100644 evaluation_examples/settings/googledrive/credentials.json diff --git a/.gitignore b/.gitignore index df58997..f8343ce 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ *.pth *.pt +# Credential files +evaluation_examples/settings/googledrive/credentials.json + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/evaluation_examples/settings/google/settings.json b/evaluation_examples/settings/google/settings.json index ae70605..cc20c51 100644 --- a/evaluation_examples/settings/google/settings.json +++ b/evaluation_examples/settings/google/settings.json @@ -1,4 +1,4 @@ { "email": "xlang2024anonym@gmail.com", - "password": "q]wN~0iD>H:6" -} \ No newline at end of file + "password": "Evt5LLj!VJ6Y!C$B" +} diff --git a/evaluation_examples/settings/googledrive/credentials.json b/evaluation_examples/settings/googledrive/credentials.json deleted file mode 100644 index 81d22c2..0000000 --- a/evaluation_examples/settings/googledrive/credentials.json +++ /dev/null @@ -1 +0,0 @@ -{"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", 
"client_secret": "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", "refresh_token": "1//0eVpYfdSAjvbCCgYIARAAGA4SNwF-L9IrAgL6KVceiEVTjtQdmPki2I3m8ejP3lzTLL2Wa3-rdrYfU7eYeKDVCS5KRxa_xCE_pPY", "token_expiry": "2024-03-13T10:09:01Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"} \ No newline at end of file From 6927d9e39d68d8d965a4e3b4c0c57fdeaa0026bf Mon Sep 17 00:00:00 2001 From: Siheng Zhao <1730625285@qq.com> Date: Wed, 20 Mar 2024 22:41:05 +0800 Subject: [PATCH 36/40] [feature] add image downsample func --- mm_agents/agent.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm_agents/agent.py b/mm_agents/agent.py index f2d4b5c..cad6ede 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -6,15 +6,17 @@ import re import time import uuid import xml.etree.ElementTree as ET +import numpy as np from http import HTTPStatus from io import BytesIO -from typing import Dict, List +from typing import Dict, List, Tuple, Union import backoff import dashscope import google.generativeai as genai import openai import requests +import cv2 from PIL import Image from google.api_core.exceptions import InvalidArgument @@ -26,6 +28,14 @@ from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_S logger = logging.getLogger("desktopenv.agent") +def downsample_image(img: Union[str, np.ndarray], ratio: Tuple[float, float]): + fx, fy = ratio + if isinstance(img, str): + img = cv2.imread(img) + + resized = cv2.resize(img, None, fx=fx, fy=fy, interpolation=cv2.INTER_AREA) + return resized + # Function to encode the image def encode_image(image_path): From 3e581c8108bdcbcb92c4fdd1f41022c694ff0a54 Mon Sep 17 00:00:00 2001 From: Fangyu Lei <55661995+lfy79001@users.noreply.github.com> Date: Thu, 21 Mar 2024 07:52:58 +0800 Subject: [PATCH 37/40] Update agent.py claude --- mm_agents/agent.py | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/mm_agents/agent.py b/mm_agents/agent.py index 65ce0f1..d7a5586 100644 --- a/mm_agents/agent.py +++ b/mm_agents/agent.py @@ -555,13 +555,20 @@ class PromptAgent: # "content-type": "application/json" # } + # headers = { + # "Accept": "application / json", + # "Authorization": "Bearer " + os.environ["ANTHROPIC_API_KEY"], + # "User-Agent": "Apifox/1.0.0 (https://apifox.com)", + # "Content-Type": "application/json" + # } + headers = { - "Accept": "application / json", - "Authorization": "Bearer " + os.environ["ANTHROPIC_API_KEY"], - "User-Agent": "Apifox/1.0.0 (https://apifox.com)", + "Authorization": os.environ["ANTHROPIC_API_KEY"], "Content-Type": "application/json" } + + payload = { "model": self.model, "max_tokens": max_tokens, @@ -570,22 +577,23 @@ class PromptAgent: "top_p": top_p } - response = requests.post( - # "https://chat.claude.com/v1/chat/completions", - "https://api.aigcbest.top/v1/chat/completions", - 
headers=headers, - json=payload - ) - - if response.status_code != 200: - - logger.error("Failed to call LLM: " + response.text) - time.sleep(5) - return "" - # else: - # return response.json()['content'][0]['text'] + max_attempts = 20 + attempt = 0 + while attempt < max_attempts: + # response = requests.post("https://api.aigcbest.top/v1/chat/completions", headers=headers, json=payload) + response = requests.post("https://token.cluade-chat.top/v1/chat/completions", headers=headers, json=payload) + if response.status_code == 200: + result = response.json()['choices'][0]['message']['content'] + break + else: + logger.error(f"Failed to call LLM: {response.text}") + time.sleep(10) + attempt += 1 else: - return response.json()['choices'][0]['message']['content'] + print("Exceeded maximum attempts to call LLM.") + result = "" + + return result elif self.model.startswith("mistral"): From 7ca91ca8c92b013f222b48d16a4d9505a102faee Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Thu, 21 Mar 2024 11:16:57 +0800 Subject: [PATCH 38/40] Add action execution timeout for corner cases --- desktop_env/controllers/python.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py index ea11644..b9b3387 100644 --- a/desktop_env/controllers/python.py +++ b/desktop_env/controllers/python.py @@ -71,7 +71,7 @@ class PythonController: } try: - response = requests.post(self.http_server + "/execute", headers=headers, data=payload) + response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=90) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: From dac44b2c4fe0e1a65eef1361fada921947cea9e5 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 21 Mar 2024 15:03:21 +0800 Subject: [PATCH 39/40] ver Mar21st Windows multi_app tasks --- .../185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json | 96 +++++++ .../1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json | 85 ++++++ .../26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json | 68 +++++ .../2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json | 127 +++++++++ .../3a93cae4-ad3e-403e-8c12-65303b271818.json | 162 ++++++++++++ .../46407397-a7d5-4c6b-92c6-dbe038b1457b.json | 4 +- .../6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json | 72 ++++++ .../74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json | 30 +-- .../8e116af7-7db7-4e35-a68b-b0939c066c78.json | 241 ++++++++++++++++++ .../a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json | 91 +++++++ .../b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 38 +-- .../c867c42d-a52d-4a24-8ae3-f75d256b5618.json | 4 +- .../d1acdb87-bb67-4f30-84aa-990e56a09c92.json | 128 ++++++++++ .../deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json | 101 ++++++++ .../e2392362-125e-4f76-a2ee-524b183a3412.json | 121 +++++++++ .../eb303e01-261e-4972-8c07-c9b4e7a4922a.json | 82 ++++++ .../f918266a-b3e0-4914-865d-4faa564f1aef.json | 4 +- .../6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json | 122 ++++----- .../e2392362-125e-4f76-a2ee-524b183a3412.json | 4 +- 19 files changed, 1466 insertions(+), 114 deletions(-) create mode 100644 evaluation_examples/examples/Windows/multi_app/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json create mode 100644 evaluation_examples/examples/Windows/multi_app/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json create mode 100644 evaluation_examples/examples/Windows/multi_app/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json create mode 100644 evaluation_examples/examples/Windows/multi_app/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json create mode 100644 
evaluation_examples/examples/Windows/multi_app/3a93cae4-ad3e-403e-8c12-65303b271818.json create mode 100644 evaluation_examples/examples/Windows/multi_app/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json create mode 100644 evaluation_examples/examples/Windows/multi_app/8e116af7-7db7-4e35-a68b-b0939c066c78.json create mode 100644 evaluation_examples/examples/Windows/multi_app/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json create mode 100644 evaluation_examples/examples/Windows/multi_app/d1acdb87-bb67-4f30-84aa-990e56a09c92.json create mode 100644 evaluation_examples/examples/Windows/multi_app/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json create mode 100644 evaluation_examples/examples/Windows/multi_app/e2392362-125e-4f76-a2ee-524b183a3412.json create mode 100644 evaluation_examples/examples/Windows/multi_app/eb303e01-261e-4972-8c07-c9b4e7a4922a.json diff --git a/evaluation_examples/examples/Windows/multi_app/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json b/evaluation_examples/examples/Windows/multi_app/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json new file mode 100644 index 0000000..203734a --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/185f29bd-5da0-40a6-b69c-ba7f4e0324ef.json @@ -0,0 +1,96 @@ +{ + "id": "185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "snapshot": "libreoffice_calc", + "instruction": "Transfer the data from our 'Employee Performance Evaluation Summary' Excel sheet into our standardized PDF evaluation forms. Each employee's evaluation data should be accurately filled into the designated fields of the PDF form. It's crucial that the final PDF documents retain a uniform and professional look, ready for distribution to our staff or for filing purposes. Furthermore, please ensure that each PDF file is named according to the employee's name as it appears in the Excel document. This will greatly streamline our evaluation process and enhance our efficiency in managing employee performance records. 
Oh, use \"√\" as mark on characters.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "C:\\Users\\chenj\\Desktop\\Employee Performance Evaluation Summary.xlsx", + "url": "https://drive.google.com/uc?id=1uOzi66bzO_WUnoS4Oqsodrd7_YPLatEk&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\review_template.pdf", + "url": "https://drive.google.com/uc?id=1YJ4RPGFUuS48tBh31gBerA16JSMw498w&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\chenj\\Desktop\\Employee Performance Evaluation Summary.xlsx" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\chenj\\Desktop\\review_template.pdf" + } + } + ], + "trajectory": "trajectories/185f29bd-5da0-40a6-b69c-ba7f4e0324ef", + "related_apps": [ + "libreoffice_calc", + "os", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "result": { + "type": "cloud_file", + "path": [ + "https://drive.google.com/uc?id=1kZM90nA1krRmV9ug5_BBe8VlrZRVLiLK&export=download", + "https://drive.google.com/uc?id=1zyLzYYThwyit9ciXpfNfPFlBomolOauY&export=download", + "https://drive.google.com/uc?id=1gMT7JBftuymajMAO5rwksORpeVq3uGmH&export=download", + "https://drive.google.com/uc?id=1x0DdtUSZyBifl1tGIWlWKn255WusJeR4&export=download", + "https://drive.google.com/uc?id=1UAcG32WO8XCXElcanjGwbSpJwFuyOkts&export=download", + "https://drive.google.com/uc?id=1PRgryg7Y5evKnDG2LPtAttVp9qAf5VyZ&export=download", + "https://drive.google.com/uc?id=1JxEDriCS2W7BQLdkIgxu_WFCRa9ib4D7&export=download" + ], + "dest": [ + "Alex Lee_Gold.pdf", + "David Wilson_Gold.pdf", + "Emily Johnson_Gold.pdf", + "John Doe_Gold.pdf", + "Linda Green_Gold.pdf", + "Michael Brown_Gold.pdf", + "Sophia Carter_Gold.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + }, + "expected": { + "type": "vm_file", + "path": [ + "C:\\Users\\chenj\\Desktop\\Alex Lee.pdf", + "C:\\Users\\chenj\\Desktop\\David Wilson.pdf", + "C:\\Users\\chenj\\Desktop\\Emily Johnson.pdf", + "C:\\Users\\chenj\\Desktop\\John Doe.pdf", + "C:\\Users\\chenj\\Desktop\\Linda Green.pdf", + "C:\\Users\\chenj\\Desktop\\Michael Brown.pdf", + "C:\\Users\\chenj\\Desktop\\Sophia Carter.pdf" + ], + "dest": [ + "Alex Lee.pdf", + "David Wilson.pdf", + "Emily Johnson.pdf", + "John Doe.pdf", + "Linda Green.pdf", + "Michael Brown.pdf", + "Sophia Carter.pdf" + ], + "multi": true, + "gives": [0,1,2,3,4,5,6] + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json b/evaluation_examples/examples/Windows/multi_app/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json new file mode 100644 index 0000000..6e49735 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json @@ -0,0 +1,85 @@ + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammer test 3.docx", + "url": "https://drive.google.com/uc?id=1QgyQWVOcAJuPaSlrywb9nuFiQDySsTb2&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Answer.docx", + "url": "https://drive.google.com/uc?id=1BC2DuWJuZggmf6fXl6Ys9xQMZzU6a1br&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules02.pdf", + "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": 
"C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/1f18aa87-af6f-41ef-9853-cdb8f32ebdea", + "related_apps": [ + "os", + "libreoffice_writer" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Answer - Word", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + } + ], + "func": "compare_docx_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1TOMGWC3OFuP6yEGQuRJMEFWdg2NcBPSs&export=download", + "dest": "Answer gold.docx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\Answer.docx", + "dest": "Answer.docx" + }, + "options": { + "ignore_case": true, + "ignore_blanks": true + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json b/evaluation_examples/examples/Windows/multi_app/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json new file mode 100644 index 0000000..8e5b3d7 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/26660ad1-6ebb-4f59-8cba-a8432dfe8d38.json @@ -0,0 +1,68 @@ +{ + "id": "26660ad1-6ebb-4f59-8cba-a8432dfe8d38", + "snapshot": "multiapps", + "instruction": "I want to test the quality of the network environment my laptop is currently in. 
Please measure my network situation through speedtest.net, export the measurement results, and save them to Documents\\Test\\Speed (if the dir does not exist, create it).", + "source": "https://www.speedtest.net/", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "ncat.exe", "-k", "-l", "0.0.0.0", "9222", + "--sh-exec", "ncat.exe 127.0.0.1 1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.speedtest.net/" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Google Chrome" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; time.sleep(0.5);" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps":[ + "os", + "browser" + ], + "evaluator":{ + "func": "compare_time_in_speedtest_results", + "result":{ + "type": "vm_file", + "path": "C:\\Users\\chenj\\Documents\\Test\\Speed\\Speedtest Results Export-.csv", + "dest": "Speedtest Results Export-.csv", + "time_suffix": true + }, + "expected":{ + "type": "time_diff_range", + "diff_range_in_minutes": "60" + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json b/evaluation_examples/examples/Windows/multi_app/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json new file mode 100644 index 0000000..a4afcf9 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json @@ -0,0 +1,127 @@ +{ + "id": "2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "snapshot": "libreoffice_calc", + "instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. 
Making the necessary adjustments if it turns out that the current formatting does not align with APA 7 standards or exists some errors.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": "mkdir \"C:\\Users\\chenj\\Desktop\\students work\" \"C:\\Users\\chenj\\Desktop\\Lec powerpoint\" \"C:\\Users\\chenj\\Desktop\\Grammar test\" \"C:\\Users\\chenj\\Desktop\\Grammar rules PDF\" C:\\Users\\chenj\\Desktop\\FDI", + "shell": true + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\Zheng He .docx", + "url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\The literature reviews of weekly readings.docx", + "url": "https://drive.google.com/uc?id=18zoZCNtP-wTkxXp2FhH3O_NdLZKVMPIr&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\The British Justice System.docx", + "url": "https://drive.google.com/uc?id=1z3YHSN4CvC5kN1AwTWB_-plRS4p5GAch&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\quiz2.docx", + "url": "https://drive.google.com/uc?id=1R5Bii_kvnv_fZVXV-6DMt6Hgq-1gXMo1&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\quiz.docx", + "url": "https://drive.google.com/uc?id=1PvlGMVX7YkricrjoPRe0e5VQlHeozRPD&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\Q1&2&3.docx", + "url": "https://drive.google.com/uc?id=1kLQ3lnba6p9lqikHhKDdbqrYagHnZWU_&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\Photo Ethics in Journalism.docx", + "url": "https://drive.google.com/uc?id=1V6nG6HP_9Kb5KBCRTpaGsRTdPxnJSmRm&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\cassie.docx", + "url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\case study.docx", + "url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules02.pdf", + "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", + "related_apps": [ + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "case study - Word", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + 
"command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + } + ], + "func": "compare_references", + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1325Qfch0JaJ_wJ20ICxMoHeW8KLpK8v0&export=download", + "dest": "case study gold.docx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\students work\\case study.docx", + "dest": "case study.docx" + }, + "options": { + "content_only": true, + "reference_base_result": 0.92 + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/3a93cae4-ad3e-403e-8c12-65303b271818.json b/evaluation_examples/examples/Windows/multi_app/3a93cae4-ad3e-403e-8c12-65303b271818.json new file mode 100644 index 0000000..1b77444 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/3a93cae4-ad3e-403e-8c12-65303b271818.json @@ -0,0 +1,162 @@ +{ + "id": "3a93cae4-ad3e-403e-8c12-65303b271818", + "snapshot": "libreoffice_calc", + "instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": "mkdir \"C:\\Users\\chenj\\Desktop\\students work\" \"C:\\Users\\chenj\\Desktop\\Lec powerpoint\" \"C:\\Users\\chenj\\Desktop\\Grammar test\" \"C:\\Users\\chenj\\Desktop\\Grammar rules PDF\" C:\\Users\\chenj\\Desktop\\FDI", + "shell": true + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\Zheng He .docx", + "url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\cassie.docx", + "url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\students work\\case study.docx", + "url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules02.pdf", + "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammar rules PDF\\csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818", + "related_apps": [ + "os", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Course Timetable - Excel", + "strict": true + } + }, + { 
+ "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + } + ], + "func": [ + "compare_table", + "compare_table", + "compare_table" + ], + "result": [ + { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx", + "dest": "Course Timetable.xlsx" + }, + { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx", + "dest": "Course Timetable.xlsx" + }, + { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\Course Timetable.xlsx", + "dest": "Course Timetable.xlsx" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1VMOon8byWuoCW2Uk5etGMJLMzAfwFVyB&export=download", + "dest": "Course Timetable gold.xlsx" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1jAThiIqILZ5t-RFPHVniSvAL8ZJO1H3P&export=download", + "dest": "Course Timetable gold 2.xlsx" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1U0THDtPCgsw-Rb0N9fjF8DeOepPeUajP&export=download", + "dest": "Course Timetable gold 3.xlsx" + } + ], + "options": [ + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + }, + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + }, + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + } + ], + "conj": "or" + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json b/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json index 2106e9e..ff087fe 100644 --- a/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json +++ b/evaluation_examples/examples/Windows/multi_app/46407397-a7d5-4c6b-92c6-dbe038b1457b.json @@ -74,14 +74,14 @@ { "type": "execute", "parameters": { - "command": "rm -r C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", + "command": "rd /s /q C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", "shell": true } }, { "type": "execute", "parameters": { - "command": "mv C:\\Users\\chenj\\Thunderbird C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", + "command": "move C:\\Users\\chenj\\Thunderbird C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", "shell": true } }, diff --git a/evaluation_examples/examples/Windows/multi_app/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json b/evaluation_examples/examples/Windows/multi_app/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json new file mode 100644 index 0000000..7268754 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json @@ -0,0 +1,72 @@ +{ + "id": "6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a", + "snapshot": "multiapps", + "instruction": "I now want to count the meeting cities of the three machine learning conferences in the past ten years from 2013 to 2019(including 2013 and 2019). I have listed the names and years of the conferences in excel. 
Please fill in the vacant locations.", + "source": "author", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=19wUxTQeoKr6ihJWJ_9cu2tzKQH0cnxWH", + "path": "C:\\Users\\chenj\\Desktop\\ConferenceCity.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\chenj\\Desktop\\ConferenceCity.xlsx" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "calc", "chrome", "os" + ], + "evaluator": { + "postconfig":[ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1ZcITkIOs2Z86S5L6MShSohFs3_xVfeCP", + "path": "C:\\Users\\chenj\\Desktop\\ConferenceCity_Gold.xlsx" + } + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "ConferenceCity - Excel" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + } + ], + "func": "compare_conference_city_in_order", + "expected": { + "type": "rule", + "rules":{ + "expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"] + } + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\ConferenceCity.xlsx", + "dest": "ConferenceCity.xlsx" + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json b/evaluation_examples/examples/Windows/multi_app/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json index abe1425..51a0ae4 100644 --- a/evaluation_examples/examples/Windows/multi_app/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json +++ b/evaluation_examples/examples/Windows/multi_app/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json @@ -1,14 +1,14 @@ { "id": "74d5859f-ed66-4d3e-aa0e-93d7a592ce41", "snapshot": "chrome", - "instruction": "Help me to set up an initial web extension project with help of the web tool, tagging it \"happy-extension v0.0.1\". Leave description blank for now. Include a background script and browser action, while other features are not required. Remember to unzip the auto-generated folder into \"~/Projects\".", + "instruction": "Help me to set up an initial web extension project with help of the web tool, tagging it \"happy-extension v0.0.1\". Leave description blank for now. Include a background script and browser action, while other features are not required. 
Remember to unzip the auto-generated folder into \"Documents\\Projects\".", "source": "authors", "config": [ { "type": "launch", "parameters": { "command": [ - "google-chrome", + "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", "--remote-debugging-port=1337" ] } @@ -17,9 +17,8 @@ "type": "launch", "parameters": { "command": [ - "socat", - "tcp-listen:9222,fork", - "tcp:localhost:1337" + "ncat.exe", "-k", "-l", "0.0.0.0", "9222", + "--sh-exec", "ncat.exe 127.0.0.1 1337" ] } }, @@ -34,19 +33,16 @@ { "type": "execute", "parameters": { - "command": [ - "mkdir", - "-p", - "/home/user/Projects" - ] + "command": "mkdir C:\\Users\\chenj\\Documents\\Projects", + "shell": "true" } }, { "type": "launch", "parameters": { "command": [ - "nautilus", - "/home/user/Projects" + "explorer.exe", + "C:\\Users\\chenj\\Documents\\Projects" ] } } @@ -67,27 +63,27 @@ "result": [ { "type": "vm_file", - "path": "/home/user/Projects/happy-extension/manifest.json", + "path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\manifest.json", "dest": "manifest.json" }, { "type": "vm_file", - "path": "/home/user/Projects/happy-extension/background_script.js", + "path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\background_script.js", "dest": "background_script.js" }, { "type": "vm_file", - "path": "/home/user/Projects/happy-extension/browserAction/index.html", + "path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\browserAction\\index.html", "dest": "index.html" }, { "type": "vm_file", - "path": "/home/user/Projects/happy-extension/browserAction/style.css", + "path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\browserAction\\style.css", "dest": "style.css" }, { "type": "vm_file", - "path": "/home/user/Projects/happy-extension/browserAction/script.js", + "path": "C:\\Users\\chenj\\Documents\\Projects\\happy-extension\\browserAction\\script.js", "dest": "script.js" } ], diff --git a/evaluation_examples/examples/Windows/multi_app/8e116af7-7db7-4e35-a68b-b0939c066c78.json b/evaluation_examples/examples/Windows/multi_app/8e116af7-7db7-4e35-a68b-b0939c066c78.json new file mode 100644 index 0000000..8dba229 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/8e116af7-7db7-4e35-a68b-b0939c066c78.json @@ -0,0 +1,241 @@ +{ + "id": "8e116af7-7db7-4e35-a68b-b0939c066c78", + "snapshot": "libreoffice_calc", + "instruction": "Please update my bookkeeping sheet with the recent transactions from the provided folder, detailing my expenses over the past few days.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "C:\\Users\\chenj\\Desktop\\my_bookkeeping.xlsx", + "url": "https://drive.google.com/uc?id=1QOSpTZPFzFZeC0tng4Gfws544LFln836&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\receipt_0.jpeg", + "url": "https://drive.google.com/uc?id=1b0BRc-BzXObVCUEonJfRbDsrgxZugj3U&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\receipt_1.jpg", + "url": "https://drive.google.com/uc?id=1S-JBDqwEf7Z_JXDItK_F4BOHgScTjlyN&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\receipt_2.jpg", + "url": "https://drive.google.com/uc?id=1Ys2abZi9_0y8sxuj2vCbC0OhjC6YdrC-&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\receipt_3.pdf", + "url": "https://drive.google.com/uc?id=1sKvBbGDpmUkv891xTqX7w5dtEvchQahd&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\receipt_4.jpg", + "url": 
"https://drive.google.com/uc?id=1kW7xH5bc2jRaKGDKHDrgSehTrPgkyzkc&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\chenj\\Desktop\\my_bookkeeping.xlsx" + } + } + ], + "trajectory": "trajectories/8e116af7-7db7-4e35-a68b-b0939c066c78", + "related_apps": [ + "libreoffice_calc", + "os", + "image", + "pdf" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "my_bookkeeping - Excel", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\my_bookkeeping.xlsx", + "dest": "my_bookkeeping.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1ygEDdVlkf2ZyqMxJ_ktqo9G_g--rc6co&export=download", + "dest": "my_bookkeeping_gold.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "A1:A8", + "B1:B8", + "C1:C8", + "D1:D8", + "E1:E8" + ], + "type": "exact_match" + } + ] + }, + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "C9:C13" + ], + "type": "exact_match", + "ignore_case": true + } + ] + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D9", + "props": { + "value": { + "method": "approx:0.1", + "ref": -186.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3670 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -5.7 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -154.06 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "D13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -8.1 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E9", + "props": { + "value": { + "method": "approx:0.1", + "ref": 603.07 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E10", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3066.93 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E11", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3072.63 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E12", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3226.69 + } + } + }, + { + "type": "check_cell", + "sheet_idx": 0, + "coordinate": "E13", + "props": { + "value": { + "method": "approx:0.1", + "ref": -3234.79 + } + } + } + ] + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json b/evaluation_examples/examples/Windows/multi_app/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json new file mode 100644 index 0000000..86ff424 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json @@ -0,0 +1,91 @@ +{ + "id": "a82b78bb-7fde-4cb3-94a4-035baf10bcf0", + 
"snapshot": "libreoffice_calc", + "instruction": "I'm really enjoying this paper. Could you please locate the personal webpages of the initial author and the last three authors? Please include them in a browser bookmark folder titled 'Liked Authors.'", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "ncat.exe", "-k", "-l", "0.0.0.0", "9222", + "--sh-exec", "ncat.exe 127.0.0.1 1337" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1IlJ8kU5MlR6OqJHchsSUJzLCmcrG-8N7&export=download&authuser=0&confirm=t&uuid=d2a1810f-edea-4bfd-9d79-e668b9f11876&at=APZUnTVv_eqtC86YzkEU8_jIhC9W:1709522229162", + "path": "C:\\Users\\chenj\\Desktop\\2206.08853.pdf" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\chenj\\Desktop\\2206.08853.pdf" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('f11'); time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.scroll(-20)" + ] + } + } + ], + "trajectory": "trajectories/a82b78bb-7fde-4cb3-94a4-035baf10bcf0", + "related_apps": [ + "chrome", + "pdf" + ], + "evaluator": { + "func": "is_expected_bookmarks", + "result": { + "type": "bookmarks" + }, + "expected": { + "type": "rule", + "rules": { + "type": "liked_authors_websites_urls", + "names": [ + "Liked Authors" + ], + "urls": [ + ["https://jimfan.me/", "https://research.nvidia.com/person/linxi-jim-fan"], + ["https://research.nvidia.com/person/de-an-huang", "https://ai.stanford.edu/~dahuang/"], + ["https://yukezhu.me/", "https://www.cs.utexas.edu/people/faculty-researchers/yuke-zhu", "https://experts.utexas.edu/yuke_zhu", "https://research.nvidia.com/person/yuke-zhu"], + ["http://tensorlab.cms.caltech.edu/users/anima/", "https://www.eas.caltech.edu/people/anima"] + ] + } + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/b5062e3e-641c-4e3a-907b-ac864d2e7652.json b/evaluation_examples/examples/Windows/multi_app/b5062e3e-641c-4e3a-907b-ac864d2e7652.json index c869428..b4ae4ae 100644 --- a/evaluation_examples/examples/Windows/multi_app/b5062e3e-641c-4e3a-907b-ac864d2e7652.json +++ b/evaluation_examples/examples/Windows/multi_app/b5062e3e-641c-4e3a-907b-ac864d2e7652.json @@ -1,17 +1,14 @@ { "id": "b5062e3e-641c-4e3a-907b-ac864d2e7652", "snapshot": "libreoffice_calc", - "instruction": "Please help me to extract the name, e-mail, and affiliation of the first author from each paper in the folder and organize them in an Excel table. Include headers for each field. Sort the authors by their full names alphabetically and save the table as \"~/authors.xlsx\".", + "instruction": "Please help me to extract the name, e-mail, and affiliation of the first author from each paper in the folder and organize them in an Excel table. Include headers for each field. 
Sort the authors by their full names alphabetically and save the table as \"Documents\\authors.xlsx\".", "source": "authors", "config": [ { "type": "command", "parameters": { - "command": [ - "mkdir", - "-p", - "/home/user/Documents/Papers" - ] + "command": "mkdir C:\\Users\\chenj\\Documents\\Papers", + "shell": true } }, { @@ -19,19 +16,19 @@ "parameters": { "files": [ { - "path": "/home/user/Documents/Papers/zhang_appagent.pdf", + "path": "C:\\Users\\chenj\\Documents\\Papers\\zhang_appagent.pdf", "url": "https://arxiv.org/pdf/2312.13771.pdf" }, { - "path": "/home/user/Documents/Papers/niu_screenagent.pdf", + "path": "C:\\Users\\chenj\\Documents\\Papers\\niu_screenagent.pdf", "url": "https://arxiv.org/pdf/2402.07945.pdf" }, { - "path": "/home/user/Documents/Papers/koh_visualwebarena.pdf", + "path": "C:\\Users\\chenj\\Documents\\Papers\\koh_visualwebarena.pdf", "url": "https://arxiv.org/pdf/2401.13649.pdf" }, { - "path": "/home/user/Documents/Papers/deng_mind2web.pdf", + "path": "C:\\Users\\chenj\\Documents\\Papers\\deng_mind2web.pdf", "url": "https://papers.nips.cc/paper_files/paper/2023/file/5950bf290a1570ea401bf98882128160-Paper-Datasets_and_Benchmarks.pdf" } ] @@ -41,8 +38,8 @@ "type": "launch", "parameters": { "command": [ - "nautilus", - "/home/user/Documents/Papers" + "explorer.exe", + "C:\\Users\\chenj\\Documents\\Papers" ] } } @@ -53,25 +50,10 @@ "os" ], "evaluator": { - "postconfig": [ - { - "type": "execute", - "parameters": { - "command": [ - "libreoffice", - "--convert-to", - "csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1", - "--outdir", - "/home/user", - "/home/user/authors.xlsx" - ] - } - } - ], "func": "compare_table", "result": { "type": "vm_file", - "path": "/home/user/authors.xlsx", + "path": "C:\\Users\\chenj\\authors.xlsx", "dest": "authors.xlsx" }, "expected": { diff --git a/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json b/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json index 72d43a0..0fd69f8 100644 --- a/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json +++ b/evaluation_examples/examples/Windows/multi_app/c867c42d-a52d-4a24-8ae3-f75d256b5618.json @@ -35,14 +35,14 @@ { "type": "execute", "parameters": { - "command": "rm -r C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", + "command": "rd /s /q C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", "shell": true } }, { "type": "execute", "parameters": { - "command": "mv C:\\Users\\chenj\\Thunderbird C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", + "command": "move C:\\Users\\chenj\\Thunderbird C:\\Users\\chenj\\AppData\\Roaming\\Thunderbird", "shell": true } }, diff --git a/evaluation_examples/examples/Windows/multi_app/d1acdb87-bb67-4f30-84aa-990e56a09c92.json b/evaluation_examples/examples/Windows/multi_app/d1acdb87-bb67-4f30-84aa-990e56a09c92.json new file mode 100644 index 0000000..0a671aa --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/d1acdb87-bb67-4f30-84aa-990e56a09c92.json @@ -0,0 +1,128 @@ +{ + "id": "d1acdb87-bb67-4f30-84aa-990e56a09c92", + "snapshot": "libreoffice_calc", + "instruction": "Hello! I'm eagerly planning a culinary adventure to Hong Kong and have curated a list of must-visit restaurants that I've been longing to explore. However, I could use some assistance in compiling a few essential details about these establishments. Would you be so kind as to help me out? 
It would be fantastic if you could search for these restaurants on Google Maps. I'm particularly interested in obtaining their addresses, any available websites, and contact phone numbers. If you could gather this information and input it into my form file, I would be immensely grateful. Many thanks in advance!", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "C:\\Users\\chenj\\Desktop\\restaurants.txt", + "url": "https://drive.google.com/uc?id=1IehFLJPZcFv8Ujk31ExbyGLji9AylmmJ&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\MUST_VISIT.xlsx", + "url": "https://drive.google.com/uc?id=1fXmjvZcwkIcckMIAXi3Hv_JAbVWpgs_l&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\chenj\\Desktop\\MUST_VISIT.xlsx" + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\chenj\\Desktop\\restaurants.txt" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 5 + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "restaurants.txt" + } + } + ], + "trajectory": "trajectories/d1acdb87-bb67-4f30-84aa-990e56a09c92", + "related_apps": [ + "os", + "chrome", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "MUST_VISIT - Excel", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\MUST_VISIT.xlsx", + "dest": "MUST_VISIT.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download", + "dest": "MUST_VISIT-gt.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": ["A1:A6", "D1:D6"], + "type": "exact_match" + }, + { + "range": ["B1:B6"], + "type": "fuzzy_match", + "threshold": 85, + "normalization": [ + ["Rd", "Road"], + ["St", "Street"] + ], + "ignore_case": true + }, + { + "range": ["C1:C6"], + "type": "includes", + "trim_leadings": "+ ", + "ignore_chars": " ()-" + } + ] + } + ] + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json b/evaluation_examples/examples/Windows/multi_app/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json new file mode 100644 index 0000000..22de11c --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/deec51c9-3b1e-4b9e-993c-4776f20e8bb2.json @@ -0,0 +1,101 @@ +{ + "id": "deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "snapshot": "libreoffice_calc", + "instruction": "Find a paper list of all the new foundation language models issued on 11th Oct.
2023 via arxiv daily, and organize it into the sheet I opened.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "C:\\Users\\chenj\\Desktop\\New Large Language Models.xlsx", + "url": "https://drive.google.com/uc?id=1NJFAUDzatd5TbBqXeCy3-ok4BWj-xayT&export=download" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\chenj\\Desktop\\New Large Language Models.xlsx" + } + } + ], + "trajectory": "trajectories/deec51c9-3b1e-4b9e-993c-4776f20e8bb2", + "related_apps": [ + "libreoffice_calc", + "chrome", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "New Large Language Models - Excel", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\New Large Language Models.xlsx", + "dest": "New Large Language Models.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1BHOyjFo72b74YKWTqPMaoNvCzICkos-G&export=download", + "dest": "New Large Language Models Gold.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_fuzzy", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "rules": [ + { + "range": [ + "B2:B5", + "C2:C5" + ], + "type": "exact_match" + }, + { + "range": [ + "A2:A5" + ], + "type": "fuzzy_match", + "threshold": 90, + "ignore_case": true + } + ] + } + ] + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/e2392362-125e-4f76-a2ee-524b183a3412.json b/evaluation_examples/examples/Windows/multi_app/e2392362-125e-4f76-a2ee-524b183a3412.json new file mode 100644 index 0000000..a770c64 --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/e2392362-125e-4f76-a2ee-524b183a3412.json @@ -0,0 +1,121 @@ +{ + "id": "e2392362-125e-4f76-a2ee-524b183a3412", + "snapshot": "chrome", + "instruction": "I recently started using the famous personal academic homepage template from academicpages.github.io to build my own personal homepage, and I have cloned it to my local Documents\\Code\\Website folder. According to an online tutorial, I can configure my name and contact information in the _config.yaml file. However, I am not familiar with the YAML file format. 
Please help me find the sections related to the name and contact information in this file and change them to \"Test Account\" and \"Test@gmail.com\".", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": "mkdir C:\\Users\\chenj\\Documents\\Code\\Website", + "shell": true + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "C:\\Users\\chenj\\.tmp.7z", + "url": "https://drive.google.com/uc?id=1LYc6rBSuCNBtTQIg-m9zP6KmlEB_Zfdo&export=download" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "C:\\Program Files\\7-Zip\\7z.exe", + "x", "-oC:\\Users\\chenj\\Documents\\Code\\Website", + "C:\\Users\\chenj\\.tmp.7z" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "ncat.exe", "-k", "-l", "0.0.0.0", "9222", + "--sh-exec", "ncat.exe 127.0.0.1 1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://academicpages.github.io/" + ] + } + } + ], + "trajectory": "trajectories/e2392362-125e-4f76-a2ee-524b183a3412", + "related_apps": [ + "chrome", + "os", + "vscode" + ], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);" + ] + } + } + ], + "func": "check_json", + "options": { + "is_yaml": true + }, + "expected": { + "type": "rule", + "rules": { + "expect": [ + { + "key": [ + "name" + ], + "method": "eq", + "ref": "Test Account" + }, + { + "key": [ + "author", + "name" + ], + "method": "eq", + "ref": "Test Account" + }, + { + "key": [ + "author", + "email" + ], + "method": "eq", + "ref": "Test@gmail.com" + } + ] + } + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Documents\\Code\\Website\\academicpages.github.io\\_config.yml", + "dest": "_config.yaml" + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/eb303e01-261e-4972-8c07-c9b4e7a4922a.json b/evaluation_examples/examples/Windows/multi_app/eb303e01-261e-4972-8c07-c9b4e7a4922a.json new file mode 100644 index 0000000..801293e --- /dev/null +++ b/evaluation_examples/examples/Windows/multi_app/eb303e01-261e-4972-8c07-c9b4e7a4922a.json @@ -0,0 +1,82 @@ +{ + "id": "eb303e01-261e-4972-8c07-c9b4e7a4922a", + "snapshot": "libreoffice_impress", + "instruction": "Tomorrow, I'm scheduled to deliver a talk, and my PowerPoint slides and speaking notes are saved on the desktop. Help me insert my planned remarks for each slide into the \"note\" section of the PowerPoint as a reminder. 
I've completed this task for some slides; assist me in completing the remaining part.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1MdgN8ECxzLgHgjq8eKqrNQt3MPDjnKwa&export=download&authuser=0&confirm=t&uuid=ed5c37db-c565-4ca1-bbd1-bbdba13e9306&at=APZUnTUNi8YTLlZqMZ0r--bBpBEG:1709449877819", + "path": "C:\\Users\\chenj\\Desktop\\lecture1-2021-with-ink.pptx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1FkPOcsWpsjUXSUld1NblwyVzcsE19uIe&export=download&authuser=0&confirm=t&uuid=27501bc0-732b-4ff7-abf4-a52427aea264&at=APZUnTWleaafIVF2iZkiuHo0vQ66:1709449873140", + "path": "C:\\Users\\chenj\\Desktop\\notes.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\chenj\\Desktop\\lecture1-2021-with-ink.pptx" + } + } + ], + "trajectory": "trajectories/eb303e01-261e-4972-8c07-c9b4e7a4922a", + "related_apps": [ + "libreoffice_impress", + "libreoffice_writer" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "lecture1-2021-with-ink - PowerPoint", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_pptx_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=18orj_0q6N4w7ijADOJeU5ZkDDw-RdFUl&export=download&authuser=0&confirm=t&uuid=c05d2bce-bccb-4504-8fe4-7c409788d727&at=APZUnTVlCicnIm0cMdJ9FrZg4MSN:1709453015475", + "dest": "lecture1-2021-with-ink_Gold.pptx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\chenj\\Desktop\\lecture1-2021-with-ink.pptx", + "dest": "lecture1-2021-with-ink.pptx" + }, + "options": { + "examine_shape": false, + "examine_bullets": false + } + } +} diff --git a/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json b/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json index 09c8983..2336d7a 100644 --- a/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json +++ b/evaluation_examples/examples/Windows/multi_app/f918266a-b3e0-4914-865d-4faa564f1aef.json @@ -27,7 +27,7 @@ { "type": "execute", "parameters": { - "command": "rm C:\\Users\\chenj\\Desktop\\calculator.zip", + "command": "del C:\\Users\\chenj\\Desktop\\calculator.zip", "shell": true } } @@ -46,7 +46,7 @@ }, "result": { "type": "vm_file", - "path": "/home/user/Desktop/log.txt", + "path": "C:\\Users\\chenj\\Desktop\\log.txt", "dest": "log.txt" } } diff --git a/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json b/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json index 414aab4..cb7d65e 100644 --- a/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json +++ b/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json @@ -1,50 +1,50 @@ { - "id": "6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a", - "snapshot": "multiapps", - "instruction": "I now want to count the meeting cities of the three machine learning conferences in the past ten years from 2013 to 2019(including 2013 and 2019). 
I have listed the names and years of the conferences in excel. Please fill in the vacant locations.", - "source": "author", - "config": [ - { - "type": "download", - "parameters": { - "files": [ - { - "url": "https://drive.google.com/uc?export=download&id=19wUxTQeoKr6ihJWJ_9cu2tzKQH0cnxWH", - "path": "/home/user/Desktop/ConferenceCity.xlsx" - } - ] - } - }, - { - "type": "open", - "parameters": { - "path": "/home/user/Desktop/ConferenceCity.xlsx" - } - } - ], - "trajectory": "trajectories/", - "related_apps": [ - "calc", "chrome", "os" - ], - "evaluator": { - "postconfig":[ - { - "type": "download", - "parameters": { - "files": [ - { - "url": "https://drive.google.com/uc?export=download&id=1ZcITkIOs2Z86S5L6MShSohFs3_xVfeCP", - "path": "/home/user/Desktop/ConferenceCity_Gold.xlsx" - } - ] - } - }, - { - "type": "activate_window", - "parameters": { - "window_name": "ConferenceCity.xlsx - LibreOffice Calc" - } - }, + "id": "6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a", + "snapshot": "multiapps", + "instruction": "I now want to count the meeting cities of the three machine learning conferences in the past ten years from 2013 to 2019(including 2013 and 2019). I have listed the names and years of the conferences in excel. Please fill in the vacant locations.", + "source": "author", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=19wUxTQeoKr6ihJWJ_9cu2tzKQH0cnxWH", + "path": "/home/user/Desktop/ConferenceCity.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/ConferenceCity.xlsx" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "calc", "chrome", "os" + ], + "evaluator": { + "postconfig":[ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1ZcITkIOs2Z86S5L6MShSohFs3_xVfeCP", + "path": "/home/user/Desktop/ConferenceCity_Gold.xlsx" + } + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "ConferenceCity.xlsx - LibreOffice Calc" + } + }, { "type": "execute", "parameters": { @@ -55,18 +55,18 @@ ] } } - ], - "func": "compare_conference_city_in_order", - "expected": { - "type": "rule", - "rules":{ - "expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"] - } - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/ConferenceCity.xlsx", - "dest": "ConferenceCity.xlsx" - } - } - } \ No newline at end of file + ], + "func": "compare_conference_city_in_order", + "expected": { + "type": "rule", + "rules":{ + "expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"] + } + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/ConferenceCity.xlsx", + "dest": "ConferenceCity.xlsx" + } + } +} diff --git a/evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json b/evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json index b591cfd..eb55e68 100644 --- 
a/evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json +++ b/evaluation_examples/examples/multi_apps/e2392362-125e-4f76-a2ee-524b183a3412.json @@ -31,7 +31,7 @@ "command": [ "tar", "-xJvf", - ".tmp.tar.xz", + "/home/user/.tmp.tar.xz", "-C", "/home/user/Code/Website/" ] @@ -124,4 +124,4 @@ "dest": "_config.yaml" } } -} \ No newline at end of file +} From 402fcf01d0e3c50db6d5fd7b203bdda32c3f4dd6 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 21 Mar 2024 15:30:59 +0800 Subject: [PATCH 40/40] ver Mar21stv2 fixed error --- .../1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/evaluation_examples/examples/Windows/multi_app/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json b/evaluation_examples/examples/Windows/multi_app/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json index 6e49735..5bf8042 100644 --- a/evaluation_examples/examples/Windows/multi_app/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json +++ b/evaluation_examples/examples/Windows/multi_app/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json @@ -1,3 +1,27 @@ +{ + "id": "1f18aa87-af6f-41ef-9853-cdb8f32ebdea", + "snapshot": "libreoffice_calc", + "instruction": "I've prepared some grammar tests and placed them in the 'Grammar test' folder. I've already provided the multiple-choice answers for Test 1 in the 'answer doc' file. Could you please follow the same format to write out the answers for the remaining two tests in the doc file? This way, I can distribute them to the students as a reference. Thank you.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": "mkdir \"C:\\Users\\chenj\\Desktop\\students work\" \"C:\\Users\\chenj\\Desktop\\Lec powerpoint\" \"C:\\Users\\chenj\\Desktop\\Grammar test\" \"C:\\Users\\chenj\\Desktop\\Grammar rules PDF\" C:\\Users\\chenj\\Desktop\\FDI", + "shell": true + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "C:\\Users\\chenj\\Desktop\\Grammer test 1.docx", + "url": "https://drive.google.com/uc?id=1VaXQ9XdzMv079xKFs0Y2XrwdmwFHIvBK&export=download" + }, + { + "path": "C:\\Users\\chenj\\Desktop\\Grammer test 2.docx", + "url": "https://drive.google.com/uc?id=1k2T88WreTwi-Yyp9mEJnreEQC3DdkJ2x&export=download" }, { "path": "C:\\Users\\chenj\\Desktop\\Grammer test 3.docx",