From a1e02c6d57fa0028cc1f900f98ccb735656fb9c7 Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Thu, 1 Feb 2024 00:17:41 +0800
Subject: [PATCH 1/2] ver Jan31stv7

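Fixed an error in the Thunderbird evaluation config: the
`mail.identity.id1.auto_quote` rule is now listed under "expect" instead of
"unexpect". Also pointed `human_agent()` in main.py at this example.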
---
 .../thunderbird/030eeff7-b492-4218-b312-701ec99ee0cc.json       | 2 +-
 main.py                                                         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluation_examples/examples/thunderbird/030eeff7-b492-4218-b312-701ec99ee0cc.json b/evaluation_examples/examples/thunderbird/030eeff7-b492-4218-b312-701ec99ee0cc.json
index a419f69..b169b6b 100644
--- a/evaluation_examples/examples/thunderbird/030eeff7-b492-4218-b312-701ec99ee0cc.json
+++ b/evaluation_examples/examples/thunderbird/030eeff7-b492-4218-b312-701ec99ee0cc.json
@@ -68,7 +68,7 @@
     "expected": {
       "type": "rule",
       "rules": {
-        "unexpect": {
+        "expect": {
           "mail.identity.id1.auto_quote": {
             "method": "eq",
             "ref": false
diff --git a/main.py b/main.py
index ce7ee30..543c851 100644
--- a/main.py
+++ b/main.py
@@ -47,7 +47,7 @@ def human_agent():
     Runs the Gym environment with human input.
     """
 
-    with open("evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json", "r") as f:
+    with open("evaluation_examples/examples/thunderbird/030eeff7-b492-4218-b312-701ec99ee0cc.json", "r") as f:
         example = json.load(f)
     example["snapshot"] = "Snapshot 30"
 

From 5d436a6b66f9230d64875ad8c1886f9ab9ced7ed Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Thu, 1 Feb 2024 11:38:46 +0800
Subject: [PATCH 2/2] ver Feb1st

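Human evaluation and SoM experiments on Thunderbird.

Clarified the instructions of two Thunderbird tasks, added the Thunderbird
example IDs to experiment_screenshot_som.py, and made the GPT-4V agent fall
back to an empty response when the LLM call raises (the request now uses a
20-second timeout).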
---
 .../06fe7178-4491-4589-810f-2e2bc9502122.json |  2 +-
 .../2ad9387a-65d8-4e33-ad5b-7580065a27ca.json |  2 +-
 experiment_screenshot_som.py                  | 22 ++++++++++++++++---
 main.py                                       |  2 +-
 mm_agents/gpt_4v_agent.py                     | 19 ++++++++++------
 5 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/evaluation_examples/examples/thunderbird/06fe7178-4491-4589-810f-2e2bc9502122.json b/evaluation_examples/examples/thunderbird/06fe7178-4491-4589-810f-2e2bc9502122.json
index 733a831..5a51bc8 100644
--- a/evaluation_examples/examples/thunderbird/06fe7178-4491-4589-810f-2e2bc9502122.json
+++ b/evaluation_examples/examples/thunderbird/06fe7178-4491-4589-810f-2e2bc9502122.json
@@ -1,7 +1,7 @@
 {
   "id": "06fe7178-4491-4589-810f-2e2bc9502122",
   "snapshot": "thunderbird",
-  "instruction": "Could you help me back up all the email files in my profile to ~/emails.bak? Please save them separately in eml format.",
+  "instruction": "Could you help me back up all the email files in my inbox to ~/emails.bak? Please save them separately in eml format.",
   "source": "https://www.quora.com/How-do-I-backup-email-files-in-Mozilla-Thunderbird",
   "config": [
     {
diff --git a/evaluation_examples/examples/thunderbird/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json b/evaluation_examples/examples/thunderbird/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json
index 6b89f31..f8e17ec 100644
--- a/evaluation_examples/examples/thunderbird/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json
+++ b/evaluation_examples/examples/thunderbird/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json
@@ -1,7 +1,7 @@
 {
   "id": "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
   "snapshot": "thunderbird",
-  "instruction": "Create two local folders for me: COMPANY and UNIVERSITY.",
+  "instruction": "Create two local folders in Thunderbird for me: COMPANY and UNIVERSITY.",
   "source": "https://support.mozilla.org/bm/questions/1027435",
   "config": [
     {
diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py
index 24f8fd7..5cddff7 100644
--- a/experiment_screenshot_som.py
+++ b/experiment_screenshot_som.py
@@ -46,7 +46,7 @@ logger = logging.getLogger("desktopenv.experiment")
 #PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
 
 
-PATH_TO_VM = "../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx"
+PATH_TO_VM = "/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx"
 
 def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
     trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
@@ -198,6 +198,22 @@ if __name__ == '__main__':
               , "aa3a8974-2e85-438b-b29e-a64df44deb4b"
               , "a01fbce3-2793-461f-ab86-43680ccbae25"
               , "4f07fbe9-70de-4927-a4d5-bb28bc12c52c"
+              # 42, ^ calc, v thunderbird
+              , "bb5e4c0d-f964-439c-97b6-bdb9747de3f4"
+              , "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
+              , "12086550-11c0-466b-b367-1d9e75b3910e"
+              , "06fe7178-4491-4589-810f-2e2bc9502122"
+              , "6766f2b8-8a72-417f-a9e5-56fcaa735837"
+              , "e1e75309-3ddb-4d09-92ec-de869c928143"
+              , "3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5"
+              , "35253b65-1c19-4304-8aa4-6884b8218fc0"
+              , "d088f539-cab4-4f9a-ac92-9999fc3a656e"
+              , "2ad9387a-65d8-4e33-ad5b-7580065a27ca"
+              , "480bcfea-d68f-4aaa-a0a9-2589ef319381"
+              , "030eeff7-b492-4218-b312-701ec99ee0cc"
+              , "94760984-3ff5-41ee-8347-cf1af709fea0"
+              , "99146c54-4f37-4ab8-9327-5f3291665e1e"
+              , "c9e7eaf2-b1a1-4efc-a982-721972fa9f02"
               ]
-    for example_id in xx_list[18:]:
-        main("libreoffice_calc", example_id)
+    for example_id in xx_list[42:]:
+        main("thunderbird", example_id)
diff --git a/main.py b/main.py
index 543c851..627ed5f 100644
--- a/main.py
+++ b/main.py
@@ -47,7 +47,7 @@ def human_agent():
     Runs the Gym environment with human input.
     """
 
-    with open("evaluation_examples/examples/thunderbird/030eeff7-b492-4218-b312-701ec99ee0cc.json", "r") as f:
+    with open("evaluation_examples/examples/thunderbird/c9e7eaf2-b1a1-4efc-a982-721972fa9f02.json", "r") as f:
         example = json.load(f)
     example["snapshot"] = "Snapshot 30"
 
diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py
index 0dd15cf..9810eff 100644
--- a/mm_agents/gpt_4v_agent.py
+++ b/mm_agents/gpt_4v_agent.py
@@ -460,14 +460,18 @@ class GPT4v_Agent:
         with open("messages.json", "w") as f:
             f.write(json.dumps(messages, indent=4))
 
-        response = self.call_llm({
-            "model": self.model,
-            "messages": messages,
-            "max_tokens": self.max_tokens
-        })
+        try:
+            response = self.call_llm({
+                "model": self.model,
+                "messages": messages,
+                "max_tokens": self.max_tokens
+            })
+        except:
+            response = ""
 
         logger.debug("RESPONSE: %s", response)
 
+        # {{{
         if self.exp == "seeact":
             messages.append({
                 "role": "assistant",
@@ -503,7 +507,7 @@ class GPT4v_Agent:
         except Exception as e:
             print("Failed to parse action from response", e)
             actions = None
-            self.thoughts.append("")
+            self.thoughts.append("") # }}}
 
         return actions
 
@@ -516,7 +520,8 @@ class GPT4v_Agent:
         response = requests.post(
             "https://api.openai.com/v1/chat/completions",
             headers=self.headers,
-            json=payload
+            json=payload,
+            timeout=20
         )
 
         if response.status_code != 200: