From be5d55a3f8144a0e7eb224cefdf30157708852b7 Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Thu, 1 Feb 2024 14:22:34 +0800
Subject: [PATCH 1/3] ver Feb1stv2

Failed to start up the multi_apps experiments.
---
 branch_flag                  |  2 +-
 experiment_screenshot_som.py | 27 ++++++++++++++++++++++++---
 mm_agents/gpt_4v_agent.py    |  3 ++-
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/branch_flag b/branch_flag
index 9daeafb..760637d 100644
--- a/branch_flag
+++ b/branch_flag
@@ -1 +1 @@
-test
+exp_som
diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py
index 5cddff7..0fbe534 100644
--- a/experiment_screenshot_som.py
+++ b/experiment_screenshot_som.py
@@ -133,7 +133,7 @@ def main(example_class, example_id):
         example = json.load(f)
     #example["snapshot"] = "exp_v1"
     # example["snapshot"] = "exp_setup4"
-    example["snapshot"] = "Snapshot 30"
+    example["snapshot"] = "Snapshot 34"
 
     logger.info("TASK: %s/%s", example_class, example_id)
 
@@ -214,6 +214,27 @@ if __name__ == '__main__':
               , "94760984-3ff5-41ee-8347-cf1af709fea0"
               , "99146c54-4f37-4ab8-9327-5f3291665e1e"
               , "c9e7eaf2-b1a1-4efc-a982-721972fa9f02"
+              # index 57 starts here: thunderbird IDs above (^), multi_apps IDs below (v)
+              , "f8cfa149-d1c1-4215-8dac-4a0932bad3c2"
+              , "897e3b53-5d4d-444b-85cb-2cdc8a97d903"
+              , "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc"
+              , "b52b40a5-ad70-4c53-b5b0-5650a8387052"
+              , "46407397-a7d5-4c6b-92c6-dbe038b1457b"
+              , "2b9493d7-49b8-493a-a71b-56cd1f4d6908"
+              , "51f5801c-18b3-4f25-b0c3-02f85507a078"
+              , "2c9fc0de-3ee7-45e1-a5df-c86206ad78b5"
+              , "510f64c8-9bcc-4be1-8d30-638705850618"
+              , "937087b6-f668-4ba6-9110-60682ee33441"
+              , "ee9a3c83-f437-4879-8918-be5efbb9fac7"
+              , "3680a5ee-6870-426a-a997-eba929a0d25c"
+              , "d9b7c649-c975-4f53-88f5-940b29c47247"
+              , "f7dfbef3-7697-431c-883a-db8583a4e4f9"
+              , "a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb"
+              , "78aed49a-a710-4321-a793-b611a7c5b56b"
+              , "c867c42d-a52d-4a24-8ae3-f75d256b5618"
+              , "e135df7c-7687-4ac0-a5f0-76b74438b53e"
+              , "58565672-7bfe-48ab-b828-db349231de6b"
+              , "2fe4b718-3bd7-46ec-bdce-b184f5653624"
               ]
-    for example_id in xx_list[42:]:
-        main("thunderbird", example_id)
+    for example_id in xx_list[57:]:
+        main("multi_apps", example_id)
diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py
index 9810eff..7278c98 100644
--- a/mm_agents/gpt_4v_agent.py
+++ b/mm_agents/gpt_4v_agent.py
@@ -466,7 +466,8 @@ class GPT4v_Agent:
                 "messages": messages,
                 "max_tokens": self.max_tokens
             })
-        except:
+        except Exception as e:
+            logger.warning("LLM INVOCATION ERROR: %s", str(e))
             response = ""
 
         logger.debug("RESPONSE: %s", response)

From 9df0854469630ee857048ca468c8104e3fe56d11 Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Thu, 1 Feb 2024 22:56:09 +0800
Subject: [PATCH 2/3] ver Feb1stv3

rerun SoM experiment on thunderbird
---
 experiment_screenshot_som.py | 4 ++--
 requirements.txt             | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py
index 0fbe534..304293e 100644
--- a/experiment_screenshot_som.py
+++ b/experiment_screenshot_som.py
@@ -236,5 +236,5 @@ if __name__ == '__main__':
               , "58565672-7bfe-48ab-b828-db349231de6b"
               , "2fe4b718-3bd7-46ec-bdce-b184f5653624"
               ]
-    for example_id in xx_list[57:]:
-        main("multi_apps", example_id)
+    for example_id in xx_list[42:43]:
+        main("thunderbird", example_id)
diff --git a/requirements.txt b/requirements.txt
index ab1dcf1..bce1ae0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -39,4 +39,6 @@ fastdtw
 odfpy
 openai
 func-timeout
-beautifulsoup4
\ No newline at end of file
+beautifulsoup4
+dashscope
+google-generativeai

From 538b9928fefd0d0151a86fe658e688591fa96406 Mon Sep 17 00:00:00 2001
From: rhythmcao <ruishengcao@gmail.com>
Date: Fri, 2 Feb 2024 02:23:25 +0800
Subject: [PATCH 3/3] fix some problems in libreoffice writer

---
 desktop_env/evaluators/metrics/__init__.py    |  1 +
 desktop_env/evaluators/metrics/docs.py        | 54 +++++++++++++++++--
 .../0810415c-bde4-4443-9047-d5f70165a697.json |  2 +-
 .../0a0faba3-5580-44df-965d-f562a99b291c.json | 12 +++--
 .../8472fece-c7dd-4241-8d65-9b3cd1a0b568.json |  2 +-
 .../adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json |  2 +-
 6 files changed, 61 insertions(+), 12 deletions(-)

diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 3f67ebf..ec562a0 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -43,6 +43,7 @@ from .docs import (
     compare_highlighted_text,
     is_first_line_centered,
     check_file_exists,
+    check_tabstops,
     compare_contains_image
 )
 from .general import (
diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py
index 466483b..def8bf0 100644
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -6,11 +6,13 @@ import zipfile
 from typing import List, Dict, Any
 
 from docx import Document
-from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
+from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
 from docx.shared import RGBColor
 from odf.opendocument import load
 from odf.text import P
 from odf.text import Span
+from skimage.color import deltaE_ciede2000
+from skimage.color import rgb2lab
 
 logger = logging.getLogger("desktopenv.metric.docs")
 
@@ -141,7 +143,7 @@ def compare_docx_tables(docx_file1, docx_file2):
         # Compare each cell
         for i in range(len(table1.rows)):
             for j in range(len(table1.columns)):
-                if table1.cell(i, j).text != table2.cell(i, j).text:
+                if table1.cell(i, j).text.strip() != table2.cell(i, j).text.strip():
                     return 0
 
     return 1
@@ -234,6 +236,40 @@ def check_file_exists(directory, filename):
     return 1 if os.path.isfile(file_path) else 0
 
 
+def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
+    """Score in [0, 1] how closely the tab stops of two docx files agree.
+
+    Only non-blank paragraphs are compared, pairwise and in order. If the option
+    `word_number_split_by_tabstop` is given, field `index` (default 0) of every
+    tab-split paragraph of `docx_file1` must hold exactly that many words.
+    Structural mismatch or no paragraphs gives .0; else 1 - mean relative tab drift.
+    """
+    para1 = [p for p in Document(docx_file1).paragraphs if p.text.strip()]
+    doc2: Document = Document(docx_file2)
+    para2 = [p for p in doc2.paragraphs if p.text.strip()]
+    if not para1 or len(para1) != len(para2): return .0  # also avoids dividing by zero
+    number = kwargs.get('word_number_split_by_tabstop', None)
+    if number is not None:
+        index = kwargs.get('index', 0)
+        for p1 in para1:
+            splits = p1.text.split('\t')
+            if not -len(splits) <= index < len(splits): return .0  # requested field is missing
+            words = list(filter(lambda x: x.strip(), re.split(r'\s', splits[index])))
+            if len(words) != number: return .0
+    section = doc2.sections[0]
+    paragraph_width = section.page_width - section.left_margin - section.right_margin
+    ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or (x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
+    minus = .0
+    for p1, p2 in zip(para1, para2):
+        # filter CLEAR tabstop and default left-0 tabstop
+        tabs1 = [tst for tst in p1.paragraph_format.tab_stops if not ignore_tabs(tst)]
+        tabs2 = [tst for tst in p2.paragraph_format.tab_stops if not ignore_tabs(tst)]
+        if len(tabs1) != len(tabs2): return .0
+        if any(t1.alignment != t2.alignment for t1, t2 in zip(tabs1, tabs2)): return .0
+        minus += sum(abs(t1.position - t2.position) for t1, t2 in zip(tabs1, tabs2)) / paragraph_width
+    return max(.0, 1 - minus / len(para1))  # clamp: large drift must not yield a negative score
+
+
 def compare_contains_image(docx_file1, docx_file2):
     doc1 = Document(docx_file1)
     doc2 = Document(docx_file2)
@@ -258,10 +294,18 @@ def compare_contains_image(docx_file1, docx_file2):
 # print(find_default_font("Ani", config_path))
 
 
-def evaluate_colored_words_in_tables(file_path1, file_path2):
+def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
     if not compare_docx_files(file_path1, file_path2):
         return 0
     document = Document(file_path1)
+    threshold = kwargs.get('threshold', 3.5)
+
+    def _calculate_color_difference(rgb1, rgb2):
+        # CIEDE2000 distance of two 0-255 RGB triples; a missing color (None) never matches
+        if rgb1 is None or rgb2 is None: return float('inf')
+        srgb1, srgb2 = [c / 255.0 for c in rgb1], [c / 255.0 for c in rgb2]
+        lab1, lab2 = rgb2lab(srgb1), rgb2lab(srgb2)
+        return deltaE_ciede2000(lab1, lab2)
 
     for table in document.tables:
         # Iterate through rows and cells in the table
@@ -273,9 +317,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
                         if word:
                             first_letter = word[0].lower()
 
-                            if first_letter in 'aeiou' and run.font.color.rgb != RGBColor(255, 0, 0):
+                            if first_letter in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(255, 0, 0)) > threshold:
                                 return 0  # Vowel-colored words should be red
-                            elif first_letter not in 'aeiou' and run.font.color.rgb != RGBColor(0, 0, 255):
+                            elif first_letter not in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(0, 0, 255)) > threshold:
                                 return 0  # Non-vowel-colored words should be blue
 
     return 1  # All words in tables are correctly colored
diff --git a/evaluation_examples/examples/libreoffice_writer/0810415c-bde4-4443-9047-d5f70165a697.json b/evaluation_examples/examples/libreoffice_writer/0810415c-bde4-4443-9047-d5f70165a697.json
index 619707d..0471ab3 100644
--- a/evaluation_examples/examples/libreoffice_writer/0810415c-bde4-4443-9047-d5f70165a697.json
+++ b/evaluation_examples/examples/libreoffice_writer/0810415c-bde4-4443-9047-d5f70165a697.json
@@ -55,7 +55,7 @@
     "func": "compare_line_spacing",
     "expected": {
       "type": "cloud_file",
-      "path": "https://drive.usercontent.google.com/download?id=1-svVsH-l2ofufEKuN-cYrIrvXNobtATE&export=download&authuser=0&confirm=t&uuid=be7f891a-f858-48f5-a72d-4e42bbfb8b65&at=APZUnTXzBnaeSJjmxeh4zG03pzA0:1704179807785",
+      "path": "https://drive.usercontent.google.com/download?id=1-svVsH-l2ofufEKuN-cYrIrvXNobtATE&export=download&authuser=0&confirm=t&uuid=95ca5e2e-7fb3-4084-9f7b-a608a8277322&at=APZUnTXFO_571vyDp_r_LskPfq-j:1706796981024",
       "dest": "Novels_Intro_Packet_Gold.docx"
     },
     "result": {
diff --git a/evaluation_examples/examples/libreoffice_writer/0a0faba3-5580-44df-965d-f562a99b291c.json b/evaluation_examples/examples/libreoffice_writer/0a0faba3-5580-44df-965d-f562a99b291c.json
index 5c2e737..b148787 100644
--- a/evaluation_examples/examples/libreoffice_writer/0a0faba3-5580-44df-965d-f562a99b291c.json
+++ b/evaluation_examples/examples/libreoffice_writer/0a0faba3-5580-44df-965d-f562a99b291c.json
@@ -1,7 +1,7 @@
 {
   "id": "0a0faba3-5580-44df-965d-f562a99b291c",
   "snapshot": "libreoffice_writer",
-  "instruction": "I would like to make the first three words of the sentence left-aligned and the rest right-aligned. I basically want to have some empty space in the middle to add some photos. Assume that every sentence will have at least three words. Could you help me on alignment for me?",
+  "instruction": "I would like to make the first three words of the sentence left-aligned and the rest right-aligned. I basically want to have some empty space in the middle to add some photos. Assume that every sentence will have at least three words. Could you help me on alignment for me using tabstops?",
   "source": "https://stackoverflow.com/questions/64528055/how-to-make-part-of-my-sentence-left-aligned-and-rest-as-right-aligned",
   "config": [
     {
@@ -9,7 +9,7 @@
       "parameters": {
         "files": [
           {
-            "url": "https://drive.google.com/uc?id=1Wrjxsf184Go70TcRGM4Tohczh29Q9B_U&export=download",
+            "url": "https://drive.usercontent.google.com/download?id=1Wrjxsf184Go70TcRGM4Tohczh29Q9B_U&export=download&authuser=0&confirm=t&uuid=811f572f-03ee-47b9-8fd5-4978920ff425&at=APZUnTXcRTZAOb33QlpZ7-FT8I8Q:1706799959703",
             "path": "Desktop/04 CHIN9505 EBook Purchasing info 2021 Jan.docx"
           }
         ]
@@ -52,16 +52,20 @@
         }
       }
     ],
-    "func": "compare_init_lines",
+    "func": "check_tabstops",
     "expected": {
       "type": "cloud_file",
-      "path": "https://drive.google.com/uc?id=1yyHGj8KUHDMsZmc1QeJ1KkvSEGy83jMR&export=download",
+      "path": "https://drive.usercontent.google.com/download?id=1yyHGj8KUHDMsZmc1QeJ1KkvSEGy83jMR&export=download&authuser=0&confirm=t&uuid=32f8aa47-c590-4ece-bf65-65a0d683fcfa&at=APZUnTU1_BaeVgyB8GLJWfJrIAYh:1706802911129",
       "dest": "04 CHIN9505 EBook Purchasing info 2021 Jan_Gold.docx"
     },
     "result": {
       "type": "vm_file",
       "path": "Desktop/04 CHIN9505 EBook Purchasing info 2021 Jan.docx",
       "dest": "04 CHIN9505 EBook Purchasing info 2021 Jan.docx"
+    },
+    "options": {
+      "word_number_split_by_tabstop": 3,
+      "index": 0
     }
   }
 }
\ No newline at end of file
diff --git a/evaluation_examples/examples/libreoffice_writer/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json b/evaluation_examples/examples/libreoffice_writer/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json
index 4a7c8f4..4c6a790 100644
--- a/evaluation_examples/examples/libreoffice_writer/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json
+++ b/evaluation_examples/examples/libreoffice_writer/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json
@@ -55,7 +55,7 @@
     "func": "evaluate_colored_words_in_tables",
     "expected": {
       "type": "cloud_file",
-      "path": "https://drive.google.com/uc?id=1ksn444K17lFOdm5pELrQYvuZHkOsKq69&export=download",
+      "path": "https://drive.usercontent.google.com/download?id=1XmF-6ttL23xMK-j4P50qVGO4vgb6EgZR&export=download&authuser=0&confirm=t&uuid=fe5c16a5-3131-4a19-a6bf-c5e7faf341dd&at=APZUnTWDtqYGJvChovcgUVHDnvzy:1706807220392",
       "dest": "Dolch_Sight_Words_Primer_Gold.docx"
     },
     "result": {
diff --git a/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json b/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json
index ef0bb84..b264f08 100644
--- a/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json
+++ b/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json
@@ -1,7 +1,7 @@
 {
   "id": "adf5e2c3-64c7-4644-b7b6-d2f0167927e7",
   "snapshot": "libreoffice_writer",
-  "instruction": "Help me adding \"Steinberg, F. M., Bearden, M. M., & Keen, C. L. (2003). Cocoa and chocolate flavonoids: Implications for cardiovascular health. Journal of the American Dietetic Association, 103(2), 215-223. doi: 10.1053/jada.2003.50028\" to my reference list, and add a cross reference in the fourth paragraph where I marked \"<add here>\".",
+  "instruction": "Help me adding \"Steinberg, F. M., Bearden, M. M., & Keen, C. L. (2003). Cocoa and chocolate flavonoids: Implications for cardiovascular health. Journal of the American Dietetic Association, 103(2), 215-223. doi: 10.1053/jada.2003.50028\" to my reference list, and add a cross reference (using reference number) in the fourth paragraph where I marked \"<add here>\".",
   "source": "https://seekstar.github.io/2022/04/11/libreoffice%E5%BC%95%E7%94%A8%E6%96%87%E7%8C%AE/",
   "config": [
     {