From ba8ae104cf3315861fa53c8146c2981c80679028 Mon Sep 17 00:00:00 2001 From: tsuky_chen <3107760494@qq.com> Date: Wed, 17 Jan 2024 18:00:20 +0800 Subject: [PATCH 01/13] update impress eval examples --- desktop_env/evaluators/metrics/__init__.py | 1 + desktop_env/evaluators/metrics/impress.py | 19 ++++++++ .../3b27600c-3668-4abd-8f84-7bcdebbccbdb.json | 40 ++++++++++++++-- .../ce88f674-ab7a-43da-9201-468d38539e4a.json | 35 ++++++++++++-- .../f0a334af-f91b-4c03-b578-aac9bec2b543.json | 46 +++++++++++++++++-- 5 files changed, 126 insertions(+), 15 deletions(-) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 59ebab2..fc46434 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -15,3 +15,4 @@ from .general import check_csv, check_accessibility_tree, check_list, run_sqlite from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed from .impress import check_slide_numbers_color, compare_pptx_files, check_for_two_lines, check_for_audio, check_formula_shape, check_file_exists +from .impress import check_slide_orientation_Portrait, contains_mp4_video \ No newline at end of file diff --git a/desktop_env/evaluators/metrics/impress.py b/desktop_env/evaluators/metrics/impress.py index 719ae56..749bc0e 100644 --- a/desktop_env/evaluators/metrics/impress.py +++ b/desktop_env/evaluators/metrics/impress.py @@ -94,6 +94,25 @@ def check_formula_shape(prs): return 1 return 0 +def check_slide_orientation_Portrait(pptx_path): + presentation = Presentation(pptx_path) + + slide_height = presentation.slide_height + slide_width = presentation.slide_width + + if slide_width < slide_height: + return 1 + return 0 + +def contains_mp4_video(pptx_path): + prs = Presentation(pptx_path) + for slide in prs.slides: + for shape in slide.shapes: + if shape.shape_type == 16: + if shape.media_type == 3: + return 1 + return 0 + if __name__ == "__main__": path1 = "../../任务数据/LibreOffice Impress/Change_Color_Slide_Number_gold_textbox.pptx" presentation = Presentation(path1) diff --git a/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json b/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json index 784913c..615cabc 100644 --- a/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json +++ b/evaluation_examples/examples/libreoffice_impress/3b27600c-3668-4abd-8f84-7bcdebbccbdb.json @@ -1,12 +1,42 @@ { "id": "3b27600c-3668-4abd-8f84-7bcdebbccbdb", "snapshot": "libreoffice_impress", - "instruction": "Could you help me change the background color to blue 2 and apply it to all my slides.", + "instruction": "Please make the background blue on all my slides.", "source": "https://www.libreofficehelp.com/change-slide-background-impress/#All_Slides", - "config": [], + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1aHMJzk2G8B_EqDlTAZLEiJ4h-ZsgA9UE&export=download&authuser=0&confirm=t&uuid=196a082d-5f08-4b3e-a64f-c021351f9cd8&at=APZUnTUXH4gvLvElvm9TtFhUJlIn:1705481007789", + "path": "Desktop/lec17-gui-events.pptx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "Desktop/lec17-gui-events.pptx" + } + } + ], "trajectory": "trajectories/", "related_apps": [ - "" + "libreoffice_impress" ], - "evaluator": "evaluation_dir" 
-} + "evaluator": { + "func": "compare_pptx_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1LU-wnmIqMQgwkdAUFBLE1wNkH4gSl3IR&export=download&authuser=0&confirm=t&uuid=74520405-4028-4fbe-bab8-d56dc82ffb6c&at=APZUnTU0dz5ZE5CcQry8IeY5_s1J:1705481009686", + "dest": "lec17-gui-events_Gold.docx" + }, + "result": { + "type": "vm_file", + "path": "Desktop/lec17-gui-events.pptx", + "dest": "lec17-gui-events.pptx" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/ce88f674-ab7a-43da-9201-468d38539e4a.json b/evaluation_examples/examples/libreoffice_impress/ce88f674-ab7a-43da-9201-468d38539e4a.json index 1149fbb..7f3b272 100644 --- a/evaluation_examples/examples/libreoffice_impress/ce88f674-ab7a-43da-9201-468d38539e4a.json +++ b/evaluation_examples/examples/libreoffice_impress/ce88f674-ab7a-43da-9201-468d38539e4a.json @@ -1,12 +1,37 @@ { "id": "ce88f674-ab7a-43da-9201-468d38539e4a", "snapshot": "libreoffice_impress", - "instruction": "Could you help me change my slides to portrait (from landscape)?", + "instruction": "Please set my slides upright instead of sideways.", "source": "https://justclickhere.co.uk/resources/change-slides-in-impress-to-portrait/", - "config": [], + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1LErTnC_w_YPQVo84QK5sifww9xZ-Cq0X&export=download&authuser=0&confirm=t&uuid=81ff0aaf-9c2e-4342-b7ce-36e65dd2218e&at=APZUnTUmQKCTp2HUP0dOqYqD10G3:1705479016156", + "path": "Desktop/AM_Last_Page_Template.pptx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "Desktop/AM_Last_Page_Template.pptx" + } + } + ], "trajectory": "trajectories/", "related_apps": [ - "" + "libreoffice_impress" ], - "evaluator": "evaluation_dir" -} + "evaluator": { + "func": "check_slide_orientation_Portrait", + "result": { + "type": "vm_file", + "path": "Desktop/AM_Last_Page_Template.pptx", + "dest": "AM_Last_Page_Template.pptx" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/f0a334af-f91b-4c03-b578-aac9bec2b543.json b/evaluation_examples/examples/libreoffice_impress/f0a334af-f91b-4c03-b578-aac9bec2b543.json index 86342e6..3411aa8 100644 --- a/evaluation_examples/examples/libreoffice_impress/f0a334af-f91b-4c03-b578-aac9bec2b543.json +++ b/evaluation_examples/examples/libreoffice_impress/f0a334af-f91b-4c03-b578-aac9bec2b543.json @@ -1,12 +1,48 @@ { "id": "f0a334af-f91b-4c03-b578-aac9bec2b543", "snapshot": "libreoffice_impress", - "instruction": "Help me insert the video at VIDEO_PATH in the current slide.", + "instruction": "Insert the video Movie_countdown_2.mov on the Desktop into my current slide, please.", "source": "https://www.libreofficehelp.com/insert-video-impress-presentation/#Inserting_a_Video_in_Impress", - "config": [], + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1vvRkrxOK_sFPX9PLFniFqrdNEZ2pQnPP&export=download&authuser=0&confirm=t&uuid=71964a12-2d0a-4c71-9375-2f9ec15de1ad&at=APZUnTX_B-T2GeZPS7ZmchMQ6E7m:1705481285721", + "path": "Desktop/Movie_activities_TURKEY.pptx" + } + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": 
"https://drive.usercontent.google.com/download?id=1S4lACtBb40Ff0DEjB2bG2tzr2qWwQLGd&export=download&authuser=0&confirm=t&uuid=a28c123e-5371-4e17-82c2-ed7b1f05b728&at=APZUnTW_rlUPV6mM4RjS0R6dMSv4:1705469776913", + "path": "Desktop/Movie_countdown_2.mov" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "Desktop/Movie_activities_TURKEY.pptx" + } + } + ], "trajectory": "trajectories/", "related_apps": [ - "" + "libreoffice_impress" ], - "evaluator": "evaluation_dir" -} + "evaluator": { + "func": "contains_mp4_video", + "result": { + "type": "vm_file", + "path": "Desktop/Movie_activities_TURKEY.pptx", + "dest": "Movie_activities_TURKEY.pptx" + } + } +} \ No newline at end of file From b60eb2a93342660341ba8849ff703c662464bc93 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Thu, 18 Jan 2024 01:43:57 +0800 Subject: [PATCH 02/13] VM resolution adjust support --- desktop_env/envs/desktop_env.py | 63 ++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index 786ed72..94e3b0d 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -5,7 +5,7 @@ import os import subprocess import tempfile import time -from typing import Callable, Any, Optional +from typing import Callable, Any, Optional, Tuple # import uuid # import platform from typing import List, Dict @@ -48,7 +48,8 @@ class DesktopEnv(gym.Env): action_space: str = "computer_13", task_config: Dict[str, Any] = None, tmp_dir: str = "tmp", - cache_dir: str = "cache" + cache_dir: str = "cache", + screen_size: Tuple[int] = (1920, 1080) ): """ Args: @@ -73,6 +74,7 @@ class DesktopEnv(gym.Env): self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm))) self.tmp_dir_base: str = tmp_dir self.cache_dir_base: str = cache_dir + self.vm_screen_size = screen_size # task-aware stuffs # todo: handling the logic of snapshot directory @@ -80,6 +82,7 @@ class DesktopEnv(gym.Env): # Initialize emulator and controller logger.info("Initializing...") + self._config_screen_size() self._start_emulator() self.vm_ip = self._get_vm_ip() self.controller = PythonController(vm_ip=self.vm_ip) @@ -87,7 +90,6 @@ class DesktopEnv(gym.Env): # Meta info of the VM, move to the reset() function self.vm_platform: str = "" # self.controller.get_vm_platform() - self.vm_screen_size = None # self.controller.get_vm_screen_size() # mode: human or machine assert action_space in ["computer_13", "pyautogui"] @@ -101,6 +103,56 @@ class DesktopEnv(gym.Env): self._step_no: int = 0 self.action_history: List[Dict[str, any]] = [] + def _config_screen_size(self): + def calculate_vram_size(width, height, bits_per_pixel=32): + """ + Calculate VRAM size for given width, height, and color depth. + Color depth defaults to 32 bits per pixel. 
+ """ + bytes_per_pixel = bits_per_pixel // 8 + vram_size = width * height * bytes_per_pixel + return vram_size + if not os.path.isfile(self.path_to_vm): + logger.warning(f"The specified vmx file does not exist: {self.path_to_vm}") + return False + + width, height = self.vm_screen_size + vramSize = calculate_vram_size(width, height) + + try: + with open(self.path_to_vm, 'r') as file: + lines = file.readlines() + + new_lines = [] + for line in lines: + if "svga.autodetect" in line: + continue + elif "svga.vramSize" in line: + continue + elif "displayWidth" in line: + continue + elif "displayHeight" in line: + continue + else: + new_lines.append(line) + + # Append new settings for screen size and VRAM. + new_lines.append(f'svga.autodetect = "TRUE"\n') + new_lines.append(f'svga.vramSize = "{vramSize}"\n') + new_lines.append(f'displayWidth = "{width}"\n') + new_lines.append(f'displayHeight = "{height}"\n') + + with open(self.path_to_vm, 'w') as file: + file.writelines(new_lines) + logger.info(f"Screen size for {self.path_to_vm} set to {width}x{height} with VRAM size {vramSize} bytes") + return True + except IOError as e: + logger.error(f"An IOError occurred: {e}") + return False + except Exception as e: + logger.error(f"An error occurred: {e}") + return False + def _start_emulator(self): while True: try: @@ -119,7 +171,7 @@ class DesktopEnv(gym.Env): logger.error(f"Error executing command: {e.output.decode().strip()}") def _get_vm_ip(self): - max_retries = 10 + max_retries = 20 logger.info("Getting IP Address...") for _ in range(max_retries): try: @@ -190,6 +242,8 @@ class DesktopEnv(gym.Env): _execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path]) time.sleep(5) + self._config_screen_size() + print(self.vm_screen_size) logger.info("Starting emulator...") self._start_emulator() logger.info("Emulator started.") @@ -197,6 +251,7 @@ class DesktopEnv(gym.Env): logger.info("Get meta info of the VM...") self.vm_platform = self.controller.get_vm_platform() self.vm_screen_size = self.controller.get_vm_screen_size() + print(self.vm_screen_size) logger.info("Setting up environment...") self.setup_controller.setup(self.config) From 91824f754cd3eaa69a0c93bad9cae28e93a4ff1c Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Thu, 18 Jan 2024 14:12:54 +0800 Subject: [PATCH 03/13] 1. extend evaluator to list (compatible with single evaluator) 2. 
fix a variable name error in metrics/general.py --- desktop_env/envs/desktop_env.py | 68 ++++++++++++++++++----- desktop_env/evaluators/metrics/general.py | 4 +- 2 files changed, 55 insertions(+), 17 deletions(-) diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index 94e3b0d..df49f1e 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -8,7 +8,7 @@ import time from typing import Callable, Any, Optional, Tuple # import uuid # import platform -from typing import List, Dict +from typing import List, Dict, Union import gymnasium as gym @@ -211,12 +211,28 @@ class DesktopEnv(gym.Env): self.instruction = task_config["instruction"] self.config = task_config["config"] + # evaluator dict + # func -> metric function string, or list of metric function strings + # conj -> conjuction of multiple metrics if func is a list with length > 1, "and"/"or" + # result -> result getter config, or list of result getter configs + # expected (optional) -> expected getter config, or list of expected getter configs + # options (optional) -> metric options, or list of metric options + # if func is a str list, then result, expected (if exists), options (if exists) should also be lists of the same length + # even if one of the metrics does not need expected or options field, it should be included in the list with None self.evaluator = task_config["evaluator"] - self.metric: Metric = getattr(metrics, self.evaluator["func"]) - self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) - self.expected_getter: Getter = getattr(getters, "get_{:}".format( - self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None - self.metric_options: Dict[str, Any] = self.evaluator.get("options", {}) + self.metric: Metric = [getattr(metrics, func) for func in self.evaluator["func"]] if type(self.evaluator["func"]) == list \ + else getattr(metrics, self.evaluator["func"]) + self.metric_conj: str = self.evaluator.get("conj", "and") # take conjuction of multiple metrics + self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in self.evaluator["result"]] \ + if type(self.evaluator["result"]) == list else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) + if "expected" in self.evaluator: + self.expected_getter: Getter = [getattr(getters, "get_{:}".format(exp["type"])) if exp else None for exp in self.evaluator["expected"]] \ + if type(self.evaluator["expected"]) == list else getattr(getters, "get_{:}".format()) + else: self.expected_getter = [None for _ in len(self.metric)] if type(self.metric) == list else None + self.metric_options: Union[List[Dict[str, Any]], Dict[str, Any]] = [opt if opt else {} for opt in self.evaluator["options"]] \ + if type(self.evaluator.get("options", {})) == list else self.evaluator["options"] if "options" in self.evaluator else \ + [{} for _ in len(self.metric)] if type(self.metric) == list else {} + assert type(self.evaluator["func"]) != list or (len(self.metric) == len(self.result_getter) == len(self.expected_getter) == len(self.metric_options)) def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None) -> Dict[str, Any]: logger.info("Resetting environment...") @@ -311,17 +327,39 @@ class DesktopEnv(gym.Env): self.setup_controller.setup(self.evaluator.get("postconfig", [])) - try: - result_state = self.result_getter(self, self.evaluator["result"]) - except FileNotFoundError: - logger.error("File not found!") - 
return 0 + if type(self.metric) == list: + for idx, metric in enumerate(self.metric): + try: + config = self.evaluator["result"][idx] + result_state = self.result_getter[idx](self, config) + except FileNotFoundError: + logger.error("File not found!") + if self.metric_conj == 'and': + return 0 - expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \ - else None + expected = self.evaluator["expected"][idx] + expected_state = self.expected_getter[idx](self, expected) if expected else None - metric: float = self.metric(result_state, expected_state, **self.metric_options) if expected_state is not None \ - else self.metric(result_state, **self.metric_options) + metric: int = metric(result_state, expected_state, **self.metric_options[idx]) if expected_state is not None \ + else metric(result_state, **self.metric_options[idx]) + + if self.metric_conj == 'and' and not bool(metric): + return 0 + elif self.metric_conj == 'or' and bool(metric): + return 1 + return 1 if self.metric_conj == 'and' else 0 + else: + try: + result_state = self.result_getter(self, self.evaluator["result"]) + except FileNotFoundError: + logger.error("File not found!") + return 0 + + expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \ + else None + + metric: float = self.metric(result_state, expected_state, **self.metric_options) if expected_state is not None \ + else self.metric(result_state, **self.metric_options) return metric diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 6246861..98c9596 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -180,8 +180,8 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str with open(result) as f: result: Dict[str, Any] = json.load(f) - expect_rules = rule.get("expect", {}) - unexpect_rules = rule.get("unexpect", {}) + expect_rules = rules.get("expect", {}) + unexpect_rules = rules.get("unexpect", {}) metric = True for r in expect_rules: From bdd21d06ca50cb8605c9ca26c3f513f144ab0b07 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Fri, 19 Jan 2024 20:34:11 +0800 Subject: [PATCH 04/13] Fix minor bugs --- desktop_env/envs/desktop_env.py | 51 ++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index df49f1e..2f4287e 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -112,6 +112,7 @@ class DesktopEnv(gym.Env): bytes_per_pixel = bits_per_pixel // 8 vram_size = width * height * bytes_per_pixel return vram_size + if not os.path.isfile(self.path_to_vm): logger.warning(f"The specified vmx file does not exist: {self.path_to_vm}") return False @@ -213,26 +214,42 @@ class DesktopEnv(gym.Env): # evaluator dict # func -> metric function string, or list of metric function strings - # conj -> conjuction of multiple metrics if func is a list with length > 1, "and"/"or" + # conj -> conjunction of multiple metrics if func is a list with length > 1, "and"/"or" # result -> result getter config, or list of result getter configs # expected (optional) -> expected getter config, or list of expected getter configs # options (optional) -> metric options, or list of metric options # if func is a str list, then result, expected (if exists), options (if exists) should also be lists of the same length # even if one of 
the metrics does not need expected or options field, it should be included in the list with None self.evaluator = task_config["evaluator"] - self.metric: Metric = [getattr(metrics, func) for func in self.evaluator["func"]] if type(self.evaluator["func"]) == list \ - else getattr(metrics, self.evaluator["func"]) - self.metric_conj: str = self.evaluator.get("conj", "and") # take conjuction of multiple metrics - self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in self.evaluator["result"]] \ - if type(self.evaluator["result"]) == list else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) + self.metric: Metric = [getattr(metrics, func) for func in self.evaluator["func"]] \ + if isinstance(self.evaluator["func"], list) \ + else getattr(metrics, self.evaluator["func"]) + self.metric_conj: str = self.evaluator.get("conj", "and") # take conjunction of multiple metrics + self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in + self.evaluator["result"]] \ + if isinstance(self.evaluator["result"], list) \ + else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) if "expected" in self.evaluator: - self.expected_getter: Getter = [getattr(getters, "get_{:}".format(exp["type"])) if exp else None for exp in self.evaluator["expected"]] \ - if type(self.evaluator["expected"]) == list else getattr(getters, "get_{:}".format()) - else: self.expected_getter = [None for _ in len(self.metric)] if type(self.metric) == list else None - self.metric_options: Union[List[Dict[str, Any]], Dict[str, Any]] = [opt if opt else {} for opt in self.evaluator["options"]] \ - if type(self.evaluator.get("options", {})) == list else self.evaluator["options"] if "options" in self.evaluator else \ - [{} for _ in len(self.metric)] if type(self.metric) == list else {} - assert type(self.evaluator["func"]) != list or (len(self.metric) == len(self.result_getter) == len(self.expected_getter) == len(self.metric_options)) + self.expected_getter: Getter = [getattr(getters, "get_{:}".format(exp["type"])) if exp else None for exp in + self.evaluator["expected"]] \ + if isinstance(self.evaluator["expected"], list) \ + else getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"])) + else: + self.expected_getter = [None] * len(self.metric) \ + if isinstance(self.metric, list) \ + else None + self.metric_options: Union[List[Dict[str, Any]], Dict[str, Any]] = [opt if opt else {} for opt in + self.evaluator["options"]] \ + if isinstance(self.evaluator.get("options", {}), list) \ + else self.evaluator["options"] \ + if "options" in self.evaluator \ + else [{}] * len(self.metric) \ + if isinstance(self.metric, list) \ + else {} + + assert (not isinstance(self.evaluator["func"], list) + or (len(self.metric) == len(self.result_getter) == len(self.expected_getter) == len( + self.metric_options))) def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None) -> Dict[str, Any]: logger.info("Resetting environment...") @@ -340,9 +357,10 @@ class DesktopEnv(gym.Env): expected = self.evaluator["expected"][idx] expected_state = self.expected_getter[idx](self, expected) if expected else None - metric: int = metric(result_state, expected_state, **self.metric_options[idx]) if expected_state is not None \ + metric: int = metric(result_state, expected_state, + **self.metric_options[idx]) if expected_state is not None \ else metric(result_state, **self.metric_options[idx]) - + if self.metric_conj == 'and' and not 
bool(metric): return 0 elif self.metric_conj == 'or' and bool(metric): @@ -358,7 +376,8 @@ class DesktopEnv(gym.Env): expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \ else None - metric: float = self.metric(result_state, expected_state, **self.metric_options) if expected_state is not None \ + metric: float = self.metric(result_state, expected_state, + **self.metric_options) if expected_state is not None \ else self.metric(result_state, **self.metric_options) return metric From 46bd3386dd23f25626190923891385f3ce566568 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Fri, 19 Jan 2024 20:34:47 +0800 Subject: [PATCH 05/13] Support input screenshot and a11y tree altogether --- mm_agents/gpt_4v_agent.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py index d594b76..0dc3cb1 100644 --- a/mm_agents/gpt_4v_agent.py +++ b/mm_agents/gpt_4v_agent.py @@ -6,6 +6,7 @@ from typing import Dict, List import requests +from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes from mm_agents.gpt_4v_prompt_action import SYS_PROMPT as SYS_PROMPT_ACTION from mm_agents.gpt_4v_prompt_code import SYS_PROMPT as SYS_PROMPT_CODE @@ -64,11 +65,12 @@ def parse_code_from_string(input_string): class GPT4v_Agent: - def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300, action_space="computer_13"): + def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300, action_space="computer_13", add_a11y_tree=False): self.instruction = instruction self.model = model self.max_tokens = max_tokens self.action_space = action_space + self.add_a11y_tree = add_a11y_tree self.headers = { "Content-Type": "application/json", @@ -95,17 +97,34 @@ class GPT4v_Agent: Predict the next action(s) based on the current observation. """ base64_image = encode_image(obs["screenshot"]) + accessibility_tree = obs["accessibility_tree"] + + leaf_nodes = find_leaf_nodes(accessibility_tree) + filtered_nodes = filter_nodes(leaf_nodes) + + linearized_accessibility_tree = "tag\ttext\tposition\tsize\n" + # Linearize the accessibility tree nodes into a table format + + for node in filtered_nodes: + linearized_accessibility_tree += node.tag + "\t" + linearized_accessibility_tree += node.attrib.get('name') + "\t" + linearized_accessibility_tree += node.attrib.get( + '{uri:deskat:component.at-spi.gnome.org}screencoord') + "\t" + linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size') + "\n" + self.trajectory.append({ "role": "user", "content": [ { "type": "text", - "text": "What's the next step that you will do to help with the task?" + "text": "What's the next step that you will do to help with the task?" 
if not self.add_a11y_tree + else "And given the XML format of accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(linearized_accessibility_tree) }, { "type": "image_url", "image_url": { - "url": f"data:image/jpeg;base64,{base64_image}" + "url": f"data:image/jpeg;base64,{base64_image}", + "detail": "high" } } ] From 09f3e776aede4a912a972c3f57c7929e33a961df Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Sat, 20 Jan 2024 00:13:46 +0800 Subject: [PATCH 06/13] Initialize all baselines: screenshot, a11y tree, both, SoM, SeeAct --- experiment_pure_text.py | 2 + experiment.py => experiment_screenshot.py | 0 mm_agents/SoM_agent.py | 283 ---- .../heuristic_retrieve.py | 27 +- mm_agents/gpt_4_agent.py | 195 --- mm_agents/gpt_4_prompt_action.py | 244 --- mm_agents/gpt_4_prompt_code.py | 18 - mm_agents/gpt_4v_agent.py | 373 ++++- mm_agents/gpt_4v_prompt_action.py | 244 --- mm_agents/gpt_4v_prompt_code.py | 18 - mm_agents/prompts.py | 862 ++++++++++ mm_agents/sam_test.py | 124 -- mm_agents/visualizer.py | 1405 +++++++++++++++++ requirements.txt | 1 + 14 files changed, 2588 insertions(+), 1208 deletions(-) rename experiment.py => experiment_screenshot.py (100%) delete mode 100644 mm_agents/SoM_agent.py delete mode 100644 mm_agents/gpt_4_agent.py delete mode 100644 mm_agents/gpt_4_prompt_action.py delete mode 100644 mm_agents/gpt_4_prompt_code.py delete mode 100644 mm_agents/gpt_4v_prompt_action.py delete mode 100644 mm_agents/gpt_4v_prompt_code.py create mode 100644 mm_agents/prompts.py delete mode 100644 mm_agents/sam_test.py create mode 100644 mm_agents/visualizer.py diff --git a/experiment_pure_text.py b/experiment_pure_text.py index cfcbd46..4fd19b1 100644 --- a/experiment_pure_text.py +++ b/experiment_pure_text.py @@ -62,6 +62,8 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr env.controller.start_recording() while not done and step_num < max_steps: + with open("accessibility_tree.xml", "w", encoding="utf-8") as f: + f.write(observation["accessibility_tree"]) actions = agent.predict(observation) step_num += 1 for action in actions: diff --git a/experiment.py b/experiment_screenshot.py similarity index 100% rename from experiment.py rename to experiment_screenshot.py diff --git a/mm_agents/SoM_agent.py b/mm_agents/SoM_agent.py deleted file mode 100644 index e3b3e59..0000000 --- a/mm_agents/SoM_agent.py +++ /dev/null @@ -1,283 +0,0 @@ -# fixme: Need to be rewrite on new action space - -import os -import re -import base64 -import PIL.Image -import json -import requests - -import torch -import argparse - -# seem -from seem.modeling.BaseModel import BaseModel as BaseModel_Seem -from seem.utils.distributed import init_distributed as init_distributed_seem -from seem.modeling import build_model as build_model_seem -from task_adapter.seem.tasks import inference_seem_pano - -# semantic sam -from semantic_sam.BaseModel import BaseModel -from semantic_sam import build_model -from semantic_sam.utils.dist import init_distributed_mode -from semantic_sam.utils.arguments import load_opt_from_config_file -from semantic_sam.utils.constants import COCO_PANOPTIC_CLASSES -from task_adapter.semantic_sam.tasks import inference_semsam_m2m_auto, prompt_switch - -# sam -from segment_anything import sam_model_registry -from task_adapter.sam.tasks.inference_sam_m2m_auto import inference_sam_m2m_auto - -from scipy.ndimage import label -from io import BytesIO -import numpy as np - -SYS_PROMPT = ''' -You will act as an agent 
which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection. -For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image. - -Firstly you need to predict the class of your action, select from one below: -- **CLICK**: click on the screen with the specified integer label -- **TYPE**: type a string on the keyboard - -- For CLICK, you need to predict the correct integer label shown on the screenshot -for example, format as: -``` -{ - "action_type": "CLICK", - "label": 7 -} -``` -- For TYPE, you need to specify the text you want to type -for example, format as: -``` -{ - "action_type": "TYPE", - "text": "hello world" -} -``` - -For every step, you should only return the action_type and the parameters of your action as a dict, without any other things. You MUST wrap the dict with backticks (\`). -You can predict multiple actions at one step, but you should only return one action for each step. -You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty. -''' - -# build args -semsam_cfg = "configs/semantic_sam_only_sa-1b_swinL.yaml" -seem_cfg = "configs/seem_focall_unicl_lang_v1.yaml" - -semsam_ckpt = "./swinl_only_sam_many2many.pth" -sam_ckpt = "./sam_vit_h_4b8939.pth" -seem_ckpt = "./seem_focall_v1.pt" - -opt_semsam = load_opt_from_config_file(semsam_cfg) -opt_seem = load_opt_from_config_file(seem_cfg) -opt_seem = init_distributed_seem(opt_seem) - -# build model -model_semsam = BaseModel(opt_semsam, build_model(opt_semsam)).from_pretrained(semsam_ckpt).eval().cuda() -model_sam = sam_model_registry["vit_h"](checkpoint=sam_ckpt).eval().cuda() -model_seem = BaseModel_Seem(opt_seem, build_model_seem(opt_seem)).from_pretrained(seem_ckpt).eval().cuda() - -with torch.no_grad(): - with torch.autocast(device_type='cuda', dtype=torch.float16): - model_seem.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(COCO_PANOPTIC_CLASSES + ["background"], is_eval=True) - -@torch.no_grad() -def inference(image, slider, mode, alpha, label_mode, anno_mode, *args, **kwargs): - if slider < 1.5: - model_name = 'seem' - elif slider > 2.5: - model_name = 'sam' - else: - model_name = 'semantic-sam' - if slider < 1.5 + 0.14: - level = [1] - elif slider < 1.5 + 0.28: - level = [2] - elif slider < 1.5 + 0.42: - level = [3] - elif slider < 1.5 + 0.56: - level = [4] - elif slider < 1.5 + 0.70: - level = [5] - elif slider < 1.5 + 0.84: - level = [6] - else: - level = [6, 1, 2, 3, 4, 5] - - if label_mode == 'Alphabet': - label_mode = 'a' - else: - label_mode = '1' - - text_size, hole_scale, island_scale = 1280, 100, 100 - text, text_part, text_thresh = '', '', '0.0' - - with torch.autocast(device_type='cuda', dtype=torch.float16): - semantic = False - - if model_name == 'semantic-sam': - model = model_semsam - output, mask = inference_semsam_m2m_auto(model, image, level, text, text_part, text_thresh, text_size, hole_scale, island_scale, semantic, label_mode=label_mode, alpha=alpha, anno_mode=anno_mode, *args, **kwargs) - - elif model_name == 'sam': - model = model_sam - output, mask = inference_sam_m2m_auto(model, image, text_size, label_mode, alpha, anno_mode) - - elif model_name == 'seem': - model = model_seem - output, mask = inference_seem_pano(model, image, text_size, label_mode, alpha, anno_mode) - - return output, mask - -# Function to 
encode the image -def encode_image(image): - pil_img = PIL.Image.fromarray(image) - buff = BytesIO() - pil_img.save(buff, format="JPEG") - new_image_string = base64.b64encode(buff.getvalue()).decode("utf-8") - return new_image_string - -def parse_actions_from_string(input_string): - # Search for a JSON string within the input string - actions = [] - matches = re.findall(r'```json\s+(.*?)\s+```', input_string, re.DOTALL) - if matches: - # Assuming there's only one match, parse the JSON string into a dictionary - try: - for match in matches: - action_dict = json.loads(match) - actions.append(action_dict) - return actions - except json.JSONDecodeError as e: - return f"Failed to parse JSON: {e}" - else: - matches = re.findall(r'```\s+(.*?)\s+```', input_string, re.DOTALL) - if matches: - # Assuming there's only one match, parse the JSON string into a dictionary - try: - for match in matches: - action_dict = json.loads(match) - actions.append(action_dict) - return actions - except json.JSONDecodeError as e: - return f"Failed to parse JSON: {e}" - else: - try: - action_dict = json.loads(input_string) - return [action_dict] - except json.JSONDecodeError as e: - raise ValueError("Invalid response format: " + input_string) - -class GPT4v_Agent: - def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300): - self.instruction = instruction - self.model = model - self.max_tokens = max_tokens - - self.headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}" - } - - self.trajectory = [ - { - "role": "system", - "content": [ - { - "type": "text", - "text": SYS_PROMPT - }, - ] - } - ] - - def predict(self, obs): - obs, mask = inference(obs, slider=3.0, mode="Automatic", alpha=0.1, label_mode="Number", anno_mode=["Mark", "Box"]) - PIL.Image.fromarray(obs).save("desktop.jpeg") - base64_image = encode_image(obs) - self.trajectory.append({ - "role": "user", - "content": [ - { - "type": "text", - "text": "What's the next step for instruction '{}'?".format(self.instruction) - }, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{base64_image}" - } - } - ] - }) - traj_to_show = [] - for i in range(len(self.trajectory)): - traj_to_show.append(self.trajectory[i]["content"][0]["text"]) - if len(self.trajectory[i]["content"]) > 1: - traj_to_show.append("screenshot_obs") - print("Trajectory:", traj_to_show) - payload = { - "model": self.model, - "messages": self.trajectory, - "max_tokens": self.max_tokens - } - response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload) - - try: - actions = self.parse_actions(response.json()['choices'][0]['message']['content'], mask) - except: - print("Failed to parse action from response:", response.json()['choices'][0]['message']['content']) - actions = None - - return actions - - def parse_actions(self, response: str, mask): - # response example - """ - ```json - { - "action_type": "CLICK", - "click_type": "RIGHT" - } - ``` - """ - - # parse from the response - actions = parse_actions_from_string(response) - print(actions) - - # add action into the trajectory - self.trajectory.append({ - "role": "assistant", - "content": [ - { - "type": "text", - "text": response - }, - ] - }) - - # parse action - parsed_actions = [] - for action in actions: - action_type = action['action_type'] - if action_type == "CLICK": - label = int(action['label']) - x, y, w, h = mask[label-1]['bbox'] - parsed_actions.append({"action_type": action_type, "x": int(x + w//2) , 
"y": int(y + h//2)}) - - if action_type == "TYPE": - parsed_actions.append({"action_type": action_type, "text": action["text"]}) - - return parsed_actions - - -if __name__ == '__main__': - # OpenAI API Key - api_key = os.environ.get("OPENAI_API_KEY") - - agent = GPT4v_Agent(api_key=api_key, instruction="Open Firefox") - obs = PIL.Image.open('desktop.png') - print(agent.predict(obs=obs)) \ No newline at end of file diff --git a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py index d6f83eb..c59060c 100644 --- a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py +++ b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py @@ -41,10 +41,12 @@ def filter_nodes(nodes): elif node.tag == 'text': continue else: - coords = tuple(map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', '))) + coords = tuple( + map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', '))) if coords[0] < 0 or coords[1] < 0: continue - size = tuple(map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', '))) + size = tuple( + map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', '))) if size[0] <= 0 or size[1] <= 0: continue # Node is not a 'panel', add to the list. @@ -57,6 +59,9 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path): # Load the screenshot image image = Image.open(image_file_path) draw = ImageDraw.Draw(image) + marks = [] + + # todo: change the image tagger to align with SoM paper # Optional: Load a font. If you don't specify a font, a default one will be used. try: @@ -95,8 +100,26 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path): text_position = (coords[0], bottom_right[1]) # Adjust Y to be above the bottom right draw.text(text_position, str(index), font=font, fill="purple") + # each mark is an x, y, w, h tuple + marks.append([coords[0], coords[1], size[0], size[1]]) + except ValueError as e: pass # Save the result image.save(output_image_file_path) + return marks + + +def print_nodes_with_indent(nodes, indent=0): + for node in nodes: + print(' ' * indent, node.tag, node.attrib) + print_nodes_with_indent(node, indent + 2) + + +if __name__ == '__main__': + with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as f: + xml_file_str = f.read() + + nodes = ET.fromstring(xml_file_str) + print_nodes_with_indent(nodes) diff --git a/mm_agents/gpt_4_agent.py b/mm_agents/gpt_4_agent.py deleted file mode 100644 index aa19185..0000000 --- a/mm_agents/gpt_4_agent.py +++ /dev/null @@ -1,195 +0,0 @@ -import base64 -import json -import re -import time -from typing import Dict, List - -import requests - -from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes -from mm_agents.gpt_4_prompt_action import SYS_PROMPT as SYS_PROMPT_ACTION -from mm_agents.gpt_4_prompt_code import SYS_PROMPT as SYS_PROMPT_CODE - - -# Function to encode the image -def encode_image(image_path): - with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode('utf-8') - - -def parse_actions_from_string(input_string): - # Search for a JSON string within the input string - actions = [] - matches = re.findall(r'```json\s+(.*?)\s+```', input_string, re.DOTALL) - if matches: - # Assuming there's only one match, parse the JSON string into a dictionary - try: - for match in matches: - action_dict 
= json.loads(match) - actions.append(action_dict) - return actions - except json.JSONDecodeError as e: - return f"Failed to parse JSON: {e}" - else: - matches = re.findall(r'```\s+(.*?)\s+```', input_string, re.DOTALL) - if matches: - # Assuming there's only one match, parse the JSON string into a dictionary - try: - for match in matches: - action_dict = json.loads(match) - actions.append(action_dict) - return actions - except json.JSONDecodeError as e: - return f"Failed to parse JSON: {e}" - else: - try: - action_dict = json.loads(input_string) - return [action_dict] - except json.JSONDecodeError as e: - raise ValueError("Invalid response format: " + input_string) - - -def parse_code_from_string(input_string): - # This regular expression will match both ```code``` and ```python code``` - # and capture the `code` part. It uses a non-greedy match for the content inside. - pattern = r"```(?:\w+\s+)?(.*?)```" - # Find all non-overlapping matches in the string - matches = re.findall(pattern, input_string, re.DOTALL) - - # The regex above captures the content inside the triple backticks. - # The `re.DOTALL` flag allows the dot `.` to match newline characters as well, - # so the code inside backticks can span multiple lines. - - # matches now contains all the captured code snippets - - codes = [] - - for match in matches: - match = match.strip() - commands = ['WAIT', 'DONE', 'FAIL'] # fixme: updates this part when we have more commands - - if match in commands: - codes.append(match.strip()) - elif match.split('\n')[-1] in commands: - if len(match.split('\n')) > 1: - codes.append("\n".join(match.split('\n')[:-1])) - codes.append(match.split('\n')[-1]) - else: - codes.append(match) - - return codes - - -class GPT4_Agent: - def __init__(self, api_key, instruction, model="gpt-4-1106-preview", max_tokens=600, action_space="computer_13"): - self.instruction = instruction - self.model = model - self.max_tokens = max_tokens - self.action_space = action_space - - self.headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}" - } - - self.trajectory = [ - { - "role": "system", - "content": [ - { - "type": "text", - "text": { - "computer_13": SYS_PROMPT_ACTION, - "pyautogui": SYS_PROMPT_CODE - }[action_space] + "\nHere is the instruction for the task: {}".format(self.instruction) - }, - ] - } - ] - - def predict(self, obs: Dict) -> List: - """ - Predict the next action(s) based on the current observation. 
- """ - accessibility_tree = obs["accessibility_tree"] - - leaf_nodes = find_leaf_nodes(accessibility_tree) - filtered_nodes = filter_nodes(leaf_nodes) - - linearized_accessibility_tree = "tag\ttext\tposition\tsize\n" - # Linearize the accessibility tree nodes into a table format - - for node in filtered_nodes: - linearized_accessibility_tree += node.tag + "\t" - linearized_accessibility_tree += node.attrib.get('name') + "\t" - linearized_accessibility_tree += node.attrib.get( - '{uri:deskat:component.at-spi.gnome.org}screencoord') + "\t" - linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size') + "\n" - - self.trajectory.append({ - "role": "user", - "content": [ - { - "type": "text", - "text": "Given the XML format of accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format( - linearized_accessibility_tree) - } - ] - }) - - # print( - # "Given the XML format of accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format( - # linearized_accessibility_tree) - # ) - - traj_to_show = [] - for i in range(len(self.trajectory)): - traj_to_show.append(self.trajectory[i]["content"][0]["text"]) - if len(self.trajectory[i]["content"]) > 1: - traj_to_show.append("screenshot_obs") - - payload = { - "model": self.model, - "messages": self.trajectory, - "max_tokens": self.max_tokens - } - - while True: - try: - response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, - json=payload) - break - except: - print("Failed to generate response, retrying...") - time.sleep(5) - pass - - try: - actions = self.parse_actions(response.json()['choices'][0]['message']['content']) - except: - print("Failed to parse action from response:", response.json()) - actions = None - - return actions - - def parse_actions(self, response: str): - # parse from the response - if self.action_space == "computer_13": - actions = parse_actions_from_string(response) - elif self.action_space == "pyautogui": - actions = parse_code_from_string(response) - else: - raise ValueError("Invalid action space: " + self.action_space) - - # add action into the trajectory - self.trajectory.append({ - "role": "assistant", - "content": [ - { - "type": "text", - "text": response - }, - ] - }) - - return actions diff --git a/mm_agents/gpt_4_prompt_action.py b/mm_agents/gpt_4_prompt_action.py deleted file mode 100644 index 3019074..0000000 --- a/mm_agents/gpt_4_prompt_action.py +++ /dev/null @@ -1,244 +0,0 @@ -SYS_PROMPT = """ -You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection. -For each step, you will get an observation of the desktop by the XML format of accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree. 
- -HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters: -ACTION_SPACE = [ - { - "action_type": "MOVE_TO", - "note": "move the cursor to the specified position", - "parameters": { - "x": { - "type": float, - "range": [0, X_MAX], - "optional": False, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": False, - } - } - }, - { - "action_type": "CLICK", - "note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position", - "parameters": { - "button": { - "type": str, - "range": ["left", "right", "middle"], - "optional": True, - }, - "x": { - "type": float, - "range": [0, X_MAX], - "optional": True, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": True, - }, - "num_clicks": { - "type": int, - "range": [1, 2, 3], - "optional": True, - }, - } - }, - { - "action_type": "MOUSE_DOWN", - "note": "press the left button if the button not specified, otherwise press the specified button", - "parameters": { - "button": { - "type": str, - "range": ["left", "right", "middle"], - "optional": True, - } - } - }, - { - "action_type": "MOUSE_UP", - "note": "release the left button if the button not specified, otherwise release the specified button", - "parameters": { - "button": { - "type": str, - "range": ["left", "right", "middle"], - "optional": True, - } - } - }, - { - "action_type": "RIGHT_CLICK", - "note": "right click at the current position if x and y are not specified, otherwise right click at the specified position", - "parameters": { - "x": { - "type": float, - "range": [0, X_MAX], - "optional": True, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": True, - } - } - }, - { - "action_type": "DOUBLE_CLICK", - "note": "double click at the current position if x and y are not specified, otherwise double click at the specified position", - "parameters": { - "x": { - "type": float, - "range": [0, X_MAX], - "optional": True, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": True, - } - } - }, - { - "action_type": "DRAG_TO", - "note": "drag the cursor to the specified position with the left button pressed", - "parameters": { - "x": { - "type": float, - "range": [0, X_MAX], - "optional": False, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": False, - } - } - }, - { - "action_type": "SCROLL", - "note": "scroll the mouse wheel up or down", - "parameters": { - "dx": { - "type": int, - "range": None, - "optional": False, - }, - "dy": { - "type": int, - "range": None, - "optional": False, - } - } - }, - { - "action_type": "TYPING", - "note": "type the specified text", - "parameters": { - "text": { - "type": str, - "range": None, - "optional": False, - } - } - }, - { - "action_type": "PRESS", - "note": "press the specified key and release it", - "parameters": { - "key": { - "type": str, - "range": KEYBOARD_KEYS, - "optional": False, - } - } - }, - { - "action_type": "KEY_DOWN", - "note": "press the specified key", - "parameters": { - "key": { - "type": str, - "range": KEYBOARD_KEYS, - "optional": False, - } - } - }, - { - "action_type": "KEY_UP", - "note": "release the specified key", - "parameters": { - "key": { - "type": str, - "range": KEYBOARD_KEYS, - "optional": False, - } - } - }, - { - "action_type": "HOTKEY", - "note": "press the specified key combination", - "parameters": { - "keys": { - "type": list, - 
"range": [KEYBOARD_KEYS], - "optional": False, - } - } - }, - ############################################################################################################ - { - "action_type": "WAIT", - "note": "wait until the next action", - }, - { - "action_type": "FAIL", - "note": "decide the task can not be performed", - }, - { - "action_type": "DONE", - "note": "decide the task is done", - } -] -Firstly you need to predict the class of your action, then you need to predict the parameters of your action: -- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080) -for example, format as: -``` -{ - "action_type": "MOUSE_MOVE", - "x": 1319.11, - "y": 65.06 -} -``` -- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse: -for example, format as: -``` -{ - "action_type": "CLICK", - "click_type": "LEFT" -} -``` -- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard -for example, format as: -``` -{ - "action_type": "KEY", - "key": "ctrl+c" -} -``` -- For TYPE, you need to specify the text you want to type -for example, format as: -``` -{ - "action_type": "TYPE", - "text": "hello world" -} -``` - -REMEMBER: -For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. -You MUST wrap the dict with backticks (\`). -You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty. -You CAN predict multiple actions at one step, but you should only return one action for each step. -""" \ No newline at end of file diff --git a/mm_agents/gpt_4_prompt_code.py b/mm_agents/gpt_4_prompt_code.py deleted file mode 100644 index 25e4083..0000000 --- a/mm_agents/gpt_4_prompt_code.py +++ /dev/null @@ -1,18 +0,0 @@ -SYS_PROMPT = """ -You are an agent which follow my instruction and perform desktop computer tasks as instructed. -You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. -For each step, you will get an observation of the desktop by the XML format of accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree. - -You are required to use `pyautogui` to perform the action. -Return one line or multiple lines of python code to perform the action each time, be time efficient. -You ONLY need to return the code inside a code block, like this: -```python -# your code here -``` -Specially, it is also allowed to return the following special code: -When you think you have to wait for some time, return ```WAIT```; -When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task; -When you think the task is done, return ```DONE```. - -First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. 
-""" \ No newline at end of file diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py index 0dc3cb1..6e2000c 100644 --- a/mm_agents/gpt_4v_agent.py +++ b/mm_agents/gpt_4v_agent.py @@ -1,14 +1,27 @@ import base64 import json +import os import re import time +import uuid from typing import Dict, List +import backoff import requests +from openai.error import ( + APIConnectionError, + APIError, + RateLimitError, + ServiceUnavailableError, + InvalidRequestError +) -from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes -from mm_agents.gpt_4v_prompt_action import SYS_PROMPT as SYS_PROMPT_ACTION -from mm_agents.gpt_4v_prompt_code import SYS_PROMPT as SYS_PROMPT_CODE +from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes, draw_bounding_boxes +from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION, \ + SYS_PROMPT_IN_A11Y_OUT_CODE, SYS_PROMPT_IN_A11Y_OUT_ACTION, \ + SYS_PROMPT_IN_BOTH_OUT_CODE, SYS_PROMPT_IN_BOTH_OUT_ACTION, \ + SYS_PROMPT_IN_SOM_A11Y_OUT_TAG, \ + SYS_PROMPT_SEEACT, ACTION_DESCRIPTION_PROMPT_SEEACT, ACTION_GROUNDING_PROMPT_SEEACT # Function to encode the image @@ -17,6 +30,35 @@ def encode_image(image_path): return base64.b64encode(image_file.read()).decode('utf-8') +def linearize_accessibility_tree(accessibility_tree): + leaf_nodes = find_leaf_nodes(accessibility_tree) + filtered_nodes = filter_nodes(leaf_nodes) + + linearized_accessibility_tree = "tag\ttext\tposition\tsize\n" + # Linearize the accessibility tree nodes into a table format + + for node in filtered_nodes: + linearized_accessibility_tree += node.tag + "\t" + linearized_accessibility_tree += node.attrib.get('name') + "\t" + linearized_accessibility_tree += node.attrib.get( + '{uri:deskat:component.at-spi.gnome.org}screencoord') + "\t" + linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size') + "\n" + + return linearized_accessibility_tree + + +def tag_screenshot(screenshot, accessibility_tree): + # Creat a tmp file to store the screenshot in random name + uuid_str = str(uuid.uuid4()) + os.makedirs("tmp/images", exist_ok=True) + tagged_screenshot_file_path = os.path.join("tmp/images", uuid_str + ".png") + nodes = filter_nodes(find_leaf_nodes(accessibility_tree)) + # Make tag screenshot + marks = draw_bounding_boxes(nodes, screenshot, tagged_screenshot_file_path) + + return marks, tagged_screenshot_file_path + + def parse_actions_from_string(input_string): # Search for a JSON string within the input string actions = [] @@ -61,124 +103,295 @@ def parse_code_from_string(input_string): # so the code inside backticks can span multiple lines. 
     # matches now contains all the captured code snippets
-    return matches
+
+    codes = []
+
+    for match in matches:
+        match = match.strip()
+        commands = ['WAIT', 'DONE', 'FAIL']  # fixme: update this part when we have more commands
+
+        if match in commands:
+            codes.append(match.strip())
+        elif match.split('\n')[-1] in commands:
+            if len(match.split('\n')) > 1:
+                codes.append("\n".join(match.split('\n')[:-1]))
+            codes.append(match.split('\n')[-1])
+        else:
+            codes.append(match)
+
+    return codes
+
+
+def parse_code_from_som_string(input_string, masks):
+    for i, mask in enumerate(masks):
+        x, y, w, h = mask
+        input_string = input_string.replace("tag#" + str(i), "{}, {}".format(int(x + w // 2), int(y + h // 2)))
+
+    return parse_code_from_string(input_string)
 
 
 class GPT4v_Agent:
-    def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300, action_space="computer_13", add_a11y_tree=False):
+    def __init__(
+            self,
+            api_key,
+            instruction,
+            model="gpt-4-vision-preview",
+            max_tokens=300,
+            action_space="computer_13",
+            exp="both"
+            # exp can be in ["screenshot", "a11y_tree", "both", "som", "seeact"]
+    ):
+
         self.instruction = instruction
         self.model = model
         self.max_tokens = max_tokens
         self.action_space = action_space
-        self.add_a11y_tree = add_a11y_tree
+        self.exp = exp
 
         self.headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {api_key}"
         }
 
-        self.trajectory = [
-            {
-                "role": "system",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": {
-                            "computer_13": SYS_PROMPT_ACTION,
-                            "pyautogui": SYS_PROMPT_CODE
-                        }[action_space] + "\nHere is the instruction for the task: {}".format(self.instruction)
-                    },
-                ]
-            }
-        ]
+        self.actions = []
+        self.observations = []
+
+        if exp == "screenshot":
+            if action_space == "computer_13":
+                self.system_message = SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION
+            elif action_space == "pyautogui":
+                self.system_message = SYS_PROMPT_IN_SCREENSHOT_OUT_CODE
+            else:
+                raise ValueError("Invalid action space: " + action_space)
+        elif exp == "a11y_tree":
+            if action_space == "computer_13":
+                self.system_message = SYS_PROMPT_IN_A11Y_OUT_ACTION
+            elif action_space == "pyautogui":
+                self.system_message = SYS_PROMPT_IN_A11Y_OUT_CODE
+            else:
+                raise ValueError("Invalid action space: " + action_space)
+        elif exp == "both":
+            if action_space == "computer_13":
+                self.system_message = SYS_PROMPT_IN_BOTH_OUT_ACTION
+            elif action_space == "pyautogui":
+                self.system_message = SYS_PROMPT_IN_BOTH_OUT_CODE
+            else:
+                raise ValueError("Invalid action space: " + action_space)
+        elif exp == "som":
+            if action_space == "computer_13":
+                raise ValueError("Invalid action space: " + action_space)
+            elif action_space == "pyautogui":
+                self.system_message = SYS_PROMPT_IN_SOM_A11Y_OUT_TAG
+            else:
+                raise ValueError("Invalid action space: " + action_space)
+        elif exp == "seeact":
+            if action_space == "computer_13":
+                raise ValueError("Invalid action space: " + action_space)
+            elif action_space == "pyautogui":
+                self.system_message = SYS_PROMPT_SEEACT
+            else:
+                raise ValueError("Invalid action space: " + action_space)
+        else:
+            raise ValueError("Invalid experiment type: " + exp)
+
+        self.system_message = (self.system_message +
+                               "\nHere is the instruction for the task: {}".format(self.instruction))
 
     def predict(self, obs: Dict) -> List:
         """
         Predict the next action(s) based on the current observation.
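+
+        Depending on self.exp, the observation is rendered as a raw screenshot,
+        a linearized accessibility tree, both together, a Set-of-Mark tagged
+        screenshot, or the two-stage SeeAct flow (describe the intended action,
+        then ground it to a target element).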
""" - base64_image = encode_image(obs["screenshot"]) - accessibility_tree = obs["accessibility_tree"] - leaf_nodes = find_leaf_nodes(accessibility_tree) - filtered_nodes = filter_nodes(leaf_nodes) + # Prepare the payload for the API call + messages = [] - linearized_accessibility_tree = "tag\ttext\tposition\tsize\n" - # Linearize the accessibility tree nodes into a table format + if len(self.actions) > 0: + system_message = self.system_message + "\nHere are the actions you have done so far:\n" + "\n->\n".join( + self.actions) + else: + system_message = self.system_message - for node in filtered_nodes: - linearized_accessibility_tree += node.tag + "\t" - linearized_accessibility_tree += node.attrib.get('name') + "\t" - linearized_accessibility_tree += node.attrib.get( - '{uri:deskat:component.at-spi.gnome.org}screencoord') + "\t" - linearized_accessibility_tree += node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size') + "\n" - - self.trajectory.append({ - "role": "user", + messages.append({ + "role": "system", "content": [ { "type": "text", - "text": "What's the next step that you will do to help with the task?" if not self.add_a11y_tree - else "And given the XML format of accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(linearized_accessibility_tree) + "text": system_message }, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{base64_image}", - "detail": "high" - } - } ] }) - traj_to_show = [] - for i in range(len(self.trajectory)): - traj_to_show.append(self.trajectory[i]["content"][0]["text"]) - if len(self.trajectory[i]["content"]) > 1: - traj_to_show.append("screenshot_obs") + masks = None - print("Trajectory:", traj_to_show) + if self.exp in ["screenshot", "both"]: + base64_image = encode_image(obs["screenshot"]) + linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": "Given the screenshot as below. What's the next step that you will do to help with the task?" 
+ if self.exp == "screenshot" + else "Given the screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format( + linearized_accessibility_tree) + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + "detail": "high" + } + } + ] + }) + elif self.exp == "a11y_tree": + linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": "Given the info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format( + linearized_accessibility_tree) + } + ] + }) + elif self.exp == "som": + # Add som to the screenshot + masks, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"]) - payload = { + base64_image = encode_image(tagged_screenshot) + linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + + messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": "Given the info from the tagged screenshot as below:\n{}\nWhat's the next step that you will do to help with the task?".format( + linearized_accessibility_tree) + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + "detail": "high" + } + } + ] + }) + elif self.exp == "seeact": + # Add som to the screenshot + masks, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"]) + + base64_image = encode_image(tagged_screenshot) + linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + + messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": ACTION_DESCRIPTION_PROMPT_SEEACT.format(linearized_accessibility_tree) + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + "detail": "high" + } + } + ] + }) + else: + raise ValueError("Invalid experiment type: " + self.exp) + + response = self.call_llm({ "model": self.model, - "messages": self.trajectory, + "messages": messages, "max_tokens": self.max_tokens - } + }) + if self.exp == "seeact": + messages.append({ + "role": "assistant", + "content": [ + { + "type": "text", + "text": response + } + ] + }) + + messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": "{}\n\nWhat's the next step that you will do to help with the task?".format( + ACTION_GROUNDING_PROMPT_SEEACT) + } + ] + }) + + response = self.call_llm({ + "model": self.model, + "messages": messages, + "max_tokens": self.max_tokens + }) + + try: + actions = self.parse_actions(response, masks) + except Exception as e: + print("Failed to parse action from response", e) + actions = None + + return actions + + @backoff.on_exception( + backoff.expo, + (APIError, RateLimitError, APIConnectionError, ServiceUnavailableError, InvalidRequestError), + ) + def call_llm(self, payload): while True: try: - response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, - json=payload) + response = requests.post( + "https://api.openai.com/v1/chat/completions", + headers=self.headers, + json=payload + ) break except: print("Failed to generate response, retrying...") time.sleep(5) pass - try: - actions = self.parse_actions(response.json()['choices'][0]['message']['content']) - except: - print("Failed to parse action from response:", 
response.json()) - actions = None - return actions + return response.json()['choices'][0]['message']['content'] - def parse_actions(self, response: str): - # parse from the response - if self.action_space == "computer_13": - actions = parse_actions_from_string(response) - elif self.action_space == "pyautogui": - actions = parse_code_from_string(response) - else: - raise ValueError("Invalid action space: " + self.action_space) + def parse_actions(self, response: str, masks=None): - # add action into the trajectory - self.trajectory.append({ - "role": "assistant", - "content": [ - { - "type": "text", - "text": response - }, - ] - }) + if self.exp in ["screenshot", "a11y_tree", "both"]: + # parse from the response + if self.action_space == "computer_13": + actions = parse_actions_from_string(response) + elif self.action_space == "pyautogui": + actions = parse_code_from_string(response) + else: + raise ValueError("Invalid action space: " + self.action_space) - return actions + self.actions.append(actions) + + return actions + elif self.exp in ["som", "seeact"]: + # parse from the response + if self.action_space == "computer_13": + raise ValueError("Invalid action space: " + self.action_space) + elif self.action_space == "pyautogui": + actions = parse_code_from_som_string(response, masks) + else: + raise ValueError("Invalid action space: " + self.action_space) + + self.actions.append(actions) + + return actions diff --git a/mm_agents/gpt_4v_prompt_action.py b/mm_agents/gpt_4v_prompt_action.py deleted file mode 100644 index 4323df6..0000000 --- a/mm_agents/gpt_4v_prompt_action.py +++ /dev/null @@ -1,244 +0,0 @@ -SYS_PROMPT = """ -You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection. -For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image. 
- -HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters: -ACTION_SPACE = [ - { - "action_type": "MOVE_TO", - "note": "move the cursor to the specified position", - "parameters": { - "x": { - "type": float, - "range": [0, X_MAX], - "optional": False, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": False, - } - } - }, - { - "action_type": "CLICK", - "note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position", - "parameters": { - "button": { - "type": str, - "range": ["left", "right", "middle"], - "optional": True, - }, - "x": { - "type": float, - "range": [0, X_MAX], - "optional": True, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": True, - }, - "num_clicks": { - "type": int, - "range": [1, 2, 3], - "optional": True, - }, - } - }, - { - "action_type": "MOUSE_DOWN", - "note": "press the left button if the button not specified, otherwise press the specified button", - "parameters": { - "button": { - "type": str, - "range": ["left", "right", "middle"], - "optional": True, - } - } - }, - { - "action_type": "MOUSE_UP", - "note": "release the left button if the button not specified, otherwise release the specified button", - "parameters": { - "button": { - "type": str, - "range": ["left", "right", "middle"], - "optional": True, - } - } - }, - { - "action_type": "RIGHT_CLICK", - "note": "right click at the current position if x and y are not specified, otherwise right click at the specified position", - "parameters": { - "x": { - "type": float, - "range": [0, X_MAX], - "optional": True, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": True, - } - } - }, - { - "action_type": "DOUBLE_CLICK", - "note": "double click at the current position if x and y are not specified, otherwise double click at the specified position", - "parameters": { - "x": { - "type": float, - "range": [0, X_MAX], - "optional": True, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": True, - } - } - }, - { - "action_type": "DRAG_TO", - "note": "drag the cursor to the specified position with the left button pressed", - "parameters": { - "x": { - "type": float, - "range": [0, X_MAX], - "optional": False, - }, - "y": { - "type": float, - "range": [0, Y_MAX], - "optional": False, - } - } - }, - { - "action_type": "SCROLL", - "note": "scroll the mouse wheel up or down", - "parameters": { - "dx": { - "type": int, - "range": None, - "optional": False, - }, - "dy": { - "type": int, - "range": None, - "optional": False, - } - } - }, - { - "action_type": "TYPING", - "note": "type the specified text", - "parameters": { - "text": { - "type": str, - "range": None, - "optional": False, - } - } - }, - { - "action_type": "PRESS", - "note": "press the specified key and release it", - "parameters": { - "key": { - "type": str, - "range": KEYBOARD_KEYS, - "optional": False, - } - } - }, - { - "action_type": "KEY_DOWN", - "note": "press the specified key", - "parameters": { - "key": { - "type": str, - "range": KEYBOARD_KEYS, - "optional": False, - } - } - }, - { - "action_type": "KEY_UP", - "note": "release the specified key", - "parameters": { - "key": { - "type": str, - "range": KEYBOARD_KEYS, - "optional": False, - } - } - }, - { - "action_type": "HOTKEY", - "note": "press the specified key combination", - "parameters": { - "keys": { - "type": list, - 
"range": [KEYBOARD_KEYS], - "optional": False, - } - } - }, - ############################################################################################################ - { - "action_type": "WAIT", - "note": "wait until the next action", - }, - { - "action_type": "FAIL", - "note": "decide the task can not be performed", - }, - { - "action_type": "DONE", - "note": "decide the task is done", - } -] -Firstly you need to predict the class of your action, then you need to predict the parameters of your action: -- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080) -for example, format as: -``` -{ - "action_type": "MOUSE_MOVE", - "x": 1319.11, - "y": 65.06 -} -``` -- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse: -for example, format as: -``` -{ - "action_type": "CLICK", - "click_type": "LEFT" -} -``` -- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard -for example, format as: -``` -{ - "action_type": "KEY", - "key": "ctrl+c" -} -``` -- For TYPE, you need to specify the text you want to type -for example, format as: -``` -{ - "action_type": "TYPE", - "text": "hello world" -} -``` - -REMEMBER: -For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. -You MUST wrap the dict with backticks (\`). -You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty. -You CAN predict multiple actions at one step, but you should only return one action for each step. -""" \ No newline at end of file diff --git a/mm_agents/gpt_4v_prompt_code.py b/mm_agents/gpt_4v_prompt_code.py deleted file mode 100644 index 8f256da..0000000 --- a/mm_agents/gpt_4v_prompt_code.py +++ /dev/null @@ -1,18 +0,0 @@ -SYS_PROMPT = """ -You are an agent which follow my instruction and perform desktop computer tasks as instructed. -You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. -For each step, you will get an observation of an image, which is the screenshot of the computer screen and you will predict the action of the computer based on the image. - -You are required to use `pyautogui` to perform the action. -Return one line or multiple lines of python code to perform the action each time, be time efficient. -You ONLY need to return the code inside a code block, like this: -```python -# your code here -``` -Specially, it is also allowed to return the following special code: -When you think you have to wait for some time, return ```WAIT```; -When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task; -When you think the task is done, return ```DONE```. - -First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. 
-""" \ No newline at end of file diff --git a/mm_agents/prompts.py b/mm_agents/prompts.py new file mode 100644 index 0000000..dcc9a85 --- /dev/null +++ b/mm_agents/prompts.py @@ -0,0 +1,862 @@ +SYS_PROMPT_IN_SCREENSHOT_OUT_CODE = """ +You are an agent which follow my instruction and perform desktop computer tasks as instructed. +You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. +For each step, you will get an observation of an image, which is the screenshot of the computer screen and you will predict the action of the computer based on the image. + +You are required to use `pyautogui` to perform the action. +Return one line or multiple lines of python code to perform the action each time, be time efficient. +You ONLY need to return the code inside a code block, like this: +```python +# your code here +``` +Specially, it is also allowed to return the following special code: +When you think you have to wait for some time, return ```WAIT```; +When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task; +When you think the task is done, return ```DONE```. + +First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. +""".strip() + +SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION = """ +You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection. +For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image. 
+ +HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters: +ACTION_SPACE = [ + { + "action_type": "MOVE_TO", + "note": "move the cursor to the specified position", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": False, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": False, + } + } + }, + { + "action_type": "CLICK", + "note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position", + "parameters": { + "button": { + "type": str, + "range": ["left", "right", "middle"], + "optional": True, + }, + "x": { + "type": float, + "range": [0, X_MAX], + "optional": True, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": True, + }, + "num_clicks": { + "type": int, + "range": [1, 2, 3], + "optional": True, + }, + } + }, + { + "action_type": "MOUSE_DOWN", + "note": "press the left button if the button not specified, otherwise press the specified button", + "parameters": { + "button": { + "type": str, + "range": ["left", "right", "middle"], + "optional": True, + } + } + }, + { + "action_type": "MOUSE_UP", + "note": "release the left button if the button not specified, otherwise release the specified button", + "parameters": { + "button": { + "type": str, + "range": ["left", "right", "middle"], + "optional": True, + } + } + }, + { + "action_type": "RIGHT_CLICK", + "note": "right click at the current position if x and y are not specified, otherwise right click at the specified position", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": True, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": True, + } + } + }, + { + "action_type": "DOUBLE_CLICK", + "note": "double click at the current position if x and y are not specified, otherwise double click at the specified position", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": True, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": True, + } + } + }, + { + "action_type": "DRAG_TO", + "note": "drag the cursor to the specified position with the left button pressed", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": False, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": False, + } + } + }, + { + "action_type": "SCROLL", + "note": "scroll the mouse wheel up or down", + "parameters": { + "dx": { + "type": int, + "range": None, + "optional": False, + }, + "dy": { + "type": int, + "range": None, + "optional": False, + } + } + }, + { + "action_type": "TYPING", + "note": "type the specified text", + "parameters": { + "text": { + "type": str, + "range": None, + "optional": False, + } + } + }, + { + "action_type": "PRESS", + "note": "press the specified key and release it", + "parameters": { + "key": { + "type": str, + "range": KEYBOARD_KEYS, + "optional": False, + } + } + }, + { + "action_type": "KEY_DOWN", + "note": "press the specified key", + "parameters": { + "key": { + "type": str, + "range": KEYBOARD_KEYS, + "optional": False, + } + } + }, + { + "action_type": "KEY_UP", + "note": "release the specified key", + "parameters": { + "key": { + "type": str, + "range": KEYBOARD_KEYS, + "optional": False, + } + } + }, + { + "action_type": "HOTKEY", + "note": "press the specified key combination", + "parameters": { + "keys": { + "type": list, + 
"range": [KEYBOARD_KEYS], + "optional": False, + } + } + }, + ############################################################################################################ + { + "action_type": "WAIT", + "note": "wait until the next action", + }, + { + "action_type": "FAIL", + "note": "decide the task can not be performed", + }, + { + "action_type": "DONE", + "note": "decide the task is done", + } +] +Firstly you need to predict the class of your action, then you need to predict the parameters of your action: +- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080) +for example, format as: +``` +{ + "action_type": "MOUSE_MOVE", + "x": 1319.11, + "y": 65.06 +} +``` +- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse: +for example, format as: +``` +{ + "action_type": "CLICK", + "click_type": "LEFT" +} +``` +- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard +for example, format as: +``` +{ + "action_type": "KEY", + "key": "ctrl+c" +} +``` +- For TYPE, you need to specify the text you want to type +for example, format as: +``` +{ + "action_type": "TYPE", + "text": "hello world" +} +``` + +REMEMBER: +For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. +You MUST wrap the dict with backticks (\`). +You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty. +You CAN predict multiple actions at one step, but you should only return one action for each step. +""".strip() + +SYS_PROMPT_IN_A11Y_OUT_CODE = """ +You are an agent which follow my instruction and perform desktop computer tasks as instructed. +You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. +For each step, you will get an observation of the desktop by accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree. + +You are required to use `pyautogui` to perform the action. +Return one line or multiple lines of python code to perform the action each time, be time efficient. +You ONLY need to return the code inside a code block, like this: +```python +# your code here +``` +Specially, it is also allowed to return the following special code: +When you think you have to wait for some time, return ```WAIT```; +When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task; +When you think the task is done, return ```DONE```. + +First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. +""".strip() + +SYS_PROMPT_IN_A11Y_OUT_ACTION = """ +You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection. +For each step, you will get an observation of the desktop by accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree. 
+ +HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters: +ACTION_SPACE = [ + { + "action_type": "MOVE_TO", + "note": "move the cursor to the specified position", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": False, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": False, + } + } + }, + { + "action_type": "CLICK", + "note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position", + "parameters": { + "button": { + "type": str, + "range": ["left", "right", "middle"], + "optional": True, + }, + "x": { + "type": float, + "range": [0, X_MAX], + "optional": True, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": True, + }, + "num_clicks": { + "type": int, + "range": [1, 2, 3], + "optional": True, + }, + } + }, + { + "action_type": "MOUSE_DOWN", + "note": "press the left button if the button not specified, otherwise press the specified button", + "parameters": { + "button": { + "type": str, + "range": ["left", "right", "middle"], + "optional": True, + } + } + }, + { + "action_type": "MOUSE_UP", + "note": "release the left button if the button not specified, otherwise release the specified button", + "parameters": { + "button": { + "type": str, + "range": ["left", "right", "middle"], + "optional": True, + } + } + }, + { + "action_type": "RIGHT_CLICK", + "note": "right click at the current position if x and y are not specified, otherwise right click at the specified position", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": True, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": True, + } + } + }, + { + "action_type": "DOUBLE_CLICK", + "note": "double click at the current position if x and y are not specified, otherwise double click at the specified position", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": True, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": True, + } + } + }, + { + "action_type": "DRAG_TO", + "note": "drag the cursor to the specified position with the left button pressed", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": False, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": False, + } + } + }, + { + "action_type": "SCROLL", + "note": "scroll the mouse wheel up or down", + "parameters": { + "dx": { + "type": int, + "range": None, + "optional": False, + }, + "dy": { + "type": int, + "range": None, + "optional": False, + } + } + }, + { + "action_type": "TYPING", + "note": "type the specified text", + "parameters": { + "text": { + "type": str, + "range": None, + "optional": False, + } + } + }, + { + "action_type": "PRESS", + "note": "press the specified key and release it", + "parameters": { + "key": { + "type": str, + "range": KEYBOARD_KEYS, + "optional": False, + } + } + }, + { + "action_type": "KEY_DOWN", + "note": "press the specified key", + "parameters": { + "key": { + "type": str, + "range": KEYBOARD_KEYS, + "optional": False, + } + } + }, + { + "action_type": "KEY_UP", + "note": "release the specified key", + "parameters": { + "key": { + "type": str, + "range": KEYBOARD_KEYS, + "optional": False, + } + } + }, + { + "action_type": "HOTKEY", + "note": "press the specified key combination", + "parameters": { + "keys": { + "type": list, + 
"range": [KEYBOARD_KEYS], + "optional": False, + } + } + }, + ############################################################################################################ + { + "action_type": "WAIT", + "note": "wait until the next action", + }, + { + "action_type": "FAIL", + "note": "decide the task can not be performed", + }, + { + "action_type": "DONE", + "note": "decide the task is done", + } +] +Firstly you need to predict the class of your action, then you need to predict the parameters of your action: +- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080) +for example, format as: +``` +{ + "action_type": "MOUSE_MOVE", + "x": 1319.11, + "y": 65.06 +} +``` +- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse: +for example, format as: +``` +{ + "action_type": "CLICK", + "click_type": "LEFT" +} +``` +- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard +for example, format as: +``` +{ + "action_type": "KEY", + "key": "ctrl+c" +} +``` +- For TYPE, you need to specify the text you want to type +for example, format as: +``` +{ + "action_type": "TYPE", + "text": "hello world" +} +``` + +REMEMBER: +For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. +You MUST wrap the dict with backticks (\`). +You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty. +You CAN predict multiple actions at one step, but you should only return one action for each step. +""".strip() + +SYS_PROMPT_IN_BOTH_OUT_CODE = """ +You are an agent which follow my instruction and perform desktop computer tasks as instructed. +You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. +For each step, you will get an observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library. +And you will predict the action of the computer based on the screenshot and accessibility tree. + +You are required to use `pyautogui` to perform the action. +Return one line or multiple lines of python code to perform the action each time, be time efficient. +You ONLY need to return the code inside a code block, like this: +```python +# your code here +``` +Specially, it is also allowed to return the following special code: +When you think you have to wait for some time, return ```WAIT```; +When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task; +When you think the task is done, return ```DONE```. + +First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. +""".strip() + +SYS_PROMPT_IN_BOTH_OUT_ACTION = """ +You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection. +For each step, you will get an observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library. 
+And you will predict the action of the computer based on the screenshot and accessibility tree. + +HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters: +ACTION_SPACE = [ + { + "action_type": "MOVE_TO", + "note": "move the cursor to the specified position", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": False, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": False, + } + } + }, + { + "action_type": "CLICK", + "note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position", + "parameters": { + "button": { + "type": str, + "range": ["left", "right", "middle"], + "optional": True, + }, + "x": { + "type": float, + "range": [0, X_MAX], + "optional": True, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": True, + }, + "num_clicks": { + "type": int, + "range": [1, 2, 3], + "optional": True, + }, + } + }, + { + "action_type": "MOUSE_DOWN", + "note": "press the left button if the button not specified, otherwise press the specified button", + "parameters": { + "button": { + "type": str, + "range": ["left", "right", "middle"], + "optional": True, + } + } + }, + { + "action_type": "MOUSE_UP", + "note": "release the left button if the button not specified, otherwise release the specified button", + "parameters": { + "button": { + "type": str, + "range": ["left", "right", "middle"], + "optional": True, + } + } + }, + { + "action_type": "RIGHT_CLICK", + "note": "right click at the current position if x and y are not specified, otherwise right click at the specified position", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": True, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": True, + } + } + }, + { + "action_type": "DOUBLE_CLICK", + "note": "double click at the current position if x and y are not specified, otherwise double click at the specified position", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": True, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": True, + } + } + }, + { + "action_type": "DRAG_TO", + "note": "drag the cursor to the specified position with the left button pressed", + "parameters": { + "x": { + "type": float, + "range": [0, X_MAX], + "optional": False, + }, + "y": { + "type": float, + "range": [0, Y_MAX], + "optional": False, + } + } + }, + { + "action_type": "SCROLL", + "note": "scroll the mouse wheel up or down", + "parameters": { + "dx": { + "type": int, + "range": None, + "optional": False, + }, + "dy": { + "type": int, + "range": None, + "optional": False, + } + } + }, + { + "action_type": "TYPING", + "note": "type the specified text", + "parameters": { + "text": { + "type": str, + "range": None, + "optional": False, + } + } + }, + { + "action_type": "PRESS", + "note": "press the specified key and release it", + "parameters": { + "key": { + "type": str, + "range": KEYBOARD_KEYS, + "optional": False, + } + } + }, + { + "action_type": "KEY_DOWN", + "note": "press the specified key", + "parameters": { + "key": { + "type": str, + "range": KEYBOARD_KEYS, + "optional": False, + } + } + }, + { + "action_type": "KEY_UP", + "note": "release the specified key", + "parameters": { + "key": { + "type": str, + "range": KEYBOARD_KEYS, + "optional": False, + } + } + }, + { + "action_type": "HOTKEY", + 
"note": "press the specified key combination", + "parameters": { + "keys": { + "type": list, + "range": [KEYBOARD_KEYS], + "optional": False, + } + } + }, + ############################################################################################################ + { + "action_type": "WAIT", + "note": "wait until the next action", + }, + { + "action_type": "FAIL", + "note": "decide the task can not be performed", + }, + { + "action_type": "DONE", + "note": "decide the task is done", + } +] +Firstly you need to predict the class of your action, then you need to predict the parameters of your action: +- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080) +for example, format as: +``` +{ + "action_type": "MOUSE_MOVE", + "x": 1319.11, + "y": 65.06 +} +``` +- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse: +for example, format as: +``` +{ + "action_type": "CLICK", + "click_type": "LEFT" +} +``` +- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard +for example, format as: +``` +{ + "action_type": "KEY", + "key": "ctrl+c" +} +``` +- For TYPE, you need to specify the text you want to type +for example, format as: +``` +{ + "action_type": "TYPE", + "text": "hello world" +} +``` + +REMEMBER: +For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. +You MUST wrap the dict with backticks (\`). +You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty. +You CAN predict multiple actions at one step, but you should only return one action for each step. +""".strip() + +SYS_PROMPT_IN_SOM_A11Y_OUT_TAG = """ +You are an agent which follow my instruction and perform desktop computer tasks as instructed. +You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. +For each step, you will get an observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library. + +You are required to use `pyautogui` to perform the action. But replace x, y in the code with the tag of the element you want to operate with. such as: +```python +pyautogui.moveTo(tag#3) +pyautogui.click(tag#2) +pyautogui.dragTo(tag#1, button='left') +``` +Return one line or multiple lines of python code to perform the action each time, be time efficient. +You ONLY need to return the code inside a code block, like this: +```python +# your code here +``` +Specially, it is also allowed to return the following special code: +When you think you have to wait for some time, return ```WAIT```; +When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task; +When you think the task is done, return ```DONE```. + +First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. +""".strip() + +SYS_PROMPT_SEEACT = """ +You are an agent which follow my instruction and perform desktop computer tasks as instructed. 
+You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. +For each step, you will get an observation of an image, which is the screenshot of the computer screen and you will predict the action of the computer based on the image. +""".strip() + +ACTION_DESCRIPTION_PROMPT_SEEACT = """ +The text and image shown below is the observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library. +{} + +Follow the following guidance to think step by step before outlining the next action step at the current stage: + +(Current Screenshot Identification) +Firstly, think about what the current screenshot is. + +(Previous Action Analysis) +Secondly, combined with the screenshot, analyze each step of the previous action history and their intention one by one. Particularly, pay more attention to the last step, which may be more related to what you should do now as the next step. + +(Screenshot Details Analysis) +Closely examine the screenshot to check the status of every part of the webpage to understand what you can operate with and what has been set or completed. You should closely examine the screenshot details to see what steps have been completed by previous actions even though you are given the textual previous actions. Because the textual history may not clearly and sufficiently record some effects of previous actions, you should closely evaluate the status of every part of the webpage to understand what you have done. + +(Next Action Based on Screenshot and Analysis) +Then, based on your analysis, in conjunction with human desktop using habits and the logic of app GUI design, decide on the following action. And clearly outline which button in the screenshot users will operate with as the first next target element, its detailed location, and the corresponding operation. +""" + +ACTION_GROUNDING_PROMPT_SEEACT = """ +You are required to use `pyautogui` to perform the action. But replace x, y in the code with the tag of the element you want to operate with. such as: +```python +pyautogui.moveTo(tag#3) +pyautogui.click(tag#2) +pyautogui.dragTo(tag#1, button='left') +``` +Return one line or multiple lines of python code to perform the action each time, be time efficient. +You ONLY need to return the code inside a code block, like this: +```python +# your code here +``` +Specially, it is also allowed to return the following special code: +When you think you have to wait for some time, return ```WAIT```; +When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task; +When you think the task is done, return ```DONE```. + +First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. 
+""" diff --git a/mm_agents/sam_test.py b/mm_agents/sam_test.py deleted file mode 100644 index 9d4ce44..0000000 --- a/mm_agents/sam_test.py +++ /dev/null @@ -1,124 +0,0 @@ -import torch -from PIL import Image -import requests -from transformers import SamModel, SamProcessor -import numpy as np -import matplotlib.pyplot as plt -import os -os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" - -def show_mask(mask, ax, random_color=False): - if random_color: - color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) - else: - color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6]) - h, w = mask.shape[-2:] - mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) - ax.imshow(mask_image) - - -def show_box(box, ax): - x0, y0 = box[0], box[1] - w, h = box[2] - box[0], box[3] - box[1] - ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0, 0, 0, 0), lw=2)) - - -def show_boxes_on_image(raw_image, boxes): - plt.figure(figsize=(10, 10)) - plt.imshow(raw_image) - for box in boxes: - show_box(box, plt.gca()) - plt.axis('on') - plt.show() - - -def show_points_on_image(raw_image, input_points, input_labels=None): - plt.figure(figsize=(10, 10)) - plt.imshow(raw_image) - input_points = np.array(input_points) - if input_labels is None: - labels = np.ones_like(input_points[:, 0]) - else: - labels = np.array(input_labels) - show_points(input_points, labels, plt.gca()) - plt.axis('on') - plt.show() - - -def show_points_and_boxes_on_image(raw_image, boxes, input_points, input_labels=None): - plt.figure(figsize=(10, 10)) - plt.imshow(raw_image) - input_points = np.array(input_points) - if input_labels is None: - labels = np.ones_like(input_points[:, 0]) - else: - labels = np.array(input_labels) - show_points(input_points, labels, plt.gca()) - for box in boxes: - show_box(box, plt.gca()) - plt.axis('on') - plt.show() - - -def show_points_and_boxes_on_image(raw_image, boxes, input_points, input_labels=None): - plt.figure(figsize=(10, 10)) - plt.imshow(raw_image) - input_points = np.array(input_points) - if input_labels is None: - labels = np.ones_like(input_points[:, 0]) - else: - labels = np.array(input_labels) - show_points(input_points, labels, plt.gca()) - for box in boxes: - show_box(box, plt.gca()) - plt.axis('on') - plt.show() - - -def show_points(coords, labels, ax, marker_size=375): - pos_points = coords[labels == 1] - neg_points = coords[labels == 0] - ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', - linewidth=1.25) - ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', - linewidth=1.25) - - -def show_masks_on_image(raw_image, masks, scores): - if len(masks.shape) == 4: - masks = masks.squeeze() - if scores.shape[0] == 1: - scores = scores.squeeze() - - nb_predictions = scores.shape[-1] - fig, axes = plt.subplots(1, nb_predictions, figsize=(15, 15)) - - for i, (mask, score) in enumerate(zip(masks, scores)): - mask = mask.cpu().detach() - axes[i].imshow(np.array(raw_image)) - show_mask(mask, axes[i]) - axes[i].title.set_text(f"Mask {i + 1}, Score: {score.item():.3f}") - axes[i].axis("off") - plt.show() - - -device = "cuda" if torch.cuda.is_available() else "cpu" -model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device) -processor = SamProcessor.from_pretrained("facebook/sam-vit-huge") - -img_url = "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" -raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB") 
- -plt.imshow(raw_image) - -inputs = processor(raw_image, return_tensors="pt").to(device) -with torch.no_grad(): - outputs = model(**inputs) - -masks = processor.image_processor.post_process_masks( - outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu() -) - - -scores = outputs.iou_scores -show_masks_on_image(raw_image, masks[0], scores) diff --git a/mm_agents/visualizer.py b/mm_agents/visualizer.py new file mode 100644 index 0000000..bd78a98 --- /dev/null +++ b/mm_agents/visualizer.py @@ -0,0 +1,1405 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import pycocotools.mask as mask_util +import torch +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from detectron2.utils.file_io import PathManager + +from detectron2.utils.colormap import random_color +import random + +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 240.0 / 255) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
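+            (e.g. a hypothetical triangle: np.array([10., 10., 50., 10., 30., 40.]))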
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == ( + height, + width, + ), f"mask shape: {m.shape}, target dims: {height}, {width}" + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + """ + Unify different panoptic annotation/prediction formats + """ + + def __init__(self, panoptic_seg, segments_info, metadata=None): + if segments_info is None: + assert metadata is not None + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label. + label_divisor = metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_seg.numpy()): + if panoptic_label == -1: + # VOID region. 
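+                    # (Every other label decodes below as
+                    #  category_id * label_divisor + instance_id; e.g. with a
+                    #  hypothetical label_divisor of 1000, label 26002 means
+                    #  category 26, instance 2.)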
+ continue + pred_class = panoptic_label // label_divisor + isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() + segments_info.append( + { + "id": int(panoptic_label), + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + del metadata + + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(np.bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. + continue + yield (self._seg == sid).numpy().astype(np.bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(np.bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names, is_crowd=None): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + is_crowd (list[bool] or None): + + Returns: + list[str] or None + """ + labels = None + if classes is not None: + if class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + else: + labels = [str(i) for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + if labels is not None and is_crowd is not None: + labels = [l + ("|crowd" if crowd else "") for l, crowd in zip(labels, is_crowd)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3) in range [0, 255]. + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
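+
+        (Sizing note: the figure is set below to width*scale/dpi by
+        height*scale/dpi inches, so one image pixel maps to one output
+        pixel at scale=1.0.)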
+ """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + self.fig = fig + self.ax = ax + self.reset_image(img) + + def reset_image(self, img): + """ + Args: + img: same as in __init__ + """ + img = img.astype("uint8") + self.ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + self.fig.savefig(filepath) + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + return rgb.astype("uint8") + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. + + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + + To obtain a consistent style, you can implement custom drawing functions with the + abovementioned primitive methods instead. If you need more customized visualization + styles, you can process the data yourself following their format documented in + tutorials (:doc:`/tutorials/models`, :doc:`/tutorials/datasets`). This class does not + intend to satisfy everyone's preference on drawing styles. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + # TODO implement a fast, rasterized version using OpenCV + + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. + metadata (Metadata): dataset metadata (e.g. class names and colors) + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. 
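+
+        Example (a hypothetical call; `img` here stands for any (H, W, 3)
+        uint8 RGB array):
+            vis = Visualizer(img, scale=1.0)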
+ """ + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._default_font_size = 18 + self._instance_mode = instance_mode + self.keypoint_threshold = _KEYPOINT_THRESHOLD + + import matplotlib.colors as mcolors + css4_colors = mcolors.CSS4_COLORS + self.color_proposals = [list(mcolors.hex2color(color)) for color in css4_colors.values()] + + def draw_instance_predictions(self, predictions): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes.tolist() if predictions.has("pred_classes") else None + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + + keep = (scores > 0.5).cpu() + boxes = boxes[keep] + scores = scores[keep] + classes = np.array(classes) + classes = classes[np.array(keep)] + labels = np.array(labels) + labels = labels[np.array(keep)] + + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = masks[np.array(keep)] + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + # if self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + ] + alpha = 0.4 + else: + colors = None + alpha = 0.4 + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.reset_image( + self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + if predictions.has("pred_masks") + else None + ) + ) + alpha = 0.3 + + self.overlay_instances( + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.7): + """ + Draw semantic segmentation predictions/labels. + + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + + Returns: + output (VisImage): image object with visualizations. 
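+
+        Example (hypothetical, assuming `vis` is a Visualizer and `sem_seg`
+        is an (H, W) integer label map):
+            vis.draw_sem_seg(sem_seg, alpha=0.5).save("sem_seg.png")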
+        """
+        if isinstance(sem_seg, torch.Tensor):
+            sem_seg = sem_seg.numpy()
+        labels, areas = np.unique(sem_seg, return_counts=True)
+        sorted_idxs = np.argsort(-areas).tolist()
+        labels = labels[sorted_idxs]
+        for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels):
+            try:
+                mask_color = [x / 255 for x in self.metadata.stuff_colors[label]]
+            except (AttributeError, IndexError):
+                mask_color = None
+
+            binary_mask = (sem_seg == label).astype(np.uint8)
+            text = self.metadata.stuff_classes[label]
+            self.draw_binary_mask(
+                binary_mask,
+                color=mask_color,
+                edge_color=_OFF_WHITE,
+                text=text,
+                alpha=alpha,
+                area_threshold=area_threshold,
+            )
+        return self.output
+
+    def draw_panoptic_seg(self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7):
+        """
+        Draw panoptic prediction annotations or results.
+
+        Args:
+            panoptic_seg (Tensor): of shape (height, width) where the values are ids for each
+                segment.
+            segments_info (list[dict] or None): Describe each segment in `panoptic_seg`.
+                If it is a ``list[dict]``, each dict contains keys "id", "category_id".
+                If None, category id of each pixel is computed by
+                ``pixel // metadata.label_divisor``.
+            area_threshold (int): stuff segments with less than `area_threshold` are not drawn.
+            alpha (float): the larger it is, the more opaque the segmentations are.
+
+        Returns:
+            output (VisImage): image object with visualizations.
+        """
+        pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata)
+
+        if self._instance_mode == ColorMode.IMAGE_BW:
+            self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask()))
+
+        # draw mask for all semantic segments first i.e. "stuff"
+        for mask, sinfo in pred.semantic_masks():
+            category_idx = sinfo["category_id"]
+            try:
+                mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]]
+            except AttributeError:
+                mask_color = None
+
+            text = self.metadata.stuff_classes[category_idx].replace('-other', '').replace('-merged', '')
+            self.draw_binary_mask(
+                mask,
+                color=mask_color,
+                edge_color=_OFF_WHITE,
+                text=text,
+                alpha=alpha,
+                area_threshold=area_threshold,
+            )
+
+        # draw mask for all instances second
+        all_instances = list(pred.instance_masks())
+        if len(all_instances) == 0:
+            return self.output
+        masks, sinfo = list(zip(*all_instances))
+        category_ids = [x["category_id"] for x in sinfo]
+
+        try:
+            scores = [x["score"] for x in sinfo]
+        except KeyError:
+            scores = None
+        class_names = [name.replace('-other', '').replace('-merged', '') for name in self.metadata.thing_classes]
+        labels = _create_text_labels(
+            category_ids, scores, class_names, [x.get("iscrowd", 0) for x in sinfo]
+        )
+
+        try:
+            colors = [
+                self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids
+            ]
+        except AttributeError:
+            colors = None
+        self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha)
+
+        return self.output
+
+    draw_panoptic_seg_predictions = draw_panoptic_seg  # backward compatibility
+
+    def draw_dataset_dict(self, dic):
+        """
+        Draw annotations/segmentations in Detectron2 Dataset format.
+
+        Args:
+            dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.
+
+        Returns:
+            output (VisImage): image object with visualizations.
+ """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [ + BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) + if len(x["bbox"]) == 4 + else x["bbox"] + for x in annos + ] + + colors = None + category_ids = [x["category_id"] for x in annos] + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) + for c in category_ids + ] + names = self.metadata.get("thing_classes", None) + labels = _create_text_labels( + category_ids, + scores=None, + class_names=names, + is_crowd=[x.get("iscrowd", 0) for x in annos], + ) + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.4) + + pan_seg = dic.get("pan_seg", None) + if pan_seg is None and "pan_seg_file_name" in dic: + with PathManager.open(dic["pan_seg_file_name"], "rb") as f: + pan_seg = Image.open(f) + pan_seg = np.asarray(pan_seg) + from panopticapi.utils import rgb2id + + pan_seg = rgb2id(pan_seg) + if pan_seg is not None: + segments_info = dic["segments_info"] + pan_seg = torch.tensor(pan_seg) + self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0, alpha=0.7) + return self.output + + def overlay_instances( + self, + *, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5, + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + Returns: + output (VisImage): image object with visualizations. 
+ """ + num_instances = 0 + if boxes is not None: + boxes = self._convert_boxes(boxes) + num_instances = len(boxes) + if masks is not None: + masks = self._convert_masks(masks) + if num_instances: + assert len(masks) == num_instances + else: + num_instances = len(masks) + if keypoints is not None: + if num_instances: + assert len(keypoints) == num_instances + else: + num_instances = len(keypoints) + keypoints = self._convert_keypoints(keypoints) + if labels is not None: + assert len(labels) == num_instances + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + if boxes is not None and boxes.shape[1] == 5: + return self.overlay_rotated_instances( + boxes=boxes, labels=labels, assigned_colors=assigned_colors + ) + + # Display in largest to smallest order to reduce occlusion. + areas = None + if boxes is not None: + areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + elif masks is not None: + areas = np.asarray([x.area() for x in masks]) + + if areas is not None: + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None + assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] + keypoints = keypoints[sorted_idxs] if keypoints is not None else None + + for i in range(num_instances): + color = assigned_colors[i] + if boxes is not None: + self.draw_box(boxes[i], edge_color=color) + + if masks is not None: + for segment in masks[i].polygons: + self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) + + if labels is not None: + # first get a box + if boxes is not None: + x0, y0, x1, y1 = boxes[i] + text_pos = (x0, y0) # if drawing boxes, put text on the box corner. + horiz_align = "left" + elif masks is not None: + # skip small mask without polygon + if len(masks[i].polygons) == 0: + continue + + x0, y0, x1, y1 = masks[i].bbox() + + # draw text in the center (defined by median) when box is not drawn + # median is less sensitive to outliers. + text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] + horiz_align = "center" + else: + continue # drawing the box confidence for keypoints isn't very useful. + # for small objects, draw text at the side to avoid occlusion + instance_area = (y1 - y0) * (x1 - x0) + if ( + instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale + or y1 - y0 < 40 * self.output.scale + ): + if y1 >= self.output.height - 5: + text_pos = (x1, y0) + else: + text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 0.5 + * self._default_font_size + ) + self.draw_text( + labels[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. 
+ labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. This follows color heuristics for + line color. + + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + + Returns: + output (VisImage): image object with visualizations. + """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + + # draw keypoint + x, y, prob = keypoint + if prob > self.keypoint_threshold: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0, + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. 
+            color: color of the text. Refer to `matplotlib.colors` for full list
+                of formats that are accepted.
+            horizontal_alignment (str): see `matplotlib.text.Text`
+            rotation: rotation angle in degrees CCW
+
+        Returns:
+            output (VisImage): image object with text drawn.
+        """
+        if not font_size:
+            font_size = self._default_font_size
+
+        # since the text background is dark, we don't want the text to be dark
+        color = np.maximum(list(mplc.to_rgb(color)), 0.15)
+        color[np.argmax(color)] = max(0.8, np.max(color))
+
+        def contrasting_color(rgb):
+            """Returns 'white' or 'black' depending on which color contrasts more with the given RGB value."""
+
+            # Decompose the RGB tuple
+            R, G, B = rgb
+
+            # Calculate the Y (luma) value
+            Y = 0.299 * R + 0.587 * G + 0.114 * B
+
+            # If Y value is greater than 128, it's closer to white so return black. Otherwise, return white.
+            return 'black' if Y > 128 else 'white'
+
+        bbox_background = contrasting_color(color * 255)
+
+        x, y = position
+        self.output.ax.text(
+            x,
+            y,
+            text,
+            size=font_size * self.output.scale,
+            family="sans-serif",
+            bbox={"facecolor": bbox_background, "alpha": 0.8, "pad": 0.7, "edgecolor": "none"},
+            verticalalignment="top",
+            horizontalalignment=horizontal_alignment,
+            color=color,
+            zorder=10,
+            rotation=rotation,
+        )
+        return self.output
+
+    def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"):
+        """
+        Args:
+            box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0
+                are the coordinates of the box's top left corner. x1 and y1 are the
+                coordinates of the box's bottom right corner.
+            alpha (float): blending coefficient. Smaller values lead to a more transparent box.
+            edge_color: color of the outline of the box. Refer to `matplotlib.colors`
+                for full list of formats that are accepted.
+            line_style (string): the string to use to create the outline of the boxes.
+
+        Returns:
+            output (VisImage): image object with box drawn.
+        """
+        x0, y0, x1, y1 = box_coord
+        width = x1 - x0
+        height = y1 - y0
+
+        linewidth = max(self._default_font_size / 12, 1)
+
+        self.output.ax.add_patch(
+            mpl.patches.Rectangle(
+                (x0, y0),
+                width,
+                height,
+                fill=False,
+                edgecolor=edge_color,
+                linewidth=linewidth * self.output.scale,
+                alpha=alpha,
+                linestyle=line_style,
+            )
+        )
+        return self.output
+
+    def draw_rotated_box_with_label(
+        self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None
+    ):
+        """
+        Draw a rotated box with label on its top-left corner.
+
+        Args:
+            rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle),
+                where cnt_x and cnt_y are the center coordinates of the box.
+                w and h are the width and height of the box. angle represents how
+                many degrees the box is rotated CCW with regard to the 0-degree box.
+            alpha (float): blending coefficient. Smaller values lead to a more transparent box.
+            edge_color: color of the outline of the box. Refer to `matplotlib.colors`
+                for full list of formats that are accepted.
+            line_style (string): the string to use to create the outline of the boxes.
+            label (string): label for rotated box. It will not be rendered when set to None.
+
+        Returns:
+            output (VisImage): image object with box drawn.
+        """
+        cnt_x, cnt_y, w, h, angle = rotated_box
+        area = w * h
+        # use thinner lines when the box is small
+        linewidth = self._default_font_size / (
+            6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3
+        )
+
+        theta = angle * math.pi / 180.0
+        c = math.cos(theta)
+        s = math.sin(theta)
+        rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)]
+        # x: left->right ; y: top->down
+        rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect]
+        for k in range(4):
+            j = (k + 1) % 4
+            self.draw_line(
+                [rotated_rect[k][0], rotated_rect[j][0]],
+                [rotated_rect[k][1], rotated_rect[j][1]],
+                color=edge_color,
+                linestyle="--" if k == 1 else line_style,
+                linewidth=linewidth,
+            )
+
+        if label is not None:
+            text_pos = rotated_rect[1]  # topleft corner
+
+            height_ratio = h / np.sqrt(self.output.height * self.output.width)
+            label_color = self._change_color_brightness(edge_color, brightness_factor=0.7)
+            font_size = (
+                np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size
+            )
+            self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle)
+
+        return self.output
+
+    def draw_circle(self, circle_coord, color, radius=3):
+        """
+        Args:
+            circle_coord (list(int) or tuple(int)): contains the x and y coordinates
+                of the center of the circle.
+            color: color of the circle. Refer to `matplotlib.colors` for a full list of
+                formats that are accepted.
+            radius (int): radius of the circle.
+
+        Returns:
+            output (VisImage): image object with circle drawn.
+        """
+        x, y = circle_coord
+        self.output.ax.add_patch(
+            mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color)
+        )
+        return self.output
+
+    def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None):
+        """
+        Args:
+            x_data (list[int]): a list containing x values of all the points being drawn.
+                Length of list should match the length of y_data.
+            y_data (list[int]): a list containing y values of all the points being drawn.
+                Length of list should match the length of x_data.
+            color: color of the line. Refer to `matplotlib.colors` for a full list of
+                formats that are accepted.
+            linestyle: style of the line. Refer to `matplotlib.lines.Line2D`
+                for a full list of formats that are accepted.
+            linewidth (float or None): width of the line. When it's None,
+                a default value will be computed and used.
+
+        Returns:
+            output (VisImage): image object with line drawn.
+        """
+        if linewidth is None:
+            linewidth = self._default_font_size / 3
+        linewidth = max(linewidth, 1)
+        self.output.ax.add_line(
+            mpl.lines.Line2D(
+                x_data,
+                y_data,
+                linewidth=linewidth * self.output.scale,
+                color=color,
+                linestyle=linestyle,
+            )
+        )
+        return self.output
+
+    def draw_binary_mask(
+        self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.7, area_threshold=10
+    ):
+        """
+        Args:
+            binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and
+                W is the image width. Each value in the array is either a 0 or 1 value of uint8
+                type.
+            color: color of the mask. Refer to `matplotlib.colors` for a full list of
+                formats that are accepted. If None, will pick a random color.
+            edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
+                full list of formats that are accepted.
+            text (str): if not None, the text will be drawn on the object
+            alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+            area_threshold (float): a connected component smaller than this area will not be shown.
+
+        Returns:
+            output (VisImage): image object with mask drawn.
+        """
+        if color is None:
+            color = random_color(rgb=True, maximum=1)
+        color = mplc.to_rgb(color)
+
+        has_valid_segment = False
+        binary_mask = binary_mask.astype("uint8")  # opencv needs uint8
+        mask = GenericMask(binary_mask, self.output.height, self.output.width)
+        shape2d = (binary_mask.shape[0], binary_mask.shape[1])
+
+        if not mask.has_holes:
+            # draw polygons for regular masks
+            for segment in mask.polygons:
+                area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1]))
+                if area < (area_threshold or 0):
+                    continue
+                has_valid_segment = True
+                segment = segment.reshape(-1, 2)
+                self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha)
+        else:
+            # TODO: Use Path/PathPatch to draw vector graphics:
+            # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon
+            rgba = np.zeros(shape2d + (4,), dtype="float32")
+            rgba[:, :, :3] = color
+            rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha
+            has_valid_segment = True
+            self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0))
+
+        if text is not None and has_valid_segment:
+            lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
+            self._draw_text_in_mask(binary_mask, text, lighter_color)
+        return self.output
+
+    def draw_binary_mask_with_number(
+        self, binary_mask, color=None, *, edge_color=None, text=None, label_mode='1', alpha=0.1, anno_mode=['Mask'], area_threshold=10
+    ):
+        """
+        Args:
+            binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and
+                W is the image width. Each value in the array is either a 0 or 1 value of uint8
+                type.
+            color: color of the mask. Refer to `matplotlib.colors` for a full list of
+                formats that are accepted. If None, will pick a random color.
+            edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
+                full list of formats that are accepted.
+            text (str): if not None, the text will be drawn on the object
+            label_mode (str): '1' draws the given numeric label as-is; 'a' converts it to an
+                alphabetic label (1 -> a, 2 -> b, ...).
+            alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+            anno_mode (list[str]): which annotations to draw; any combination of "Mask",
+                "Box" and "Mark".
+            area_threshold (float): a connected component smaller than this area will not be shown.
+
+        Returns:
+            output (VisImage): image object with mask drawn.
+        """
+        if color is None:
+            randint = random.randint(0, len(self.color_proposals) - 1)
+            color = self.color_proposals[randint]
+        color = mplc.to_rgb(color)
+
+        has_valid_segment = True
+        binary_mask = binary_mask.astype("uint8")  # opencv needs uint8
+        mask = GenericMask(binary_mask, self.output.height, self.output.width)
+        shape2d = (binary_mask.shape[0], binary_mask.shape[1])
+        bbox = mask.bbox()
+
+        if 'Mask' in anno_mode:
+            if not mask.has_holes:
+                # draw polygons for regular masks
+                for segment in mask.polygons:
+                    area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1]))
+                    if area < (area_threshold or 0):
+                        continue
+                    has_valid_segment = True
+                    segment = segment.reshape(-1, 2)
+                    self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha)
+            else:
+                # TODO: Use Path/PathPatch to draw vector graphics:
+                # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon
+                rgba = np.zeros(shape2d + (4,), dtype="float32")
+                rgba[:, :, :3] = color
+                rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha
+                has_valid_segment = True
+                self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0))
+
+        if 'Box' in anno_mode:
+            self.draw_box(bbox, edge_color=color, alpha=0.75)
+
+        if 'Mark' in anno_mode:  # the number label below is only drawn in 'Mark' mode
+            has_valid_segment = True
+        else:
+            has_valid_segment = False
+
+        if text is not None and has_valid_segment:
+            # lighter_color = tuple([x*0.2 for x in color])
+            lighter_color = [1, 1, 1]  # self._change_color_brightness(color, brightness_factor=0.7)
+            self._draw_number_in_mask(binary_mask, text, lighter_color, label_mode)
+        return self.output
+
+    def draw_soft_mask(self, soft_mask, color=None, *, text=None, alpha=0.5):
+        """
+        Args:
+            soft_mask (ndarray): float array of shape (H, W), each value in [0, 1].
+            color: color of the mask. Refer to `matplotlib.colors` for a full list of
+                formats that are accepted. If None, will pick a random color.
+            text (str): if not None, the text will be drawn on the object
+            alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+
+        Returns:
+            output (VisImage): image object with mask drawn.
+        """
+        if color is None:
+            color = random_color(rgb=True, maximum=1)
+        color = mplc.to_rgb(color)
+
+        shape2d = (soft_mask.shape[0], soft_mask.shape[1])
+        rgba = np.zeros(shape2d + (4,), dtype="float32")
+        rgba[:, :, :3] = color
+        rgba[:, :, 3] = soft_mask * alpha
+        self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0))
+
+        if text is not None:
+            lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
+            binary_mask = (soft_mask > 0.5).astype("uint8")
+            self._draw_text_in_mask(binary_mask, text, lighter_color)
+        return self.output
+
+    def draw_polygon(self, segment, color, edge_color=None, alpha=0.5):
+        """
+        Args:
+            segment: numpy array of shape Nx2, containing all the points in the polygon.
+            color: color of the polygon. Refer to `matplotlib.colors` for a full list of
+                formats that are accepted.
+            edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
+                full list of formats that are accepted. If not provided, a darker shade
+                of the polygon color will be used instead.
+            alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+
+        Returns:
+            output (VisImage): image object with polygon drawn.
+ """ + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. + + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. + """ + color = mplc.to_rgb(color) + # np.random.seed(0) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. Each value in the tuple is in the [0.0, 1.0] range. + """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return modified_color + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.detach().numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. 
+ + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _draw_number_in_mask(self, binary_mask, text, color, label_mode='1'): + """ + Find proper places to draw text given a binary mask. + """ + + def number_to_string(n): + chars = [] + while n: + n, remainder = divmod(n-1, 26) + chars.append(chr(97 + remainder)) + return ''.join(reversed(chars)) + + binary_mask = np.pad(binary_mask, ((1, 1), (1, 1)), 'constant') + mask_dt = cv2.distanceTransform(binary_mask, cv2.DIST_L2, 0) + mask_dt = mask_dt[1:-1, 1:-1] + max_dist = np.max(mask_dt) + coords_y, coords_x = np.where(mask_dt == max_dist) # coords is [y, x] + + if label_mode == 'a': + text = number_to_string(int(text)) + else: + text = text + + self.draw_text(text, (coords_x[len(coords_x)//2] + 2, coords_y[len(coords_y)//2] - 6), color=color) + + # TODO sometimes drawn on wrong objects. the heuristics here can improve. + # _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + # if stats[1:, -1].size == 0: + # return + # largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # # draw text on the largest component, as well as other very large components. + # for cid in range(1, _num_cc): + # if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # # median is more stable than centroid + # # center = centroids[largest_component_id] + # center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + # # bottom=np.max((cc_labels == cid).nonzero(), axis=1)[::-1] + # # center[1]=bottom[1]+2 + # self.draw_text(text, center, color=color) + + def _draw_text_in_mask(self, binary_mask, text, color): + """ + Find proper places to draw text given a binary mask. + """ + # TODO sometimes drawn on wrong objects. the heuristics here can improve. + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + if stats[1:, -1].size == 0: + return + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. + for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + bottom=np.max((cc_labels == cid).nonzero(), axis=1)[::-1] + center[1]=bottom[1]+2 + self.draw_text(text, center, color=color) + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. 
+ """ + return self.output \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f2c4cc3..97019b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,3 +32,4 @@ librosa pymupdf chardet playwright +backoff From f88331416cec04d84dea72f9945510834e89ca9b Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Sat, 20 Jan 2024 18:55:21 +0800 Subject: [PATCH 07/13] Refactor baselines code implementations --- ...nt_pure_text.py => experiment_a11y_tree.py | 20 ++- experiment_screenshot.py | 16 +- .../heuristic_retrieve.py | 36 ++-- mm_agents/gemini_pro_agent.py | 2 + mm_agents/gemini_pro_vision_agent.py | 2 + mm_agents/gpt_4v_agent.py | 167 ++++++++++++++---- mm_agents/prompts.py | 26 +-- 7 files changed, 204 insertions(+), 65 deletions(-) rename experiment_pure_text.py => experiment_a11y_tree.py (88%) diff --git a/experiment_pure_text.py b/experiment_a11y_tree.py similarity index 88% rename from experiment_pure_text.py rename to experiment_a11y_tree.py index 4fd19b1..728d0de 100644 --- a/experiment_pure_text.py +++ b/experiment_a11y_tree.py @@ -5,8 +5,7 @@ import os import sys from desktop_env.envs.desktop_env import DesktopEnv -from mm_agents.gpt_4_agent import GPT4_Agent -from mm_agents.gemini_pro_agent import GeminiPro_Agent +from mm_agents.gpt_4v_agent import GPT4v_Agent # Logger Configs {{{ # logger = logging.getLogger() @@ -113,24 +112,29 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr if __name__ == "__main__": action_space = "pyautogui" example_class = "chrome" - example_id = "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263" - gpt4_model = "gpt-4-1106-preview" + example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3" + gpt4_model = "gpt-4-vision-preview" gemini_model = "gemini-pro-vision" + logger.info("Running example %s/%s", example_class, example_id) + logger.info("Using model %s", gpt4_model) + # logger.info("Using model %s", gemini_model) + with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f: example = json.load(f) example["snapshot"] = "exp_setup4" api_key = os.environ.get("OPENAI_API_KEY") - agent = GPT4_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], action_space=action_space) + agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], + action_space=action_space, exp="a11y_tree") # api_key = os.environ.get("GENAI_API_KEY") - # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space) + # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space, exp="a11y_tree") root_trajectory_dir = "exp_trajectory" - example_trajectory_dir = os.path.join(root_trajectory_dir, "text", example_class, gpt4_model, example_id) - # example_trajectory_dir = os.path.join(root_trajectory_dir, "text", example_class, gemini_model, example_id) + example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gpt4_model, example_id) + # example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gemini_model, example_id) os.makedirs(example_trajectory_dir, exist_ok=True) diff --git a/experiment_screenshot.py b/experiment_screenshot.py index 8e7f8b5..6d82730 100644 --- a/experiment_screenshot.py +++ b/experiment_screenshot.py @@ -113,20 +113,28 @@ if __name__ == "__main__": action_space = "pyautogui" example_class = "thunderbird" example_id = 
"bb5e4c0d-f964-439c-97b6-bdb9747de3f4" + gpt4_model = "gpt-4-vision-preview" + gemini_model = "gemini-pro-vision" + + logger.info("Running example %s/%s", example_class, example_id) + logger.info("Using model %s", gpt4_model) + # logger.info("Using model %s", gemini_model) with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f: example = json.load(f) example["snapshot"] = "exp_setup2" # api_key = os.environ.get("OPENAI_API_KEY") - # agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space) + # agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot") api_key = os.environ.get("GENAI_API_KEY") - agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space) + agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot") root_trajectory_dir = "exp_trajectory" - example_trajectory_dir = os.path.join(root_trajectory_dir, example_class, example_id) + example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gpt4_model, example_id) + # example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gemini_model, example_id) + os.makedirs(example_trajectory_dir, exist_ok=True) - run_one_example(example, agent, 10, example_trajectory_dir) + run_one_example(example, agent, 15, example_trajectory_dir) diff --git a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py index c59060c..47bbca0 100644 --- a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py +++ b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py @@ -60,19 +60,19 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path): image = Image.open(image_file_path) draw = ImageDraw.Draw(image) marks = [] + drew_nodes = [] - # todo: change the image tagger to align with SoM paper - - # Optional: Load a font. If you don't specify a font, a default one will be used. 
     try:
         # Adjust the path to the font file you have or use a default one
-        font = ImageFont.truetype("arial.ttf", 20)
+        font = ImageFont.truetype("arial.ttf", 15)
     except IOError:
         # Fallback to a basic font if the specified font can't be loaded
         font = ImageFont.load_default()
 
+    index = 1
+
     # Loop over all the visible nodes and draw their bounding boxes
-    for index, _node in enumerate(nodes):
+    for _node in nodes:
         coords_str = _node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord')
         size_str = _node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size')
 
@@ -93,22 +93,30 @@
             if bottom_right[0] < coords[0] or bottom_right[1] < coords[1]:
                 raise ValueError(f"Invalid coordinates or size, coords: {coords}, size: {size}")
 
-            # Draw rectangle on image
-            draw.rectangle([coords, bottom_right], outline="red", width=2)
+            # Check if the area only contains one color
+            cropped_image = image.crop((*coords, *bottom_right))
+            if len(set(list(cropped_image.getdata()))) == 1:
+                continue
 
-            # Draw index number at the bottom left of the bounding box
+            # Draw rectangle on image
+            draw.rectangle([coords, bottom_right], outline="red", width=1)
+
+            # Draw index number at the bottom left of the bounding box with black background
             text_position = (coords[0], bottom_right[1])  # Adjust Y to be above the bottom right
-            draw.text(text_position, str(index), font=font, fill="purple")
+            draw.rectangle([text_position, (text_position[0] + 25, text_position[1] + 18)], fill='black')
+            draw.text(text_position, str(index), font=font, fill="white")
+            index += 1
 
             # each mark is an x, y, w, h tuple
             marks.append([coords[0], coords[1], size[0], size[1]])
+            drew_nodes.append(_node)
 
         except ValueError as e:
             pass
 
     # Save the result
     image.save(output_image_file_path)
-    return marks
+    return marks, drew_nodes
 
 
 def print_nodes_with_indent(nodes, indent=0):
@@ -120,6 +128,10 @@
 if __name__ == '__main__':
     with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as f:
         xml_file_str = f.read()
+    filtered_nodes = filter_nodes(find_leaf_nodes(xml_file_str))
+    print(len(filtered_nodes))
+    masks, drew_nodes = draw_bounding_boxes(filtered_nodes, 'screenshot.png',
+                                            'chrome_desktop_example_1_tagged_remove.png')
 
-    nodes = ET.fromstring(xml_file_str)
-    print_nodes_with_indent(nodes)
+    # print(masks)
+    print(len(masks))
diff --git a/mm_agents/gemini_pro_agent.py b/mm_agents/gemini_pro_agent.py
index 26f9c0e..ce84488 100644
--- a/mm_agents/gemini_pro_agent.py
+++ b/mm_agents/gemini_pro_agent.py
@@ -1,3 +1,5 @@
+# todo: needs to be refactored
+
 import time
 from typing import Dict, List
 
diff --git a/mm_agents/gemini_pro_vision_agent.py b/mm_agents/gemini_pro_vision_agent.py
index 2d5d365..4a537db 100644
--- a/mm_agents/gemini_pro_vision_agent.py
+++ b/mm_agents/gemini_pro_vision_agent.py
@@ -1,3 +1,5 @@
+# todo: needs to be refactored
+
 import time
 from typing import Dict, List
 
diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py
index 6e2000c..896ff7e 100644
--- a/mm_agents/gpt_4v_agent.py
+++ b/mm_agents/gpt_4v_agent.py
@@ -2,7 +2,6 @@ import base64
 import json
 import os
 import re
-import time
 import uuid
 from typing import Dict, List
 
@@ -54,9 +53,9 @@ def tag_screenshot(screenshot, accessibility_tree):
     tagged_screenshot_file_path = os.path.join("tmp/images", uuid_str + ".png")
     nodes = filter_nodes(find_leaf_nodes(accessibility_tree))
     # Make tag screenshot
-    marks = draw_bounding_boxes(nodes, screenshot, 
tagged_screenshot_file_path) + marks, drew_nodes = draw_bounding_boxes(nodes, screenshot, tagged_screenshot_file_path) - return marks, tagged_screenshot_file_path + return marks, drew_nodes, tagged_screenshot_file_path def parse_actions_from_string(input_string): @@ -123,11 +122,18 @@ def parse_code_from_string(input_string): def parse_code_from_som_string(input_string, masks): + # parse the output string by masks + mappings = [] for i, mask in enumerate(masks): x, y, w, h = mask - input_string = input_string.replace("tag#" + str(i), "{}, {}".format(int(x + w // 2), int(y + h // 2))) + mappings.append(("tag#" + str(i + 1), "{}, {}".format(int(x + w // 2), int(y + h // 2)))) - return parse_code_from_string(input_string) + # reverse the mappings + for mapping in mappings[::-1]: + input_string = input_string.replace(mapping[0], mapping[1]) + + actions = parse_code_from_string(input_string) + return actions class GPT4v_Agent: @@ -136,7 +142,7 @@ class GPT4v_Agent: api_key, instruction, model="gpt-4-vision-preview", - max_tokens=300, + max_tokens=500, action_space="computer_13", exp="screenshot_a11y_tree" # exp can be in ["screenshot", "a11y_tree", "screenshot_a11y_tree", "som", "seeact"] @@ -147,6 +153,7 @@ class GPT4v_Agent: self.max_tokens = max_tokens self.action_space = action_space self.exp = exp + self.max_trajectory_length = 3 self.headers = { "Content-Type": "application/json", @@ -194,8 +201,8 @@ class GPT4v_Agent: else: raise ValueError("Invalid experiment type: " + exp) - self.system_message = (self.system_message + - "\nHere is the instruction for the task: {}".format(self.instruction)) + self.system_message = self.system_message + "\nYou are asked to complete the following task: {}".format( + self.instruction) def predict(self, obs: Dict) -> List: """ @@ -204,28 +211,111 @@ class GPT4v_Agent: # Prepare the payload for the API call messages = [] - - if len(self.actions) > 0: - system_message = self.system_message + "\nHere are the actions you have done so far:\n" + "\n->\n".join( - self.actions) - else: - system_message = self.system_message + masks = None messages.append({ "role": "system", "content": [ { "type": "text", - "text": system_message + "text": self.system_message }, ] }) - masks = None + # Append trajectory + assert len(self.observations) == len(self.actions), "The number of observations and actions should be the same." + + if len(self.observations) > self.max_trajectory_length: + _observations = self.observations[-self.max_trajectory_length:] + _actions = self.actions[-self.max_trajectory_length:] + else: + _observations = self.observations + _actions = self.actions + + for previous_obs, previous_action in zip(_observations, _actions): + + if self.exp in ["both", "som", "seeact"]: + _screenshot = previous_obs["screenshot"] + _linearized_accessibility_tree = previous_obs["accessibility_tree"] + + messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": "Given the info from the tagged screenshot as below:\n{}\nWhat's the next step that you will do to help with the task?".format( + _linearized_accessibility_tree) + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{_screenshot}", + "detail": "high" + } + } + ] + }) + elif self.exp == "screenshot": + _screenshot = previous_obs["screenshot"] + + messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": "Given the screenshot as below. What's the next step that you will do to help with the task?" 
+ }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{_screenshot}", + "detail": "high" + } + } + ] + }) + elif self.exp == "a11y_tree": + _linearized_accessibility_tree = previous_obs["accessibility_tree"] + + messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": "Given the info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format( + _linearized_accessibility_tree) + } + ] + }) + else: + raise ValueError("Invalid experiment type: " + self.exp) + + messages.append({ + "role": "assistant", + "content": [ + { + "type": "text", + "text": "\n".join(previous_action) if len(previous_action) > 0 else "No valid action" + }, + ] + }) if self.exp in ["screenshot", "both"]: base64_image = encode_image(obs["screenshot"]) linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + + if self.exp == "both": + self.observations.append({ + "screenshot": base64_image, + "accessibility_tree": linearized_accessibility_tree + }) + else: + self.observations.append({ + "screenshot": base64_image, + "accessibility_tree": None + }) + messages.append({ "role": "user", "content": [ @@ -247,6 +337,12 @@ class GPT4v_Agent: }) elif self.exp == "a11y_tree": linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + + self.observations.append({ + "screenshot": None, + "accessibility_tree": linearized_accessibility_tree + }) + messages.append({ "role": "user", "content": [ @@ -259,11 +355,15 @@ class GPT4v_Agent: }) elif self.exp == "som": # Add som to the screenshot - masks, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"]) - + masks, drew_nodes, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"]) base64_image = encode_image(tagged_screenshot) linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + self.observations.append({ + "screenshot": base64_image, + "accessibility_tree": linearized_accessibility_tree + }) + messages.append({ "role": "user", "content": [ @@ -288,6 +388,11 @@ class GPT4v_Agent: base64_image = encode_image(tagged_screenshot) linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) + self.observations.append({ + "screenshot": base64_image, + "accessibility_tree": linearized_accessibility_tree + }) + messages.append({ "role": "user", "content": [ @@ -307,6 +412,9 @@ class GPT4v_Agent: else: raise ValueError("Invalid experiment type: " + self.exp) + with open("messages.json", "w") as f: + f.write(json.dumps(messages, indent=4)) + response = self.call_llm({ "model": self.model, "messages": messages, @@ -354,20 +462,17 @@ class GPT4v_Agent: (APIError, RateLimitError, APIConnectionError, ServiceUnavailableError, InvalidRequestError), ) def call_llm(self, payload): - while True: - try: - response = requests.post( - "https://api.openai.com/v1/chat/completions", - headers=self.headers, - json=payload - ) - break - except: - print("Failed to generate response, retrying...") - time.sleep(5) - pass + response = requests.post( + "https://api.openai.com/v1/chat/completions", + headers=self.headers, + json=payload + ) - return response.json()['choices'][0]['message']['content'] + if response.status_code != 200: + print("Failed to call LLM: " + response.text) + return "" + else: + return response.json()['choices'][0]['message']['content'] def 
parse_actions(self, response: str, masks=None): diff --git a/mm_agents/prompts.py b/mm_agents/prompts.py index dcc9a85..90ce22f 100644 --- a/mm_agents/prompts.py +++ b/mm_agents/prompts.py @@ -3,7 +3,7 @@ You are an agent which follow my instruction and perform desktop computer tasks You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. For each step, you will get an observation of an image, which is the screenshot of the computer screen and you will predict the action of the computer based on the image. -You are required to use `pyautogui` to perform the action. +You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. Return one line or multiple lines of python code to perform the action each time, be time efficient. You ONLY need to return the code inside a code block, like this: ```python @@ -14,7 +14,7 @@ When you think you have to wait for some time, return ```WAIT```; When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task; When you think the task is done, return ```DONE```. -First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. +First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. """.strip() SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION = """ @@ -267,7 +267,7 @@ You are an agent which follow my instruction and perform desktop computer tasks You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard. For each step, you will get an observation of the desktop by accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree. -You are required to use `pyautogui` to perform the action. +You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. Return one line or multiple lines of python code to perform the action each time, be time efficient. You ONLY need to return the code inside a code block, like this: ```python @@ -278,7 +278,7 @@ When you think you have to wait for some time, return ```WAIT```; When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task; When you think the task is done, return ```DONE```. -First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. +First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE. """.strip() SYS_PROMPT_IN_A11Y_OUT_ACTION = """ @@ -532,7 +532,7 @@ You have good knowledge of computer and good internet connection and assume your For each step, you will get an observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library. 
 And you will predict the action of the computer based on the screenshot and accessibility tree.
 
-You are required to use `pyautogui` to perform the action.
+You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with.
 Return one line or multiple lines of python code to perform the action each time, be time efficient.
 You ONLY need to return the code inside a code block, like this:
 ```python
@@ -543,7 +543,7 @@ When you think you have to wait for some time, return ```WAIT```;
 When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
 When you think the task is done, return ```DONE```.
 
-First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
+First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
 """.strip()
 
 SYS_PROMPT_IN_BOTH_OUT_ACTION = """
@@ -797,12 +797,15 @@ You are an agent which follow my instruction and perform desktop computer tasks
 You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
 For each step, you will get an observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library.
 
-You are required to use `pyautogui` to perform the action. But replace x, y in the code with the tag of the element you want to operate with. such as:
+You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with.
+You can replace x, y in the code with the tag of the element you want to operate with, such as:
 ```python
 pyautogui.moveTo(tag#3)
 pyautogui.click(tag#2)
 pyautogui.dragTo(tag#1, button='left')
 ```
+When you think you can directly output precise x and y coordinates or there is no tag on which you want to interact, you can also use them directly.
+But you should be careful to ensure that the coordinates are correct.
 Return one line or multiple lines of python code to perform the action each time, be time efficient.
 You ONLY need to return the code inside a code block, like this:
 ```python
@@ -813,7 +816,7 @@ When you think you have to wait for some time, return ```WAIT```;
 When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
 When you think the task is done, return ```DONE```.
 
-First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
+First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
 """.strip()
 
 SYS_PROMPT_SEEACT = """
@@ -842,12 +845,15 @@ Then, based on your analysis, in conjunction with human desktop using habits and
 """
 
 ACTION_GROUNDING_PROMPT_SEEACT = """
-You are required to use `pyautogui` to perform the action. But replace x, y in the code with the tag of the element you want to operate with. 
such as:
+You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with.
+You can replace x, y in the code with the tag of the element you want to operate with, such as:
 ```python
 pyautogui.moveTo(tag#3)
 pyautogui.click(tag#2)
 pyautogui.dragTo(tag#1, button='left')
 ```
+When you think you can directly output precise x and y coordinates or there is no tag on which you want to interact, you can also use them directly.
+But you should be careful to ensure that the coordinates are correct.
 Return one line or multiple lines of python code to perform the action each time, be time efficient.
 You ONLY need to return the code inside a code block, like this:
 ```python
@@ -858,5 +864,5 @@ When you think you have to wait for some time, return ```WAIT```;
 When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
 When you think the task is done, return ```DONE```.
 
-First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
+First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
 """

From 6f27c5bf50e9c881346a0834c39f56841a3ef057 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 20 Jan 2024 19:19:37 +0800
Subject: [PATCH 08/13] Wrap up SeeAct implementation

---
 experiment_screenshot_a11y_tree.py |  139 +++
 experiment_screenshot_seeact.py    |  135 +++
 experiment_screenshot_som.py       |  135 +++
 mm_agents/gpt_4v_agent.py          |   33 +-
 mm_agents/visualizer.py            | 1405 ----------------------------
 5 files changed, 437 insertions(+), 1410 deletions(-)
 create mode 100644 experiment_screenshot_a11y_tree.py
 create mode 100644 experiment_screenshot_seeact.py
 create mode 100644 experiment_screenshot_som.py
 delete mode 100644 mm_agents/visualizer.py

diff --git a/experiment_screenshot_a11y_tree.py b/experiment_screenshot_a11y_tree.py
new file mode 100644
index 0000000..60c81b6
--- /dev/null
+++ b/experiment_screenshot_a11y_tree.py
@@ -0,0 +1,139 @@
+import datetime
+import json
+import logging
+import os
+import sys
+
+from desktop_env.envs.desktop_env import DesktopEnv
+from mm_agents.gpt_4v_agent import GPT4v_Agent
+
+# Logger Configs {{{ #
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
+
+file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
+debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
+stdout_handler = logging.StreamHandler(sys.stdout)
+sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
+
+file_handler.setLevel(logging.INFO)
+debug_handler.setLevel(logging.DEBUG)
+stdout_handler.setLevel(logging.INFO)
+sdebug_handler.setLevel(logging.DEBUG)
+
+formatter = logging.Formatter(
+    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
+file_handler.setFormatter(formatter)
+debug_handler.setFormatter(formatter)
+stdout_handler.setFormatter(formatter)
+sdebug_handler.setFormatter(formatter)
+
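+# only records from the "desktopenv" logger hierarchy are surfaced on stdout and in the sdebug log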
+stdout_handler.addFilter(logging.Filter("desktopenv")) +sdebug_handler.addFilter(logging.Filter("desktopenv")) + +logger.addHandler(file_handler) +logger.addHandler(debug_handler) +logger.addHandler(stdout_handler) +logger.addHandler(sdebug_handler) +# }}} Logger Configs # + +logger = logging.getLogger("desktopenv.experiment") + +PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx" + + +def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True): + trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json") + env = DesktopEnv( + path_to_vm=PATH_TO_VM, + action_space=agent.action_space, + task_config=example + ) + # reset the environment to certain snapshot + observation = env.reset() + done = False + step_num = 0 + + if recording: + # send a request to the server to start recording + env.controller.start_recording() + + while not done and step_num < max_steps: + actions = agent.predict(observation) + step_num += 1 + for action in actions: + # Capture the timestamp before executing the action + action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + logger.info("Step %d: %s", step_num, action) + + observation, reward, done, info = env.step(action) + + logger.info("Reward: %.2f", reward) + logger.info("Done: %s", done) + logger.info("Info: %s", info) + + # Save screenshot and trajectory information + with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f: + with open(observation['screenshot'], "rb") as __f: + screenshot = __f.read() + _f.write(screenshot) + + with open(trajectory_recording_path, "a") as f: + f.write(json.dumps({ + "step_num": step_num, + "action_timestamp": action_timestamp, + "action": action, + "reward": reward, + "done": done, + "info": info, + "screenshot_file": f"step_{step_num}_{action_timestamp}.png" + })) + f.write("\n") + + if done: + logger.info("The episode is done.") + break + + if recording: + # send a request to the server to stop recording + env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4")) + + result = env.evaluate() + logger.info("Result: %.2f", result) + + # env.close() + logger.info("Environment closed.") + + +if __name__ == "__main__": + action_space = "pyautogui" + example_class = "chrome" + example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3" + gpt4_model = "gpt-4-vision-preview" + gemini_model = "gemini-pro-vision" + + logger.info("Running example %s/%s", example_class, example_id) + logger.info("Using model %s", gpt4_model) + # logger.info("Using model %s", gemini_model) + + with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f: + example = json.load(f) + example["snapshot"] = "exp_setup4" + + api_key = os.environ.get("OPENAI_API_KEY") + agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], + action_space=action_space, exp="both") + + # api_key = os.environ.get("GENAI_API_KEY") + # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space, exp="both") + + root_trajectory_dir = "exp_trajectory" + + example_trajectory_dir = os.path.join(root_trajectory_dir, "both", example_class, gpt4_model, example_id) + # example_trajectory_dir = os.path.join(root_trajectory_dir, "both", example_class, gemini_model, example_id) + + os.makedirs(example_trajectory_dir, exist_ok=True) + + run_one_example(example, agent, 15, example_trajectory_dir) 
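A note on the trajectory format: `run_one_example` above appends one JSON object per action to `trajectory.json` (JSON Lines rather than a single JSON array), so the file cannot be parsed with a plain `json.load`. Below is a minimal sketch of reading such a file back for inspection; the path and example id are hypothetical placeholders:

```python
import json

# Hypothetical path; the scripts write trajectory.json under
# exp_trajectory/<setting>/<example_class>/<model>/<example_id>/.
trajectory_path = "exp_trajectory/both/chrome/gpt-4-vision-preview/example-id/trajectory.json"

# Parse one JSON object per non-empty line (JSON Lines format).
steps = []
with open(trajectory_path) as f:
    for line in f:
        line = line.strip()
        if line:
            steps.append(json.loads(line))

for step in steps:
    # Field names follow what run_one_example writes for each action.
    print(step["step_num"], step["action"], "reward:", step["reward"], "done:", step["done"])
```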
diff --git a/experiment_screenshot_seeact.py b/experiment_screenshot_seeact.py new file mode 100644 index 0000000..b718693 --- /dev/null +++ b/experiment_screenshot_seeact.py @@ -0,0 +1,135 @@ +import datetime +import json +import logging +import os +import sys + +from desktop_env.envs.desktop_env import DesktopEnv +from mm_agents.gpt_4v_agent import GPT4v_Agent + +# Logger Configs {{{ # +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) + +datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + +file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8") +debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8") +stdout_handler = logging.StreamHandler(sys.stdout) +sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8") + +file_handler.setLevel(logging.INFO) +debug_handler.setLevel(logging.DEBUG) +stdout_handler.setLevel(logging.INFO) +sdebug_handler.setLevel(logging.DEBUG) + +formatter = logging.Formatter( + fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") +file_handler.setFormatter(formatter) +debug_handler.setFormatter(formatter) +stdout_handler.setFormatter(formatter) +sdebug_handler.setFormatter(formatter) + +stdout_handler.addFilter(logging.Filter("desktopenv")) +sdebug_handler.addFilter(logging.Filter("desktopenv")) + +logger.addHandler(file_handler) +logger.addHandler(debug_handler) +logger.addHandler(stdout_handler) +logger.addHandler(sdebug_handler) +# }}} Logger Configs # + +logger = logging.getLogger("desktopenv.experiment") + +PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx" + + +def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True): + trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json") + env = DesktopEnv( + path_to_vm=PATH_TO_VM, + action_space=agent.action_space, + task_config=example + ) + # reset the environment to certain snapshot + observation = env.reset() + done = False + step_num = 0 + + if recording: + # send a request to the server to start recording + env.controller.start_recording() + + while not done and step_num < max_steps: + actions = agent.predict(observation) + step_num += 1 + for action in actions: + # Capture the timestamp before executing the action + action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + logger.info("Step %d: %s", step_num, action) + + observation, reward, done, info = env.step(action) + + logger.info("Reward: %.2f", reward) + logger.info("Done: %s", done) + logger.info("Info: %s", info) + + # Save screenshot and trajectory information + with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f: + with open(observation['screenshot'], "rb") as __f: + screenshot = __f.read() + _f.write(screenshot) + + with open(trajectory_recording_path, "a") as f: + f.write(json.dumps({ + "step_num": step_num, + "action_timestamp": action_timestamp, + "action": action, + "reward": reward, + "done": done, + "info": info, + "screenshot_file": f"step_{step_num}_{action_timestamp}.png" + })) + f.write("\n") + + if done: + logger.info("The episode is done.") + break + + if recording: + # send a request to the server to stop recording + env.controller.end_recording(os.path.join(example_trajectory_dir, 
"recording.mp4")) + + result = env.evaluate() + logger.info("Result: %.2f", result) + + # env.close() + logger.info("Environment closed.") + + +if __name__ == "__main__": + action_space = "pyautogui" + example_class = "chrome" + example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3" + gpt4_model = "gpt-4-vision-preview" + gemini_model = "gemini-pro-vision" + + with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f: + example = json.load(f) + example["snapshot"] = "exp_setup4" + + api_key = os.environ.get("OPENAI_API_KEY") + agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], + action_space=action_space, exp="seeact") + + # api_key = os.environ.get("GENAI_API_KEY") + # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space) + + root_trajectory_dir = "exp_trajectory" + + example_trajectory_dir = os.path.join(root_trajectory_dir, "seeact", example_class, gpt4_model, example_id) + # example_trajectory_dir = os.path.join(root_trajectory_dir, "seeact", example_class, gemini_model, example_id) + + os.makedirs(example_trajectory_dir, exist_ok=True) + + run_one_example(example, agent, 15, example_trajectory_dir) diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py new file mode 100644 index 0000000..2a64bb3 --- /dev/null +++ b/experiment_screenshot_som.py @@ -0,0 +1,135 @@ +import datetime +import json +import logging +import os +import sys + +from desktop_env.envs.desktop_env import DesktopEnv +from mm_agents.gpt_4v_agent import GPT4v_Agent + +# Logger Configs {{{ # +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) + +datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + +file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8") +debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8") +stdout_handler = logging.StreamHandler(sys.stdout) +sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8") + +file_handler.setLevel(logging.INFO) +debug_handler.setLevel(logging.DEBUG) +stdout_handler.setLevel(logging.INFO) +sdebug_handler.setLevel(logging.DEBUG) + +formatter = logging.Formatter( + fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") +file_handler.setFormatter(formatter) +debug_handler.setFormatter(formatter) +stdout_handler.setFormatter(formatter) +sdebug_handler.setFormatter(formatter) + +stdout_handler.addFilter(logging.Filter("desktopenv")) +sdebug_handler.addFilter(logging.Filter("desktopenv")) + +logger.addHandler(file_handler) +logger.addHandler(debug_handler) +logger.addHandler(stdout_handler) +logger.addHandler(sdebug_handler) +# }}} Logger Configs # + +logger = logging.getLogger("desktopenv.experiment") + +PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx" + + +def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True): + trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json") + env = DesktopEnv( + path_to_vm=PATH_TO_VM, + action_space=agent.action_space, + task_config=example + ) + # reset the environment to certain snapshot + observation = env.reset() + done = False + step_num = 0 + + if recording: + # send a request to the server to start recording + 
env.controller.start_recording() + + while not done and step_num < max_steps: + actions = agent.predict(observation) + step_num += 1 + for action in actions: + # Capture the timestamp before executing the action + action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + logger.info("Step %d: %s", step_num, action) + + observation, reward, done, info = env.step(action) + + logger.info("Reward: %.2f", reward) + logger.info("Done: %s", done) + logger.info("Info: %s", info) + + # Save screenshot and trajectory information + with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f: + with open(observation['screenshot'], "rb") as __f: + screenshot = __f.read() + _f.write(screenshot) + + with open(trajectory_recording_path, "a") as f: + f.write(json.dumps({ + "step_num": step_num, + "action_timestamp": action_timestamp, + "action": action, + "reward": reward, + "done": done, + "info": info, + "screenshot_file": f"step_{step_num}_{action_timestamp}.png" + })) + f.write("\n") + + if done: + logger.info("The episode is done.") + break + + if recording: + # send a request to the server to stop recording + env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4")) + + result = env.evaluate() + logger.info("Result: %.2f", result) + + # env.close() + logger.info("Environment closed.") + + +if __name__ == "__main__": + action_space = "pyautogui" + example_class = "chrome" + example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3" + gpt4_model = "gpt-4-vision-preview" + gemini_model = "gemini-pro-vision" + + with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f: + example = json.load(f) + example["snapshot"] = "exp_setup4" + + api_key = os.environ.get("OPENAI_API_KEY") + agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], + action_space=action_space, exp="som") + + # api_key = os.environ.get("GENAI_API_KEY") + # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space) + + root_trajectory_dir = "exp_trajectory" + + example_trajectory_dir = os.path.join(root_trajectory_dir, "som", example_class, gpt4_model, example_id) + # example_trajectory_dir = os.path.join(root_trajectory_dir, "som", example_class, gemini_model, example_id) + + os.makedirs(example_trajectory_dir, exist_ok=True) + + run_one_example(example, agent, 15, example_trajectory_dir) diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py index 896ff7e..10995b6 100644 --- a/mm_agents/gpt_4v_agent.py +++ b/mm_agents/gpt_4v_agent.py @@ -235,7 +235,7 @@ class GPT4v_Agent: for previous_obs, previous_action in zip(_observations, _actions): - if self.exp in ["both", "som", "seeact"]: + if self.exp == "both": _screenshot = previous_obs["screenshot"] _linearized_accessibility_tree = previous_obs["accessibility_tree"] @@ -244,7 +244,28 @@ class GPT4v_Agent: "content": [ { "type": "text", - "text": "Given the info from the tagged screenshot as below:\n{}\nWhat's the next step that you will do to help with the task?".format( + "text": "Given the screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format( + _linearized_accessibility_tree) + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{_screenshot}", + "detail": "high" + } + } + ] + }) + elif self.exp in ["som", "seeact"]: + _screenshot = previous_obs["screenshot"] + 
_linearized_accessibility_tree = previous_obs["accessibility_tree"] + + messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": "Given the tagged screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format( _linearized_accessibility_tree) }, { @@ -369,7 +390,7 @@ class GPT4v_Agent: "content": [ { "type": "text", - "text": "Given the info from the tagged screenshot as below:\n{}\nWhat's the next step that you will do to help with the task?".format( + "text": "Given the tagged screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format( linearized_accessibility_tree) }, { @@ -383,8 +404,7 @@ class GPT4v_Agent: }) elif self.exp == "seeact": # Add som to the screenshot - masks, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"]) - + masks, drew_nodes, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"]) base64_image = encode_image(tagged_screenshot) linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) @@ -421,6 +441,8 @@ class GPT4v_Agent: "max_tokens": self.max_tokens }) + print(response) + if self.exp == "seeact": messages.append({ "role": "assistant", @@ -448,6 +470,7 @@ class GPT4v_Agent: "messages": messages, "max_tokens": self.max_tokens }) + print(response) try: actions = self.parse_actions(response, masks) diff --git a/mm_agents/visualizer.py b/mm_agents/visualizer.py deleted file mode 100644 index bd78a98..0000000 --- a/mm_agents/visualizer.py +++ /dev/null @@ -1,1405 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import colorsys -import logging -import math -import numpy as np -from enum import Enum, unique -import cv2 -import matplotlib as mpl -import matplotlib.colors as mplc -import matplotlib.figure as mplfigure -import pycocotools.mask as mask_util -import torch -from matplotlib.backends.backend_agg import FigureCanvasAgg -from PIL import Image - -from detectron2.data import MetadataCatalog -from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes -from detectron2.utils.file_io import PathManager - -from detectron2.utils.colormap import random_color -import random - -logger = logging.getLogger(__name__) - -__all__ = ["ColorMode", "VisImage", "Visualizer"] - - -_SMALL_OBJECT_AREA_THRESH = 1000 -_LARGE_MASK_AREA_THRESH = 120000 -_OFF_WHITE = (1.0, 1.0, 240.0 / 255) -_BLACK = (0, 0, 0) -_RED = (1.0, 0, 0) - -_KEYPOINT_THRESHOLD = 0.05 - - -@unique -class ColorMode(Enum): - """ - Enum of different color modes to use for instance visualizations. - """ - - IMAGE = 0 - """ - Picks a random color for every instance and overlay segmentations with low opacity. - """ - SEGMENTATION = 1 - """ - Let instances of the same category have similar colors - (from metadata.thing_colors), and overlay them with - high opacity. This provides more attention on the quality of segmentation. - """ - IMAGE_BW = 2 - """ - Same as IMAGE, but convert all areas without masks to gray-scale. - Only available for drawing per-instance mask predictions. - """ - - -class GenericMask: - """ - Attribute: - polygons (list[ndarray]): list[ndarray]: polygons for this mask. - Each ndarray has format [x, y, x, y, ...] 
- mask (ndarray): a binary mask - """ - - def __init__(self, mask_or_polygons, height, width): - self._mask = self._polygons = self._has_holes = None - self.height = height - self.width = width - - m = mask_or_polygons - if isinstance(m, dict): - # RLEs - assert "counts" in m and "size" in m - if isinstance(m["counts"], list): # uncompressed RLEs - h, w = m["size"] - assert h == height and w == width - m = mask_util.frPyObjects(m, h, w) - self._mask = mask_util.decode(m)[:, :] - return - - if isinstance(m, list): # list[ndarray] - self._polygons = [np.asarray(x).reshape(-1) for x in m] - return - - if isinstance(m, np.ndarray): # assumed to be a binary mask - assert m.shape[1] != 2, m.shape - assert m.shape == ( - height, - width, - ), f"mask shape: {m.shape}, target dims: {height}, {width}" - self._mask = m.astype("uint8") - return - - raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) - - @property - def mask(self): - if self._mask is None: - self._mask = self.polygons_to_mask(self._polygons) - return self._mask - - @property - def polygons(self): - if self._polygons is None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - return self._polygons - - @property - def has_holes(self): - if self._has_holes is None: - if self._mask is not None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - else: - self._has_holes = False # if original format is polygon, does not have holes - return self._has_holes - - def mask_to_polygons(self, mask): - # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level - # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. - # Internal contours (holes) are placed in hierarchy-2. - # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. - mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr - res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) - hierarchy = res[-1] - if hierarchy is None: # empty mask - return [], False - has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 - res = res[-2] - res = [x.flatten() for x in res] - # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. - # We add 0.5 to turn them into real-value coordinate space. A better solution - # would be to first +0.5 and then dilate the returned polygon by 0.5. - res = [x + 0.5 for x in res if len(x) >= 6] - return res, has_holes - - def polygons_to_mask(self, polygons): - rle = mask_util.frPyObjects(polygons, self.height, self.width) - rle = mask_util.merge(rle) - return mask_util.decode(rle)[:, :] - - def area(self): - return self.mask.sum() - - def bbox(self): - p = mask_util.frPyObjects(self.polygons, self.height, self.width) - p = mask_util.merge(p) - bbox = mask_util.toBbox(p) - bbox[2] += bbox[0] - bbox[3] += bbox[1] - return bbox - - -class _PanopticPrediction: - """ - Unify different panoptic annotation/prediction formats - """ - - def __init__(self, panoptic_seg, segments_info, metadata=None): - if segments_info is None: - assert metadata is not None - # If "segments_info" is None, we assume "panoptic_img" is a - # H*W int32 image storing the panoptic_id in the format of - # category_id * label_divisor + instance_id. We reserve -1 for - # VOID label. - label_divisor = metadata.label_divisor - segments_info = [] - for panoptic_label in np.unique(panoptic_seg.numpy()): - if panoptic_label == -1: - # VOID region. 
- continue - pred_class = panoptic_label // label_divisor - isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() - segments_info.append( - { - "id": int(panoptic_label), - "category_id": int(pred_class), - "isthing": bool(isthing), - } - ) - del metadata - - self._seg = panoptic_seg - - self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info - segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) - areas = areas.numpy() - sorted_idxs = np.argsort(-areas) - self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] - self._seg_ids = self._seg_ids.tolist() - for sid, area in zip(self._seg_ids, self._seg_areas): - if sid in self._sinfo: - self._sinfo[sid]["area"] = float(area) - - def non_empty_mask(self): - """ - Returns: - (H, W) array, a mask for all pixels that have a prediction - """ - empty_ids = [] - for id in self._seg_ids: - if id not in self._sinfo: - empty_ids.append(id) - if len(empty_ids) == 0: - return np.zeros(self._seg.shape, dtype=np.uint8) - assert ( - len(empty_ids) == 1 - ), ">1 ids corresponds to no labels. This is currently not supported" - return (self._seg != empty_ids[0]).numpy().astype(np.bool) - - def semantic_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or sinfo["isthing"]: - # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. - continue - yield (self._seg == sid).numpy().astype(np.bool), sinfo - - def instance_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or not sinfo["isthing"]: - continue - mask = (self._seg == sid).numpy().astype(np.bool) - if mask.sum() > 0: - yield mask, sinfo - - -def _create_text_labels(classes, scores, class_names, is_crowd=None): - """ - Args: - classes (list[int] or None): - scores (list[float] or None): - class_names (list[str] or None): - is_crowd (list[bool] or None): - - Returns: - list[str] or None - """ - labels = None - if classes is not None: - if class_names is not None and len(class_names) > 0: - labels = [class_names[i] for i in classes] - else: - labels = [str(i) for i in classes] - if scores is not None: - if labels is None: - labels = ["{:.0f}%".format(s * 100) for s in scores] - else: - labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] - if labels is not None and is_crowd is not None: - labels = [l + ("|crowd" if crowd else "") for l, crowd in zip(labels, is_crowd)] - return labels - - -class VisImage: - def __init__(self, img, scale=1.0): - """ - Args: - img (ndarray): an RGB image of shape (H, W, 3) in range [0, 255]. - scale (float): scale the input image - """ - self.img = img - self.scale = scale - self.width, self.height = img.shape[1], img.shape[0] - self._setup_figure(img) - - def _setup_figure(self, img): - """ - Args: - Same as in :meth:`__init__()`. - - Returns: - fig (matplotlib.pyplot.figure): top level container for all the image plot elements. - ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
- """ - fig = mplfigure.Figure(frameon=False) - self.dpi = fig.get_dpi() - # add a small 1e-2 to avoid precision lost due to matplotlib's truncation - # (https://github.com/matplotlib/matplotlib/issues/15363) - fig.set_size_inches( - (self.width * self.scale + 1e-2) / self.dpi, - (self.height * self.scale + 1e-2) / self.dpi, - ) - self.canvas = FigureCanvasAgg(fig) - # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) - ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) - ax.axis("off") - self.fig = fig - self.ax = ax - self.reset_image(img) - - def reset_image(self, img): - """ - Args: - img: same as in __init__ - """ - img = img.astype("uint8") - self.ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") - - def save(self, filepath): - """ - Args: - filepath (str): a string that contains the absolute path, including the file name, where - the visualized image will be saved. - """ - self.fig.savefig(filepath) - - def get_image(self): - """ - Returns: - ndarray: - the visualized image of shape (H, W, 3) (RGB) in uint8 type. - The shape is scaled w.r.t the input image using the given `scale` argument. - """ - canvas = self.canvas - s, (width, height) = canvas.print_to_buffer() - # buf = io.BytesIO() # works for cairo backend - # canvas.print_rgba(buf) - # width, height = self.width, self.height - # s = buf.getvalue() - - buffer = np.frombuffer(s, dtype="uint8") - - img_rgba = buffer.reshape(height, width, 4) - rgb, alpha = np.split(img_rgba, [3], axis=2) - return rgb.astype("uint8") - - -class Visualizer: - """ - Visualizer that draws data about detection/segmentation on images. - - It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` - that draw primitive objects to images, as well as high-level wrappers like - `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` - that draw composite data in some pre-defined style. - - Note that the exact visualization style for the high-level wrappers are subject to change. - Style such as color, opacity, label contents, visibility of labels, or even the visibility - of objects themselves (e.g. when the object is too small) may change according - to different heuristics, as long as the results still look visually reasonable. - - To obtain a consistent style, you can implement custom drawing functions with the - abovementioned primitive methods instead. If you need more customized visualization - styles, you can process the data yourself following their format documented in - tutorials (:doc:`/tutorials/models`, :doc:`/tutorials/datasets`). This class does not - intend to satisfy everyone's preference on drawing styles. - - This visualizer focuses on high rendering quality rather than performance. It is not - designed to be used for real-time applications. - """ - - # TODO implement a fast, rasterized version using OpenCV - - def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): - """ - Args: - img_rgb: a numpy array of shape (H, W, C), where H and W correspond to - the height and width of the image respectively. C is the number of - color channels. The image is required to be in RGB format since that - is a requirement of the Matplotlib library. The image is also expected - to be in the range [0, 255]. - metadata (Metadata): dataset metadata (e.g. class names and colors) - instance_mode (ColorMode): defines one of the pre-defined style for drawing - instances on an image. 
- """ - self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) - if metadata is None: - metadata = MetadataCatalog.get("__nonexist__") - self.metadata = metadata - self.output = VisImage(self.img, scale=scale) - self.cpu_device = torch.device("cpu") - - # too small texts are useless, therefore clamp to 9 - self._default_font_size = max( - np.sqrt(self.output.height * self.output.width) // 90, 10 // scale - ) - self._default_font_size = 18 - self._instance_mode = instance_mode - self.keypoint_threshold = _KEYPOINT_THRESHOLD - - import matplotlib.colors as mcolors - css4_colors = mcolors.CSS4_COLORS - self.color_proposals = [list(mcolors.hex2color(color)) for color in css4_colors.values()] - - def draw_instance_predictions(self, predictions): - """ - Draw instance-level prediction results on an image. - - Args: - predictions (Instances): the output of an instance detection/segmentation - model. Following fields will be used to draw: - "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). - - Returns: - output (VisImage): image object with visualizations. - """ - boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None - scores = predictions.scores if predictions.has("scores") else None - classes = predictions.pred_classes.tolist() if predictions.has("pred_classes") else None - labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) - keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None - - keep = (scores > 0.5).cpu() - boxes = boxes[keep] - scores = scores[keep] - classes = np.array(classes) - classes = classes[np.array(keep)] - labels = np.array(labels) - labels = labels[np.array(keep)] - - if predictions.has("pred_masks"): - masks = np.asarray(predictions.pred_masks) - masks = masks[np.array(keep)] - masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] - else: - masks = None - - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - # if self.metadata.get("thing_colors"): - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes - ] - alpha = 0.4 - else: - colors = None - alpha = 0.4 - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.reset_image( - self._create_grayscale_image( - (predictions.pred_masks.any(dim=0) > 0).numpy() - if predictions.has("pred_masks") - else None - ) - ) - alpha = 0.3 - - self.overlay_instances( - masks=masks, - boxes=boxes, - labels=labels, - keypoints=keypoints, - assigned_colors=colors, - alpha=alpha, - ) - return self.output - - def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.7): - """ - Draw semantic segmentation predictions/labels. - - Args: - sem_seg (Tensor or ndarray): the segmentation of shape (H, W). - Each value is the integer label of the pixel. - area_threshold (int): segments with less than `area_threshold` are not drawn. - alpha (float): the larger it is, the more opaque the segmentations are. - - Returns: - output (VisImage): image object with visualizations. 
- """ - if isinstance(sem_seg, torch.Tensor): - sem_seg = sem_seg.numpy() - labels, areas = np.unique(sem_seg, return_counts=True) - sorted_idxs = np.argsort(-areas).tolist() - labels = labels[sorted_idxs] - for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] - except (AttributeError, IndexError): - mask_color = None - - binary_mask = (sem_seg == label).astype(np.uint8) - text = self.metadata.stuff_classes[label] - self.draw_binary_mask( - binary_mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - return self.output - - def draw_panoptic_seg(self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7): - """ - Draw panoptic prediction annotations or results. - - Args: - panoptic_seg (Tensor): of shape (height, width) where the values are ids for each - segment. - segments_info (list[dict] or None): Describe each segment in `panoptic_seg`. - If it is a ``list[dict]``, each dict contains keys "id", "category_id". - If None, category id of each pixel is computed by - ``pixel // metadata.label_divisor``. - area_threshold (int): stuff segments with less than `area_threshold` are not drawn. - - Returns: - output (VisImage): image object with visualizations. - """ - pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask())) - - # draw mask for all semantic segments first i.e. "stuff" - for mask, sinfo in pred.semantic_masks(): - category_idx = sinfo["category_id"] - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] - except AttributeError: - mask_color = None - - text = self.metadata.stuff_classes[category_idx].replace('-other','').replace('-merged','') - self.draw_binary_mask( - mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - - # draw mask for all instances second - all_instances = list(pred.instance_masks()) - if len(all_instances) == 0: - return self.output - masks, sinfo = list(zip(*all_instances)) - category_ids = [x["category_id"] for x in sinfo] - - try: - scores = [x["score"] for x in sinfo] - except KeyError: - scores = None - class_names = [name.replace('-other','').replace('-merged','') for name in self.metadata.thing_classes] - labels = _create_text_labels( - category_ids, scores, class_names, [x.get("iscrowd", 0) for x in sinfo] - ) - - try: - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids - ] - except AttributeError: - colors = None - self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) - - return self.output - - draw_panoptic_seg_predictions = draw_panoptic_seg # backward compatibility - - def draw_dataset_dict(self, dic): - """ - Draw annotations/segmentaions in Detectron2 Dataset format. - - Args: - dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. - - Returns: - output (VisImage): image object with visualizations. 
- """ - annos = dic.get("annotations", None) - if annos: - if "segmentation" in annos[0]: - masks = [x["segmentation"] for x in annos] - else: - masks = None - if "keypoints" in annos[0]: - keypts = [x["keypoints"] for x in annos] - keypts = np.array(keypts).reshape(len(annos), -1, 3) - else: - keypts = None - - boxes = [ - BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) - if len(x["bbox"]) == 4 - else x["bbox"] - for x in annos - ] - - colors = None - category_ids = [x["category_id"] for x in annos] - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) - for c in category_ids - ] - names = self.metadata.get("thing_classes", None) - labels = _create_text_labels( - category_ids, - scores=None, - class_names=names, - is_crowd=[x.get("iscrowd", 0) for x in annos], - ) - self.overlay_instances( - labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors - ) - - sem_seg = dic.get("sem_seg", None) - if sem_seg is None and "sem_seg_file_name" in dic: - with PathManager.open(dic["sem_seg_file_name"], "rb") as f: - sem_seg = Image.open(f) - sem_seg = np.asarray(sem_seg, dtype="uint8") - if sem_seg is not None: - self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.4) - - pan_seg = dic.get("pan_seg", None) - if pan_seg is None and "pan_seg_file_name" in dic: - with PathManager.open(dic["pan_seg_file_name"], "rb") as f: - pan_seg = Image.open(f) - pan_seg = np.asarray(pan_seg) - from panopticapi.utils import rgb2id - - pan_seg = rgb2id(pan_seg) - if pan_seg is not None: - segments_info = dic["segments_info"] - pan_seg = torch.tensor(pan_seg) - self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0, alpha=0.7) - return self.output - - def overlay_instances( - self, - *, - boxes=None, - labels=None, - masks=None, - keypoints=None, - assigned_colors=None, - alpha=0.5, - ): - """ - Args: - boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, - or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, - or a :class:`RotatedBoxes`, - or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image, - labels (list[str]): the text to be displayed for each instance. - masks (masks-like object): Supported types are: - - * :class:`detectron2.structures.PolygonMasks`, - :class:`detectron2.structures.BitMasks`. - * list[list[ndarray]]: contains the segmentation masks for all objects in one image. - The first level of the list corresponds to individual instances. The second - level to all the polygon that compose the instance, and the third level - to the polygon coordinates. The third level should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - * list[ndarray]: each ndarray is a binary mask of shape (H, W). - * list[dict]: each dict is a COCO-style RLE. - keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), - where the N is the number of instances and K is the number of keypoints. - The last dimension corresponds to (x, y, visibility or score). - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - Returns: - output (VisImage): image object with visualizations. 
- """ - num_instances = 0 - if boxes is not None: - boxes = self._convert_boxes(boxes) - num_instances = len(boxes) - if masks is not None: - masks = self._convert_masks(masks) - if num_instances: - assert len(masks) == num_instances - else: - num_instances = len(masks) - if keypoints is not None: - if num_instances: - assert len(keypoints) == num_instances - else: - num_instances = len(keypoints) - keypoints = self._convert_keypoints(keypoints) - if labels is not None: - assert len(labels) == num_instances - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - if boxes is not None and boxes.shape[1] == 5: - return self.overlay_rotated_instances( - boxes=boxes, labels=labels, assigned_colors=assigned_colors - ) - - # Display in largest to smallest order to reduce occlusion. - areas = None - if boxes is not None: - areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) - elif masks is not None: - areas = np.asarray([x.area() for x in masks]) - - if areas is not None: - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] if boxes is not None else None - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None - assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] - keypoints = keypoints[sorted_idxs] if keypoints is not None else None - - for i in range(num_instances): - color = assigned_colors[i] - if boxes is not None: - self.draw_box(boxes[i], edge_color=color) - - if masks is not None: - for segment in masks[i].polygons: - self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) - - if labels is not None: - # first get a box - if boxes is not None: - x0, y0, x1, y1 = boxes[i] - text_pos = (x0, y0) # if drawing boxes, put text on the box corner. - horiz_align = "left" - elif masks is not None: - # skip small mask without polygon - if len(masks[i].polygons) == 0: - continue - - x0, y0, x1, y1 = masks[i].bbox() - - # draw text in the center (defined by median) when box is not drawn - # median is less sensitive to outliers. - text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] - horiz_align = "center" - else: - continue # drawing the box confidence for keypoints isn't very useful. - # for small objects, draw text at the side to avoid occlusion - instance_area = (y1 - y0) * (x1 - x0) - if ( - instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale - or y1 - y0 < 40 * self.output.scale - ): - if y1 >= self.output.height - 5: - text_pos = (x1, y0) - else: - text_pos = (x0, y1) - - height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) - * 0.5 - * self._default_font_size - ) - self.draw_text( - labels[i], - text_pos, - color=lighter_color, - horizontal_alignment=horiz_align, - font_size=font_size, - ) - - # draw keypoints - if keypoints is not None: - for keypoints_per_instance in keypoints: - self.draw_and_connect_keypoints(keypoints_per_instance) - - return self.output - - def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): - """ - Args: - boxes (ndarray): an Nx5 numpy array of - (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image. 
- labels (list[str]): the text to be displayed for each instance. - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - - Returns: - output (VisImage): image object with visualizations. - """ - num_instances = len(boxes) - - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - - # Display in largest to smallest order to reduce occlusion. - if boxes is not None: - areas = boxes[:, 2] * boxes[:, 3] - - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - colors = [assigned_colors[idx] for idx in sorted_idxs] - - for i in range(num_instances): - self.draw_rotated_box_with_label( - boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None - ) - - return self.output - - def draw_and_connect_keypoints(self, keypoints): - """ - Draws keypoints of an instance and follows the rules for keypoint connections - to draw lines between appropriate keypoints. This follows color heuristics for - line color. - - Args: - keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints - and the last dimension corresponds to (x, y, probability). - - Returns: - output (VisImage): image object with visualizations. - """ - visible = {} - keypoint_names = self.metadata.get("keypoint_names") - for idx, keypoint in enumerate(keypoints): - - # draw keypoint - x, y, prob = keypoint - if prob > self.keypoint_threshold: - self.draw_circle((x, y), color=_RED) - if keypoint_names: - keypoint_name = keypoint_names[idx] - visible[keypoint_name] = (x, y) - - if self.metadata.get("keypoint_connection_rules"): - for kp0, kp1, color in self.metadata.keypoint_connection_rules: - if kp0 in visible and kp1 in visible: - x0, y0 = visible[kp0] - x1, y1 = visible[kp1] - color = tuple(x / 255.0 for x in color) - self.draw_line([x0, x1], [y0, y1], color=color) - - # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip - # Note that this strategy is specific to person keypoints. - # For other keypoints, it should just do nothing - try: - ls_x, ls_y = visible["left_shoulder"] - rs_x, rs_y = visible["right_shoulder"] - mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 - except KeyError: - pass - else: - # draw line from nose to mid-shoulder - nose_x, nose_y = visible.get("nose", (None, None)) - if nose_x is not None: - self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) - - try: - # draw line from mid-shoulder to mid-hip - lh_x, lh_y = visible["left_hip"] - rh_x, rh_y = visible["right_hip"] - except KeyError: - pass - else: - mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 - self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) - return self.output - - """ - Primitive drawing functions: - """ - - def draw_text( - self, - text, - position, - *, - font_size=None, - color="g", - horizontal_alignment="center", - rotation=0, - ): - """ - Args: - text (str): class label - position (tuple): a tuple of the x and y coordinates to place text on image. - font_size (int, optional): font of the text. If not provided, a font size - proportional to the image width is calculated and used. 
- color: color of the text. Refer to `matplotlib.colors` for full list - of formats that are accepted. - horizontal_alignment (str): see `matplotlib.text.Text` - rotation: rotation angle in degrees CCW - - Returns: - output (VisImage): image object with text drawn. - """ - if not font_size: - font_size = self._default_font_size - - # since the text background is dark, we don't want the text to be dark - color = np.maximum(list(mplc.to_rgb(color)), 0.15) - color[np.argmax(color)] = max(0.8, np.max(color)) - - def contrasting_color(rgb): - """Returns 'white' or 'black' depending on which color contrasts more with the given RGB value.""" - - # Decompose the RGB tuple - R, G, B = rgb - - # Calculate the Y value - Y = 0.299 * R + 0.587 * G + 0.114 * B - - # If Y value is greater than 128, it's closer to white so return black. Otherwise, return white. - return 'black' if Y > 128 else 'white' - - bbox_background = contrasting_color(color*255) - - x, y = position - self.output.ax.text( - x, - y, - text, - size=font_size * self.output.scale, - family="sans-serif", - bbox={"facecolor": bbox_background, "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, - verticalalignment="top", - horizontalalignment=horizontal_alignment, - color=color, - zorder=10, - rotation=rotation, - ) - return self.output - - def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): - """ - Args: - box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 - are the coordinates of the image's top left corner. x1 and y1 are the - coordinates of the image's bottom right corner. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - - Returns: - output (VisImage): image object with box drawn. - """ - x0, y0, x1, y1 = box_coord - width = x1 - x0 - height = y1 - y0 - - linewidth = max(self._default_font_size / 12, 1) - - self.output.ax.add_patch( - mpl.patches.Rectangle( - (x0, y0), - width, - height, - fill=False, - edgecolor=edge_color, - linewidth=linewidth * self.output.scale, - alpha=alpha, - linestyle=line_style, - ) - ) - return self.output - - def draw_rotated_box_with_label( - self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None - ): - """ - Draw a rotated box with label on its top-left corner. - - Args: - rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), - where cnt_x and cnt_y are the center coordinates of the box. - w and h are the width and height of the box. angle represents how - many degrees the box is rotated CCW with regard to the 0-degree box. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - label (string): label for rotated box. It will not be rendered when set to None. - - Returns: - output (VisImage): image object with box drawn. 
- """ - cnt_x, cnt_y, w, h, angle = rotated_box - area = w * h - # use thinner lines when the box is small - linewidth = self._default_font_size / ( - 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 - ) - - theta = angle * math.pi / 180.0 - c = math.cos(theta) - s = math.sin(theta) - rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] - # x: left->right ; y: top->down - rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] - for k in range(4): - j = (k + 1) % 4 - self.draw_line( - [rotated_rect[k][0], rotated_rect[j][0]], - [rotated_rect[k][1], rotated_rect[j][1]], - color=edge_color, - linestyle="--" if k == 1 else line_style, - linewidth=linewidth, - ) - - if label is not None: - text_pos = rotated_rect[1] # topleft corner - - height_ratio = h / np.sqrt(self.output.height * self.output.width) - label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size - ) - self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) - - return self.output - - def draw_circle(self, circle_coord, color, radius=3): - """ - Args: - circle_coord (list(int) or tuple(int)): contains the x and y coordinates - of the center of the circle. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - radius (int): radius of the circle. - - Returns: - output (VisImage): image object with box drawn. - """ - x, y = circle_coord - self.output.ax.add_patch( - mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) - ) - return self.output - - def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): - """ - Args: - x_data (list[int]): a list containing x values of all the points being drawn. - Length of list should match the length of y_data. - y_data (list[int]): a list containing y values of all the points being drawn. - Length of list should match the length of x_data. - color: color of the line. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - linestyle: style of the line. Refer to `matplotlib.lines.Line2D` - for a full list of formats that are accepted. - linewidth (float or None): width of the line. When it's None, - a default value will be computed and used. - - Returns: - output (VisImage): image object with line drawn. - """ - if linewidth is None: - linewidth = self._default_font_size / 3 - linewidth = max(linewidth, 1) - self.output.ax.add_line( - mpl.lines.Line2D( - x_data, - y_data, - linewidth=linewidth * self.output.scale, - color=color, - linestyle=linestyle, - ) - ) - return self.output - - def draw_binary_mask( - self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.7, area_threshold=10 - ): - """ - Args: - binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and - W is the image width. Each value in the array is either a 0 or 1 value of uint8 - type. - color: color of the mask. Refer to `matplotlib.colors` for a full list of - formats that are accepted. If None, will pick a random color. - edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. - text (str): if None, will be drawn on the object - alpha (float): blending efficient. Smaller values lead to more transparent masks. - area_threshold (float): a connected component smaller than this area will not be shown. 
- - Returns: - output (VisImage): image object with mask drawn. - """ - if color is None: - color = random_color(rgb=True, maximum=1) - color = mplc.to_rgb(color) - - has_valid_segment = False - binary_mask = binary_mask.astype("uint8") # opencv needs uint8 - mask = GenericMask(binary_mask, self.output.height, self.output.width) - shape2d = (binary_mask.shape[0], binary_mask.shape[1]) - - if not mask.has_holes: - # draw polygons for regular masks - for segment in mask.polygons: - area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) - if area < (area_threshold or 0): - continue - has_valid_segment = True - segment = segment.reshape(-1, 2) - self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) - else: - # TODO: Use Path/PathPatch to draw vector graphics: - # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon - rgba = np.zeros(shape2d + (4,), dtype="float32") - rgba[:, :, :3] = color - rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha - has_valid_segment = True - self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) - - if text is not None and has_valid_segment: - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - self._draw_text_in_mask(binary_mask, text, lighter_color) - return self.output - - def draw_binary_mask_with_number( - self, binary_mask, color=None, *, edge_color=None, text=None, label_mode='1', alpha=0.1, anno_mode=['Mask'], area_threshold=10 - ): - """ - Args: - binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and - W is the image width. Each value in the array is either a 0 or 1 value of uint8 - type. - color: color of the mask. Refer to `matplotlib.colors` for a full list of - formats that are accepted. If None, will pick a random color. - edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. - text (str): if None, will be drawn on the object - alpha (float): blending efficient. Smaller values lead to more transparent masks. - area_threshold (float): a connected component smaller than this area will not be shown. - - Returns: - output (VisImage): image object with mask drawn. 
- """ - if color is None: - randint = random.randint(0, len(self.color_proposals)-1) - color = self.color_proposals[randint] - color = mplc.to_rgb(color) - - has_valid_segment = True - binary_mask = binary_mask.astype("uint8") # opencv needs uint8 - mask = GenericMask(binary_mask, self.output.height, self.output.width) - shape2d = (binary_mask.shape[0], binary_mask.shape[1]) - bbox = mask.bbox() - - if 'Mask' in anno_mode: - if not mask.has_holes: - # draw polygons for regular masks - for segment in mask.polygons: - area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) - if area < (area_threshold or 0): - continue - has_valid_segment = True - segment = segment.reshape(-1, 2) - self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) - else: - # TODO: Use Path/PathPatch to draw vector graphics: - # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon - rgba = np.zeros(shape2d + (4,), dtype="float32") - rgba[:, :, :3] = color - rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha - has_valid_segment = True - self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) - - if 'Box' in anno_mode: - self.draw_box(bbox, edge_color=color, alpha=0.75) - - if 'Mark' in anno_mode: - has_valid_segment = True - else: - has_valid_segment = False - - if text is not None and has_valid_segment: - # lighter_color = tuple([x*0.2 for x in color]) - lighter_color = [1,1,1] # self._change_color_brightness(color, brightness_factor=0.7) - self._draw_number_in_mask(binary_mask, text, lighter_color, label_mode) - return self.output - - def draw_soft_mask(self, soft_mask, color=None, *, text=None, alpha=0.5): - """ - Args: - soft_mask (ndarray): float array of shape (H, W), each value in [0, 1]. - color: color of the mask. Refer to `matplotlib.colors` for a full list of - formats that are accepted. If None, will pick a random color. - text (str): if None, will be drawn on the object - alpha (float): blending efficient. Smaller values lead to more transparent masks. - - Returns: - output (VisImage): image object with mask drawn. - """ - if color is None: - color = random_color(rgb=True, maximum=1) - color = mplc.to_rgb(color) - - shape2d = (soft_mask.shape[0], soft_mask.shape[1]) - rgba = np.zeros(shape2d + (4,), dtype="float32") - rgba[:, :, :3] = color - rgba[:, :, 3] = soft_mask * alpha - self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) - - if text is not None: - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - binary_mask = (soft_mask > 0.5).astype("uint8") - self._draw_text_in_mask(binary_mask, text, lighter_color) - return self.output - - def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): - """ - Args: - segment: numpy array of shape Nx2, containing all the points in the polygon. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. If not provided, a darker shade - of the polygon color will be used instead. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - - Returns: - output (VisImage): image object with polygon drawn. 
- """ - if edge_color is None: - # make edge color darker than the polygon color - if alpha > 0.8: - edge_color = self._change_color_brightness(color, brightness_factor=-0.7) - else: - edge_color = color - edge_color = mplc.to_rgb(edge_color) + (1,) - - polygon = mpl.patches.Polygon( - segment, - fill=True, - facecolor=mplc.to_rgb(color) + (alpha,), - edgecolor=edge_color, - linewidth=max(self._default_font_size // 15 * self.output.scale, 1), - ) - self.output.ax.add_patch(polygon) - return self.output - - """ - Internal methods: - """ - - def _jitter(self, color): - """ - Randomly modifies given color to produce a slightly different color than the color given. - - Args: - color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color - picked. The values in the list are in the [0.0, 1.0] range. - - Returns: - jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the - color after being jittered. The values in the list are in the [0.0, 1.0] range. - """ - color = mplc.to_rgb(color) - # np.random.seed(0) - vec = np.random.rand(3) - # better to do it in another color space - vec = vec / np.linalg.norm(vec) * 0.5 - res = np.clip(vec + color, 0, 1) - return tuple(res) - - def _create_grayscale_image(self, mask=None): - """ - Create a grayscale version of the original image. - The colors in masked area, if given, will be kept. - """ - img_bw = self.img.astype("f4").mean(axis=2) - img_bw = np.stack([img_bw] * 3, axis=2) - if mask is not None: - img_bw[mask] = self.img[mask] - return img_bw - - def _change_color_brightness(self, color, brightness_factor): - """ - Depending on the brightness_factor, gives a lighter or darker color i.e. a color with - less or more saturation than the original color. - - Args: - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of - 0 will correspond to no change, a factor in [-1.0, 0) range will result in - a darker color and a factor in (0, 1.0] range will result in a lighter color. - - Returns: - modified_color (tuple[double]): a tuple containing the RGB values of the - modified color. Each value in the tuple is in the [0.0, 1.0] range. - """ - assert brightness_factor >= -1.0 and brightness_factor <= 1.0 - color = mplc.to_rgb(color) - polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) - modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) - modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness - modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness - modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) - return modified_color - - def _convert_boxes(self, boxes): - """ - Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. - """ - if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): - return boxes.tensor.detach().numpy() - else: - return np.asarray(boxes) - - def _convert_masks(self, masks_or_polygons): - """ - Convert different format of masks or polygons to a tuple of masks and polygons. 
- - Returns: - list[GenericMask]: - """ - - m = masks_or_polygons - if isinstance(m, PolygonMasks): - m = m.polygons - if isinstance(m, BitMasks): - m = m.tensor.numpy() - if isinstance(m, torch.Tensor): - m = m.numpy() - ret = [] - for x in m: - if isinstance(x, GenericMask): - ret.append(x) - else: - ret.append(GenericMask(x, self.output.height, self.output.width)) - return ret - - def _draw_number_in_mask(self, binary_mask, text, color, label_mode='1'): - """ - Find proper places to draw text given a binary mask. - """ - - def number_to_string(n): - chars = [] - while n: - n, remainder = divmod(n-1, 26) - chars.append(chr(97 + remainder)) - return ''.join(reversed(chars)) - - binary_mask = np.pad(binary_mask, ((1, 1), (1, 1)), 'constant') - mask_dt = cv2.distanceTransform(binary_mask, cv2.DIST_L2, 0) - mask_dt = mask_dt[1:-1, 1:-1] - max_dist = np.max(mask_dt) - coords_y, coords_x = np.where(mask_dt == max_dist) # coords is [y, x] - - if label_mode == 'a': - text = number_to_string(int(text)) - else: - text = text - - self.draw_text(text, (coords_x[len(coords_x)//2] + 2, coords_y[len(coords_y)//2] - 6), color=color) - - # TODO sometimes drawn on wrong objects. the heuristics here can improve. - # _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) - # if stats[1:, -1].size == 0: - # return - # largest_component_id = np.argmax(stats[1:, -1]) + 1 - - # # draw text on the largest component, as well as other very large components. - # for cid in range(1, _num_cc): - # if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: - # # median is more stable than centroid - # # center = centroids[largest_component_id] - # center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] - # # bottom=np.max((cc_labels == cid).nonzero(), axis=1)[::-1] - # # center[1]=bottom[1]+2 - # self.draw_text(text, center, color=color) - - def _draw_text_in_mask(self, binary_mask, text, color): - """ - Find proper places to draw text given a binary mask. - """ - # TODO sometimes drawn on wrong objects. the heuristics here can improve. - _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) - if stats[1:, -1].size == 0: - return - largest_component_id = np.argmax(stats[1:, -1]) + 1 - - # draw text on the largest component, as well as other very large components. - for cid in range(1, _num_cc): - if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: - # median is more stable than centroid - # center = centroids[largest_component_id] - center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] - bottom=np.max((cc_labels == cid).nonzero(), axis=1)[::-1] - center[1]=bottom[1]+2 - self.draw_text(text, center, color=color) - - def _convert_keypoints(self, keypoints): - if isinstance(keypoints, Keypoints): - keypoints = keypoints.tensor - keypoints = np.asarray(keypoints) - return keypoints - - def get_output(self): - """ - Returns: - output (VisImage): the image output containing the visualizations added - to the image. 
- """ - return self.output \ No newline at end of file From 980f7290eb523603e62d776e217dba12382c0379 Mon Sep 17 00:00:00 2001 From: Siheng Zhao <1730625285@qq.com> Date: Sat, 20 Jan 2024 19:44:52 +0800 Subject: [PATCH 09/13] add vscode examples --- desktop_env/evaluators/metrics/vscode.py | 39 +++++++++++++++ .../276cc624-87ea-4f08-ab93-f770e3790175.json | 40 ++++++++++++++++ .../3486f395-ad68-459c-8c39-ea07de934dd4.json | 12 ----- .../4e60007a-f5be-4bfc-9723-c39affa0a6d3.json | 46 ++++++++++++++++++ .../515630d2-9b30-430c-b06a-e86b0143f7fb.json | 12 ----- .../59ed65c7-e9a6-43db-833f-76d6730c0004.json | 2 +- .../6f7546b0-52f3-4938-9213-52f35454d314.json | 12 ----- .../90f6eeeb-f3c2-4c98-873c-e77d78a45578.json | 12 ----- .../930fdb3b-11a8-46fe-9bac-577332e2640e.json | 45 ++++++++++++++++++ .../9439a27b-18ae-42d8-9778-5f68f891805e.json | 40 ++++++++++++++++ .../9d425400-e9b2-4424-9a4b-d4c7abac4140.json | 40 ++++++++++++++++ .../ae506c68-352c-4094-9caa-ee9d42052317.json | 39 +++++++++++++++ .../e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2.json | 42 +++++++++++++++++ .../ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae.json | 47 +++++++++++++++++++ 14 files changed, 379 insertions(+), 49 deletions(-) create mode 100644 evaluation_examples/examples/vs_code/276cc624-87ea-4f08-ab93-f770e3790175.json delete mode 100644 evaluation_examples/examples/vs_code/3486f395-ad68-459c-8c39-ea07de934dd4.json create mode 100644 evaluation_examples/examples/vs_code/4e60007a-f5be-4bfc-9723-c39affa0a6d3.json delete mode 100644 evaluation_examples/examples/vs_code/515630d2-9b30-430c-b06a-e86b0143f7fb.json delete mode 100644 evaluation_examples/examples/vs_code/6f7546b0-52f3-4938-9213-52f35454d314.json delete mode 100644 evaluation_examples/examples/vs_code/90f6eeeb-f3c2-4c98-873c-e77d78a45578.json create mode 100644 evaluation_examples/examples/vs_code/930fdb3b-11a8-46fe-9bac-577332e2640e.json create mode 100644 evaluation_examples/examples/vs_code/9439a27b-18ae-42d8-9778-5f68f891805e.json create mode 100644 evaluation_examples/examples/vs_code/9d425400-e9b2-4424-9a4b-d4c7abac4140.json create mode 100644 evaluation_examples/examples/vs_code/ae506c68-352c-4094-9caa-ee9d42052317.json create mode 100644 evaluation_examples/examples/vs_code/e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2.json create mode 100644 evaluation_examples/examples/vs_code/ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae.json diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index ac98d72..55f6407 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -1,5 +1,44 @@ from typing import Dict +import json +def check_json_keybindings(actual: str, expected: str, **options) -> float: + """ + Args: + actual (str): path to result text file + expected (str): expected dict{} + + Return: + float: the score + """ + + with open(actual) as f: + data = json.load(f) + + if expected in data: + return 1.0 + else: + return 0.0 + +def check_json_settings(actual: str, expected: str, **options) -> float: + """ + Args: + actual (str): path to result text file + expected (str): expected dict{} + + Return: + float: the score + """ + + with open(actual) as f: + data = json.load(f) + + expect = set(expected.items()) + json = set(data.items()) + + if expect.issubset(json): + return 1.0 + else: + return 0.0 def compare_text_file(actual: str, expected: str, **options) -> float: """ diff --git a/evaluation_examples/examples/vs_code/276cc624-87ea-4f08-ab93-f770e3790175.json 
b/evaluation_examples/examples/vs_code/276cc624-87ea-4f08-ab93-f770e3790175.json new file mode 100644 index 0000000..629dbe7 --- /dev/null +++ b/evaluation_examples/examples/vs_code/276cc624-87ea-4f08-ab93-f770e3790175.json @@ -0,0 +1,40 @@ +{ + "id": "276cc624-87ea-4f08-ab93-f770e3790175", + "snapshot": "vscode", + "instruction": "Could you help me set the line length to 50 characters for current user in VS Code?", + "source": "https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "vscode" + ], + "evaluator": { + "func": "check_json_settings", + "expected": { + "type": "rule", + "rules": { + "expect": {"editor.rulers": [50]} + } + }, + "result": { + "type": "vm_file", + "path": "/home/user/.config/Code/User/settings.json", + "dest": "settings.json" + } + } +} diff --git a/evaluation_examples/examples/vs_code/3486f395-ad68-459c-8c39-ea07de934dd4.json b/evaluation_examples/examples/vs_code/3486f395-ad68-459c-8c39-ea07de934dd4.json deleted file mode 100644 index d99df8c..0000000 --- a/evaluation_examples/examples/vs_code/3486f395-ad68-459c-8c39-ea07de934dd4.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "id": "3486f395-ad68-459c-8c39-ea07de934dd4", - "snapshot": "vscode", - "instruction": "Find me the keyboard shortcut of toggling integrated terminal. ", - "source": "https://www.youtube.com/watch?v=VqCgcpAypFQ", - "config": [], - "trajectory": "trajectories/3486f395-ad68-459c-8c39-ea07de934dd4", - "related_apps": [ - "vscode" - ], - "evaluator": "evaluation_dir" -} diff --git a/evaluation_examples/examples/vs_code/4e60007a-f5be-4bfc-9723-c39affa0a6d3.json b/evaluation_examples/examples/vs_code/4e60007a-f5be-4bfc-9723-c39affa0a6d3.json new file mode 100644 index 0000000..b36a7c8 --- /dev/null +++ b/evaluation_examples/examples/vs_code/4e60007a-f5be-4bfc-9723-c39affa0a6d3.json @@ -0,0 +1,46 @@ +{ + "id": "4e60007a-f5be-4bfc-9723-c39affa0a6d3", + "snapshot": "vscode", + "instruction": "Install autoDocstring extension.", + "source": "https://campbell-muscle-lab.github.io/howtos_Python/pages/documentation/best_practices/vscode_docstring_extension/vscode_docstring_extension.html#:~:text=Type%2C%20Ctrl%20%2B%20Shift%20%2B%20P,select%20the%20NumPy%20docstring%20format.", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], + "trajectory": "trajectories/4e60007a-f5be-4bfc-9723-c39affa0a6d3", + "related_apps": [ + "vscode" + ], + "evaluator": { + "func": "is_extension_installed", + "result": { + "type": "vm_command_line", + "command": [ + "code", + "--list-extensions", + "|", + "grep", + "njpwerner.autodocstring" + ] + }, + "expected": { + "type": "rule", + "rules": { + "type": "contain", + "expected": "njpwerner.autodocstring" + } + } + } +} diff --git a/evaluation_examples/examples/vs_code/515630d2-9b30-430c-b06a-e86b0143f7fb.json b/evaluation_examples/examples/vs_code/515630d2-9b30-430c-b06a-e86b0143f7fb.json deleted file mode 100644 index 8f8bb5c..0000000 --- a/evaluation_examples/examples/vs_code/515630d2-9b30-430c-b06a-e86b0143f7fb.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "id": "515630d2-9b30-430c-b06a-e86b0143f7fb", - "snapshot": "vscode", - "instruction": "Help me enable 
automatically run code in VS code", - "source": "https://www.quora.com/How-do-I-automatically-run-code-in-Visual-Studio-Code", - "config": [], - "trajectory": "trajectories/", - "related_apps": [ - "vscode" - ], - "evaluator": "evaluation_dir" -} diff --git a/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json b/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json index 956138d..86250c7 100644 --- a/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json +++ b/evaluation_examples/examples/vs_code/59ed65c7-e9a6-43db-833f-76d6730c0004.json @@ -37,7 +37,7 @@ "expected": { "type": "rule", "rules": { - "expect": "100" + "expect": "1" } }, "result": { diff --git a/evaluation_examples/examples/vs_code/6f7546b0-52f3-4938-9213-52f35454d314.json b/evaluation_examples/examples/vs_code/6f7546b0-52f3-4938-9213-52f35454d314.json deleted file mode 100644 index 1dcb21e..0000000 --- a/evaluation_examples/examples/vs_code/6f7546b0-52f3-4938-9213-52f35454d314.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "id": "6f7546b0-52f3-4938-9213-52f35454d314", - "snapshot": "vscode", - "instruction": "Help me ask chatGPT to generate html and css code for a scroll bar?", - "source": "https://www.tiktok.com/@akramovdev/video/7243349980897922306", - "config": [], - "trajectory": "trajectories/", - "related_apps": [ - "vscode" - ], - "evaluator": "evaluation_dir" -} diff --git a/evaluation_examples/examples/vs_code/90f6eeeb-f3c2-4c98-873c-e77d78a45578.json b/evaluation_examples/examples/vs_code/90f6eeeb-f3c2-4c98-873c-e77d78a45578.json deleted file mode 100644 index 955efa2..0000000 --- a/evaluation_examples/examples/vs_code/90f6eeeb-f3c2-4c98-873c-e77d78a45578.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "id": "90f6eeeb-f3c2-4c98-873c-e77d78a45578", - "snapshot": "vscode", - "instruction": "Help me sync extensions and settings across all profiles.", - "source": "https://stackoverflow.com/questions/75866801/how-do-i-sync-extensions-and-their-settings-between-vs-code-profiles", - "config": [], - "trajectory": "trajectories/", - "related_apps": [ - "vscode" - ], - "evaluator": "evaluation_dir" -} diff --git a/evaluation_examples/examples/vs_code/930fdb3b-11a8-46fe-9bac-577332e2640e.json b/evaluation_examples/examples/vs_code/930fdb3b-11a8-46fe-9bac-577332e2640e.json new file mode 100644 index 0000000..a66ecdd --- /dev/null +++ b/evaluation_examples/examples/vs_code/930fdb3b-11a8-46fe-9bac-577332e2640e.json @@ -0,0 +1,45 @@ +{ + "id": "930fdb3b-11a8-46fe-9bac-577332e2640e", + "snapshot": "vscode", + "instruction": "I want to create a shortcut to shift my focus cursor from terminal to Editor in VS Code. 
Please help me create this shortcut to be 'ctrl+j'.", + "source": "https://superuser.com/questions/1270103/how-to-switch-the-cursor-between-terminal-and-code-in-vscode", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "vscode" + ], + "evaluator": { + "func": "check_json_keybindings", + "expected": { + "type": "rule", + "rules": { + "expect": + { + "key": "ctrl+j", + "command": "workbench.action.focusActiveEditorGroup", + "when": "terminalFocus" + } + } + }, + "result": { + "type": "vm_file", + "path": "/home/user/.config/Code/User/keybindings.json", + "dest": "keybindings.json" + } + } +} diff --git a/evaluation_examples/examples/vs_code/9439a27b-18ae-42d8-9778-5f68f891805e.json b/evaluation_examples/examples/vs_code/9439a27b-18ae-42d8-9778-5f68f891805e.json new file mode 100644 index 0000000..2f848b2 --- /dev/null +++ b/evaluation_examples/examples/vs_code/9439a27b-18ae-42d8-9778-5f68f891805e.json @@ -0,0 +1,40 @@ +{ + "id": "9439a27b-18ae-42d8-9778-5f68f891805e", + "snapshot": "vscode", + "instruction": "I want to keep my cursor focus in debug console when debugging in VS Code, instead of focusing back to Editor. So please help me modify the setting of VS Code accordingly.", + "source": "https://stackoverflow.com/questions/75832474/how-to-keep-cursor-in-debug-console-when-debugging-in-visual-studio-code", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "vscode" + ], + "evaluator": { + "func": "check_json_settings", + "expected": { + "type": "rule", + "rules": { + "expect": {"debug.focusEditorOnBreak": false} + } + }, + "result": { + "type": "vm_file", + "path": "/home/user/.config/Code/User/settings.json", + "dest": "settings.json" + } + } +} diff --git a/evaluation_examples/examples/vs_code/9d425400-e9b2-4424-9a4b-d4c7abac4140.json b/evaluation_examples/examples/vs_code/9d425400-e9b2-4424-9a4b-d4c7abac4140.json new file mode 100644 index 0000000..080bdf1 --- /dev/null +++ b/evaluation_examples/examples/vs_code/9d425400-e9b2-4424-9a4b-d4c7abac4140.json @@ -0,0 +1,40 @@ +{ + "id": "9d425400-e9b2-4424-9a4b-d4c7abac4140", + "snapshot": "vscode", + "instruction": "I want to make tabs wrapped over multiple lines when exceeding available space, please help modify the setting of VS Code.", + "source": "https://superuser.com/questions/1466771/is-there-a-way-to-make-editor-tabs-stack-in-vs-code", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "vscode" + ], + "evaluator": { + "func": "check_json_settings", + "expected": { + "type": "rule", + "rules": { + "expect": {"workbench.editor.wrapTabs": true} + } + }, + "result": { + "type": "vm_file", + "path": "/home/user/.config/Code/User/settings.json", + "dest": "settings.json" + } + } +} diff --git a/evaluation_examples/examples/vs_code/ae506c68-352c-4094-9caa-ee9d42052317.json b/evaluation_examples/examples/vs_code/ae506c68-352c-4094-9caa-ee9d42052317.json new file mode 100644 index 0000000..6bac7ce --- /dev/null +++ 
b/evaluation_examples/examples/vs_code/ae506c68-352c-4094-9caa-ee9d42052317.json @@ -0,0 +1,39 @@ +{ + "id": "ae506c68-352c-4094-9caa-ee9d42052317", + "snapshot": "vscode", + "instruction": "Could you store the full terminal history of my VS Code terminal into '/home/user/Desktop/history.txt'?", + "source": "", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], + "trajectory": "trajectories/ae506c68-352c-4094-9caa-ee9d42052317", + "related_apps": [ + "vscode" + ], + "evaluator": { + "func": "compare_text_file", + "expected": { + "type": "cloud_file", + "path": "", + "dest": "gold_history.txt" + }, + "result": { + "type": "vm_file", + "path": "Desktop/history.txt", + "dest": "history.txt" + } + } +} diff --git a/evaluation_examples/examples/vs_code/e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2.json b/evaluation_examples/examples/vs_code/e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2.json new file mode 100644 index 0000000..306871a --- /dev/null +++ b/evaluation_examples/examples/vs_code/e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2.json @@ -0,0 +1,42 @@ +{ + "id": "e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2", + "snapshot": "vscode", + "instruction": "I want to disable the missing imports reporting of python error, please modify the setting of VS Code for me.", + "source": "https://superuser.com/questions/1386061/how-to-suppress-some-python-errors-warnings-in-vs-code", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "vscode" + ], + "evaluator": { + "func": "check_json_settings", + "expected": { + "type": "rule", + "rules": { + "expect": { + "python.analysis.diagnosticSeverityOverrides": {"reportMissingImports": "none"} + } + } + }, + "result": { + "type": "vm_file", + "path": "/home/user/.config/Code/User/settings.json", + "dest": "settings.json" + } + } +} diff --git a/evaluation_examples/examples/vs_code/ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae.json b/evaluation_examples/examples/vs_code/ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae.json new file mode 100644 index 0000000..f2667e9 --- /dev/null +++ b/evaluation_examples/examples/vs_code/ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae.json @@ -0,0 +1,47 @@ +{ + "id": "ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae", + "snapshot": "vscode", + "instruction": "I want to remove the shortcut 'cmd+f' for Tree view Find (Explorer search) in VS Code explorer view due to shortcut conflict. 
Can you help me remove this shortcut?", + "source": ["https://superuser.com/questions/1748097/vs-code-disable-tree-view-find-explorer-search", + "https://superuser.com/questions/1417361/how-to-disable-file-filtering-in-vs-code-sidebar-explorer?rq=1" + ], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Visual Studio Code" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "vscode" + ], + "evaluator": { + "func": "check_json_keybindings", + "expected": { + "type": "rule", + "rules": { + "expect": + { + "key": "cmd+f", + "command": "-list.find", + "when": "listFocus && listSupportsFind" + } + } + }, + "result": { + "type": "vm_file", + "path": "/home/user/.config/Code/User/keybindings.json", + "dest": "keybindings.json" + } + } +} From 6807cc0cc2c5c3d699d49933ee335ea300007e49 Mon Sep 17 00:00:00 2001 From: tsuky_chen <3107760494@qq.com> Date: Sat, 20 Jan 2024 22:22:06 +0800 Subject: [PATCH 10/13] add get Source 2 Doc Func --- .../Source2Doc/Get_Source_Doc - Sheet1.csv | 272 ++++++++++++++++++ .../Source2Doc/get_Source_Doc.py | 89 ++++++ 2 files changed, 361 insertions(+) create mode 100644 resouce_collection/Source2Doc/Get_Source_Doc - Sheet1.csv create mode 100644 resouce_collection/Source2Doc/get_Source_Doc.py diff --git a/resouce_collection/Source2Doc/Get_Source_Doc - Sheet1.csv b/resouce_collection/Source2Doc/Get_Source_Doc - Sheet1.csv new file mode 100644 index 0000000..1b06f2c --- /dev/null +++ b/resouce_collection/Source2Doc/Get_Source_Doc - Sheet1.csv @@ -0,0 +1,272 @@ +id,Source +94d95f96-9699-4208-98ba-3c3119edf9c2,https://help.ubuntu.com/lts/ubuntu-help/addremove-install.html.en +bedcedc4-4d72-425e-ad62-21960b11fe0d,https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s +43c2d64c-bab5-4dcb-a30c-b888321c319a,https://ubuntu.com/tutorials/command-line-for-beginners#4-creating-folders-and-files +7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82,https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files +ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3,https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s +a462a795-fdc7-4b23-b689-e8b6df786b78,https://help.ubuntu.com/lts/ubuntu-help/shell-exit.html.en +f9be0997-4b7c-45c5-b05c-4612b44a6118,https://help.ubuntu.com/lts/ubuntu-help/shell-notifications.html.en +ae039631-2b12-4637-84f6-c67d51511be3,https://help.ubuntu.com/lts/ubuntu-help/net-default-browser.html.en +e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15,https://help.ubuntu.com/lts/ubuntu-help/contacts-add-remove.html.en +28cc3b7e-b194-4bc9-8353-d04c0f4d56d2,https://help.ubuntu.com/lts/ubuntu-help/sound-volume.html.en +5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57,https://help.ubuntu.com/lts/ubuntu-help/files-recover.html.en +e0df059f-28a6-4169-924f-b9623e7184cc,https://help.ubuntu.com/lts/ubuntu-help/files-rename.html.en +ddc75b62-7311-4af8-bfb3-859558542b36,https://help.ubuntu.com/lts/ubuntu-help/addremove-remove.html.en +5c433d22-ed9a-4e31-91f5-54cf3e8acd63,https://help.ubuntu.com/lts/ubuntu-help/session-language.html.zh-CN +b6781586-6346-41cd-935a-a6b1487918fc,https://help.ubuntu.com/lts/ubuntu-help/clock-timezone.html.en +b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa,https://help.ubuntu.com/lts/ubuntu-help/bluetooth-turn-on-off.html.en +3ce045a0-877b-42aa-8d2c-b4a863336ab8,https://help.ubuntu.com/lts/ubuntu-help/a11y-font-size.html.en +fe41f596-a71b-4c2f-9b2f-9dcd40b568c3,https://help.ubuntu.com/lts/ubuntu-help/power-percentage.html.en 
+a4d98375-215b-4a4d-aee9-3d4370fccc41,https://help.ubuntu.com/lts/ubuntu-help/privacy-screen-lock.html.en +765d2b74-88a7-4d50-bf51-34e4106fd24a,https://help.ubuntu.com/lts/ubuntu-help/files-delete.html.en +cc9d4f34-1ca0-4a1b-8ff2-09302696acb9,https://superuser.com/questions/178587/how-do-i-detach-a-process-from-terminal-entirely +5812b315-e7bd-4265-b51f-863c02174c28,https://superuser.com/questions/149404/create-an-ssh-user-who-only-has-permission-to-access-specific-folders +c56de254-a3ec-414e-81a6-83d2ce8c41fa,https://superuser.com/questions/28426/how-to-extract-text-with-ocr-from-a-pdf-on-linux +6ebbfb01-ea72-4226-a2a6-dc428e111ed2,https://superuser.com/questions/46748/how-do-i-make-bash-my-default-shell-on-ubuntu +4d2b519e-e872-4100-8ea3-fe71ab0f9133,https://stackoverflow.com/questions/11530090/adding-a-new-entry-to-the-path-variable-in-zsh +c288e301-e626-4b98-a1ab-159dcb162af5,https://stackoverflow.com/questions/41986507/unable-to-set-default-python-version-to-python3-in-ubuntu +13584542-872b-42d8-b299-866967b5c3ef,https://superuser.com/questions/72176/linux-set-default-terminal-size-and-screen-position +23393935-50c7-4a86-aeea-2b78fd089c5c,https://superuser.com/questions/91307/copying-only-jpg-from-a-directory-structure-to-another-location-linux +f10b16e1-c160-4cb3-989f-7b2ec89bc073,https://www.wikihow.com/Install-Gnome-on-Ubuntu +eb03d19a-b88d-4de4-8a64-ca0ac66f426b,https://www.youtube.com/shorts/t9JLUaT55UQ +0bf05a7d-b28b-44d2-955a-50b41e24012a,https://www.youtube.com/shorts/FPAQaDTS8VY +7b802dad-6e0f-4204-9815-d4e3f57627d8,https://www.youtube.com/shorts/Of-lzeP1usE +7a4e4bc8-922c-4c84-865c-25ba34136be1,https://www.youtube.com/shorts/bvUhr1AHs44 +2bd59342-0664-4ccb-ba87-79379096cc08,https://www.youtube.com/shorts/L3Z-F1QTQFY +a9f325aa-8c05-4e4f-8341-9e4358565f4f,https://www.youtube.com/shorts/A0gmEBRKXWs +ecb0df7a-4e8d-4a03-b162-053391d3afaf,https://www.youtube.com/shorts/tXOovKn0H68 +7efeb4b1-3d19-4762-b163-63328d66303b,https://www.youtube.com/shorts/4jzXfZNhfmk +4e6fcf72-daf3-439f-a232-c434ce416af6,https://www.youtube.com/shorts/0uxJccNCKcE +6054afcb-5bab-4702-90a0-b259b5d3217c,https://www.youtube.com/shorts/JTbZ8sRxkdU +abed40dc-063f-4598-8ba5-9fe749c0615d,https://www.youtube.com/shorts/xgf4ZpsEx5M +01b269ae-2111-4a07-81fd-3fcd711993b0,https://www.youtube.com/shorts/VrUzPTIwQ04 +8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14,https://www.youtube.com/shorts/Hbcwu6IQ1ns +af2b02f7-acee-4be4-8b66-499fab394915,https://www.youtube.com/shorts/AwKsb5VmtBI +da1d63b8-fa12-417b-ba18-f748e5f770f3,https://www.youtube.com/shorts/hquscnbz2-U +636380ea-d5f6-4474-b6ca-b2ed578a20f1,https://www.youtube.com/shorts/_BYL6VOHLGw +5ba77536-05c5-4aae-a9ff-6e298d094c3e,https://www.youtube.com/shorts/CuBC1evUS5I +4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b,https://www.youtube.com/shorts/1adQWfjN-tI +672a1b02-c62f-4ae2-acf0-37f5fb3052b0,https://www.youtube.com/shorts/2rhdQXI4Lng +648fe544-16ba-44af-a587-12ccbe280ea6,https://www.youtube.com/shorts/sOPBMWaC6Uc +8985d1e4-5b99-4711-add4-88949ebb2308,https://www.youtube.com/shorts/J5ts2Acv9Pc +9e606842-2e27-43bf-b1d1-b43289c9589b,https://www.youtube.com/shorts/B-mGYDFOyUs +fcb6e45b-25c4-4087-9483-03d714f473a9,https://www.youtube.com/shorts/GZipp7nOZS0 +68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2,https://www.youtube.com/shorts/JEH5TsK-cCk +fff629ea-046e-4793-8eec-1a5a15c3eb35,https://www.youtube.com/shorts/8WybtCdUT6w +5c9a206c-bb00-4fb6-bb46-ee675c187df5,https://www.youtube.com/shorts/VbQtMNnq9i4 +e975ae74-79bd-4672-8d1c-dc841a85781d,https://www.youtube.com/shorts/GjT7gGe5Sr8 
+34a6938a-58da-4897-8639-9b90d6db5391,https://www.youtube.com/shorts/gW37x2TkzOY +b5a22759-b4eb-4bf2-aeed-ad14e8615f19,https://www.youtube.com/shorts/3xLa-D0C7Ic +2f9913a1-51ed-4db6-bfe0-7e1c95b3139e,https://www.youtube.com/shorts/dGLRcmfVO6Q +2558031e-401d-4579-8e00-3ecf540fb492,https://www.mrexcel.com/board/threads/sales-for-the-first-6-weeks.1249213/ +39aa4e37-dc91-482e-99af-132a612d40f3,"Add, Insert, Delete, Copy, Move, Rename a Worksheet in LibreOffice Calc (libreofficehelp.com)" +0cecd4f3-74de-457b-ba94-29ad6b5dafb6,"Add, Insert, Delete, Copy, Move, Rename a Worksheet in LibreOffice Calc (libreofficehelp.com)" +4188d3a4-077d-46b7-9c86-23e1a036f6c1,How to Freeze and Unfreeze Rows & columns in LibreOffice Calc - libreofficehelp.com +51b11269-2ca8-4b2a-9163-f21758420e78,How to arrange numbers in a column from minimum to maximum or vice versa ? : LibreOfficeCalc (reddit.com) +7e429b8d-a3f0-4ed0-9b58-08957d00b127,https://medium.com/@divyangichaudhari17/how-to-use-vlookup-and-hlookup-in-libre-calc-3370698bb3ff +f5a90742-3fa2-40fc-a564-f29b054e0337,LibreOffice Calc: How to apply functions to columns? - Super User +22df9241-f8d7-4509-b7f1-37e501a823f7,How do you move cells in LibreOffice Calc - Super User +1434ca3e-f9e3-4db8-9ca7-b4c653be7d17,How to Remove Duplicates in Open Office Calc: 5 Steps (wikihow.com) +347ef137-7eeb-4c80-a3bb-0951f26a8aff,Libre Office 7 Calc Spreadsheets Beginners Tutorial (youtube.com) +6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5,Learn LibreOffice Calc 6 in Under 30 MInutes | Complete Tutorial for Beginners (youtube.com) +3aaa4e37-dc91-482e-99af-132a612d40f3,How to import/export CSV files with LibreOffice Calc (or OpenOffice) - Quora +0decd4f3-74de-457b-ba94-29ad6b5dafb6,https://justclickhere.co.uk/resources/checkboxes-tick-boxes-libreoffice-calc/ +37608790-6147-45d0-9f20-1137bb35703d,https://www.youtube.com/shorts/uzPo_CPCHH8 +f9584479-3d0d-4c79-affa-9ad7afdd8850,https://youtube.com/shorts/feldd-Pn48c?si=9xJiem2uAHm6Jshb +d681960f-7bc3-4286-9913-a8812ba3261a,https://www.youtube.com/shorts/d7U1S_IsTVM +f6a90742-3fa2-40fc-a564-f29b054e0337,https://www.excel-easy.com/examples/drop-down-list.html +21df9241-f8d7-4509-b7f1-37e501a823f7,https://www.youtube.com/watch?v=p5C4V_AO1UU +1334ca3e-f9e3-4db8-9ca7-b4c653be7d17,https://techcommunity.microsoft.com/t5/excel/excel-workbook-top-way-too-big-can-t-see-rows-and-columns/m-p/4014694 +357ef137-7eeb-4c80-a3bb-0951f26a8aff,https://www.reddit.com/r/excel/comments/17zny8u/calculating_total_amount_earned_from_total_hours/ +6f99a1ad-07d2-4b66-a1ce-ece6d99c20a5,https://techcommunity.microsoft.com/t5/excel/sumarize-the-sheetnames/m-p/4014716 +aa3a8974-2e85-438b-b29e-a64df44deb4b,https://www.quora.com/Libre-Office-Calc-How-do-I-resize-all-cells-in-a-sheet-to-make-them-fit-to-1-page-for-printing-and-exporting-as-PDF +a01fbce3-2793-461f-ab86-43680ccbae25,https://superuser.com/questions/1250677/how-to-set-decimal-separator-in-libre-office-calc +4f07fbe9-70de-4927-a4d5-bb28bc12c52c,https://superuser.com/questions/1081048/libreoffice-calc-how-to-pad-number-to-fixed-decimals-when-used-within-formula +e3b1d5fa-ed00-4129-bda1-1452bd2b6772,https://www.reddit.com/r/libreoffice/comments/tel112/calc_how_to_calculate_sum_by_categories/ +ca6a9524-f8e9-4d2f-9364-ab0cad567739,https://www.reddit.com/r/libreoffice/comments/113gmyc/how_to_remove_certain_text_from_cells_in_calc/ +a455e8d0-930f-40d2-9575-5e8d2d222f58,https://superuser.com/questions/562944/quickly-fill-blank-cells-in-a-list-in-libreoffice-calc 
+83ee22c6-7737-49ce-9b5a-138c3e92af04,https://superuser.com/questions/661102/currency-conversion-in-libreoffice-calc +819f61c2-ec77-4d3f-9996-0838ae5aacc8,https://superuser.com/questions/381696/creating-a-column-of-working-days-in-libreoffice-calc +69d577b3-004e-4bca-89b2-0d7c2f6049e3,https://superuser.com/questions/387106/libreoffice-calc-how-to-get-total-for-hhmmss-cells +0a1bf4ca-d4ea-4618-baa5-6e8dc1b46d82,https://superuser.com/questions/571915/sum-up-to-n-highest-value-out-of-a-series +ac9bb6cb-1888-43ab-81e4-a98a547918cd,https://superuser.com/questions/1674211/how-to-change-colour-of-slide-number-in-libre-office +5d901039-a89c-4bfb-967b-bf66f4df075e,https://superuser.com/questions/986776/how-can-i-stretch-an-image-in-a-libreoffice-impress-presentation-to-fill-the-pag +071d4ace-091a-4ec3-886e-f4be55ae375d,https://superuser.com/questions/706860/hide-slide-numbers-and-slide-footer-on-first-and-second-slide-in-libreoffice-imp?rq=1 +550ce7e7-747b-495f-b122-acdc4d0b8e54,https://superuser.com/questions/1211035/libreoffice-impress-animations-how-to-strikethrough-on-click?rq=1 +455d3c66-7dc6-4537-a39a-36d3e9119df7,https://stackoverflow.com/questions/75626383/how-export-libreoffice-impress-to-image +af23762e-2bfd-4a1d-aada-20fa8de9ce07,https://superuser.com/questions/1059080/how-to-make-a-summary-slide-in-impress-listing-the-titles-of-all-slides-autom +c59742c0-4323-4b9d-8a02-723c251deaa0,https://www.reddit.com/r/libreoffice/comments/17lcdrp/audio_not_supported_in_libreoffice_impress/ +39478d4a-1049-456f-aa77-407811393add,https://www.reddit.com/r/libreoffice/comments/jul3o8/putting_cap_or_hat_or_carat_symbol_in_libre/ +82615ce3-95fb-4bba-abcd-daa85753e282,https://stackoverflow.com/questions/72898114/how-to-change-the-appearance-of-a-button-once-it-has-been-clicked-in-libre-impre +c3ad4442-499f-4e58-bc4e-1a1417ea9b8c,http://maharajacollege.ac.in/material/Libreofficeimpresspdf.pdf +ef9d12bd-bcee-4ba0-a40e-918400f43ddf,(1) I closed the slide pannel on the left and idk how to get it back please help : libreoffice (reddit.com) +9ec204e4-f0a3-42f8-8458-b772a6797cab,How to Copy a LibreOffice Impress Master Slide|TikTok 搜索 +35761501-3a99-48d1-8ec6-cea8242349b6,(1) How do I compress a selection of images in LibreOffice Impress? - Quora +0f84bef9-9790-432e-92b7-eece357603fb,ubuntu - how to disable LibreOffice Impress to use multiple display? 
- Stack Overflow +ce88f674-ab7a-43da-9201-468d38539e4a,How to Change Slides to Portrait in LibreOffice Impress - Just Click Here - IT Training +f0a334af-f91b-4c03-b578-aac9bec2b543,Insert Video in Impress Presentation on LibreOffice (libreofficehelp.com) +3b27600c-3668-4abd-8f84-7bcdebbccbdb,Change Slide Background Color in LibreOffice Impress (libreofficehelp.com) +a097acff-6266-4291-9fbd-137af7ecd439,https://www.youtube.com/watch?v=DDmEvjs4iBw +21760ecb-8f62-40d2-8d85-0cee5725cb72,Add Animations and Transitions in LibreOffice Impress Slides - libreofficehelp.com +3cc4f35d-fa2e-4555-afb9-741b7c062a74,Impress Guide 7.6 (libreoffice.org) +6ada715d-3aae-4a32-a6a7-429b2e43fb93,https://www.quora.com/How-do-you-insert-images-into-a-LibreOffice-Writer-document +ecc2413d-8a48-416e-a3a2-d30106ca36cb,https://www.quora.com/How-can-I-insert-a-blank-page-on-libreoffice +0e47de2a-32e0-456c-a366-8c607ef7a9d2,https://ask.libreoffice.org/t/how-to-start-page-numbering-on-a-certain-page/39931/4 +4bcb1253-a636-4df4-8cb0-a35c04dfef31,https://www.libreofficehelp.com/save-export-writer-documents-in-pdf-epub-format/ +0810415c-bde4-4443-9047-d5f70165a697,https://www.youtube.com/watch?v=Q_AaL6ljudU +e528b65e-1107-4b8c-8988-490e4fece599,https://www.youtube.com/watch?v=l25Evu4ohKg +66399b0d-8fda-4618-95c4-bfc6191617e9,https://www.youtube.com/watch?v=l25Evu4ohKg +936321ce-5236-426a-9a20-e0e3c5dc536f,https://www.youtube.com/watch?v=l25Evu4ohKg +663876c7-3471-43db-ba51-f410b13d9d7d,https://askubuntu.com/questions/319593/how-to-type-science-equations-in-libre-office +3ef2b351-8a84-4ff2-8724-d86eae9b842e,https://askubuntu.com/questions/1066351/how-do-you-center-align-in-libreoffice#:~:text=Ctrl%20%2B%20e%20will%20Center%20align%20the%20cursor%20for%20you. +45d61a06-6545-4422-97b7-bc76cfa964c1,https://stackoverflow.com/questions/71685737/how-to-replace-all-newlines-with-paragraph-marks-in-libreoffice-write +0b17a146-2934-46c7-8727-73ff6b6483e8,https://askubuntu.com/questions/245695/how-do-you-insert-subscripts-and-superscripts-into-ordinary-non-formula-text-i +0e763496-b6bb-4508-a427-fad0b6c3e195,https://ask.libreoffice.org/t/how-do-i-change-the-font-for-the-whole-document-in-writer/9220 +f178a4a9-d090-4b56-bc4c-4b72a61a035d,https://ask.libreoffice.org/t/how-do-i-make-times-new-roman-the-default-font-in-lo/64604 +0a0faba3-5580-44df-965d-f562a99b291c,https://stackoverflow.com/questions/64528055/how-to-make-part-of-my-sentence-left-aligned-and-rest-as-right-aligned +e246f6d8-78d7-44ac-b668-fcf47946cb50,https://ask.libreoffice.org/t/how-to-change-text-size-color-of-italic-font/77712 +8472fece-c7dd-4241-8d65-9b3cd1a0b568,https://stackoverflow.com/questions/37259827/libreoffice-writer-how-to-set-different-colors-to-each-letter +88fe4b2d-3040-4c70-9a70-546a47764b48,https://stackoverflow.com/questions/56554555/libreoffice-writer-how-to-create-empty-line-space-after-every-period-in-a-par +6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2,https://superuser.com/questions/762500/how-do-i-find-all-highlighted-text-in-libreoffice-writer +d53ff5ee-3b1a-431e-b2be-30ed2673079b,https://ask.libreoffice.org/t/how-to-convert-all-uppercase-to-lowercase/53341 +72b810ef-4156-4d09-8f08-a0cf57e7cefe,https://superuser.com/questions/657792/libreoffice-writer-how-to-apply-strikethrough-text-formatting?rq=1 +6f81754e-285d-4ce0-b59e-af7edb02d108,https://superuser.com/questions/789473/remove-duplicate-lines-in-libreoffice-openoffice-writer +41c621f7-3544-49e1-af8d-dafd0f834f75,https://superuser.com/questions/1668018/how-to-auto-format-lines-in-libre-office-writer 
+b21acd93-60fd-4127-8a43-2f5178f4a830,https://superuser.com/questions/1097199/how-can-i-double-space-a-document-in-libreoffice?rq=1 +59f21cfb-0120-4326-b255-a5b827b38967,Media — VLC Desktop User Documentation 3.0 documentation (videolan.me) +8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89,Recording what your are playing — VLC Desktop User Documentation 3.0 documentation (videolan.me) +8f080098-ddb1-424c-b438-4e96e5e4786e,https://medium.com/@jetscribe_ai/how-to-extract-mp3-audio-from-videos-using-vlc-media-player-beeef644ebfb +bba3381f-b5eb-4439-bd9e-80c22218d5a7,(1) How do I play online videos using the VLC media player? - Quora +a1c3ab35-02de-4999-a7ed-2fd12c972c6e,(1) How do I compress a video with VLC? - Quora +fba2c100-79e8-42df-ae74-b592418d54f4,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s +d70666e4-7348-42c7-a06a-664094c5df3c,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s +efcf0d81-0835-4880-b2fd-d866e8bc2294,"https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s, https://help.ubuntu.com/stable/ubuntu-help/look-background.html.en" +8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s +aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6,https://videoconverter.wondershare.com/vlc/how-to-rotate-a-video-using-vlc.html?gad_source=1&gclid=CjwKCAiA-vOsBhAAEiwAIWR0TaGSOLkYiBeVQGZSyfeUP3g-tIvYxffl5RFIu0-zrUL1IF41eCw1JRoCnCMQAvD_BwE +386dbd0e-0241-4a0a-b6a2-6704fba26b1c,https://superuser.com/questions/1708415/pause-and-play-vlc-in-background?rq=1 +9195653c-f4aa-453d-aa95-787f6ccfaae9,https://superuser.com/questions/1513285/how-can-i-increase-the-maximum-volume-output-by-vlc?rq=1 +5ac2891a-eacd-4954-b339-98abba077adb,"https://superuser.com/questions/1412810/how-to-prevent-vlc-media-player-from-auto-closing-after-video-end#:%7E:text=Click%20on%20%22Media%22on%20the,VLC%20player%20after%20video%20ending" +2f24d255-a3af-4ab3-9dc3-66ff792417e7,https://superuser.com/questions/1753588/vlc-crop-hide-part-of-screen +0d95d28a-9587-433b-a805-1fbe5467d598,https://superuser.com/questions/1299036/vlc-how-to-open-the-folder-of-the-current-playing-video?noredirect=1&lq=1 +d06f0d4d-2cd5-4ede-8de9-598629438c6e,https://superuser.com/questions/1039392/changing-colour-of-vlc-volume-slider +a5bbbcd5-b398-4c91-83d4-55e1e31bbb81,https://superuser.com/questions/776056/how-to-hide-bottom-toolbar-in-vlc +f3977615-2b45-4ac5-8bba-80c17dbe2a37,https://www.reddit.com/r/Fedora/comments/rhljzd/how_to_run_multiple_instances_of_vlc_media_player/ +c669a35f-d45a-450e-b1f2-f473748337bb,https://www.quora.com/How-do-I-fast-forward-a-video-in-VLC-player +d1ba14d0-fef8-4026-8418-5b581dc68ca0,https://superuser.com/questions/306154/how-to-use-a-b-repeat-feature-of-vlc +215dfd39-f493-4bc3-a027-8a97d72c61bf,https://superuser.com/questions/1224784/how-to-change-vlcs-splash-screen +bb5e4c0d-f964-439c-97b6-bdb9747de3f4,How to Remove an Email Account from Thunderbird: 4 Steps (wikihow.com) +7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,How to Access Gmail With Mozilla Thunderbird: 7 Steps (wikihow.com) +b188fe10-ae67-4db8-a154-26a0b8ff8f1e,(1) Restore readability in message list pane : Thunderbird (reddit.com) +12086550-11c0-466b-b367-1d9e75b3910e,https://www.bitrecover.com/blog/manage-thunderbird-profiles/ +06fe7178-4491-4589-810f-2e2bc9502122,(1) How do I backup email files in Mozilla Thunderbird? 
- Quora +6766f2b8-8a72-417f-a9e5-56fcaa735837,"https://www.adsigner.com/user-manual/signatures/setup-email-client-thunderbird/#:~:text=is%20probably%20hidden.-,Right%20click%20on%20the%20empty%20space%20at%20the%20top%20of,signature%20from%20a%20file%20instead." +e1e75309-3ddb-4d09-92ec-de869c928143,https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters +3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5,https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters +35253b65-1c19-4304-8aa4-6884b8218fc0,https://support.mozilla.org/en-US/questions/1259354 +d088f539-cab4-4f9a-ac92-9999fc3a656e,https://support.mozilla.org/en-US/kb/how-use-attachments +2ad9387a-65d8-4e33-ad5b-7580065a27ca,"https://support.mozilla.org/bm/questions/1027435, https://www.wikihow.tech/Create-Folders-in-Mozilla-Thunderbird" +480bcfea-d68f-4aaa-a0a9-2589ef319381,https://www.reddit.com/r/Thunderbird/comments/182dg5p/unified_inbox_howto/ +37b9808f-b2b4-4177-ab00-9ddfae4bad27,https://www.quora.com/How-can-I-schedule-Mozilla-Thunderbird-to-turn-off-automatically +af630914-714e-4a24-a7bb-f9af687d3b91,https://stackoverflow.com/questions/11333148/adding-a-toolbar-button-to-a-thundebird-compose-message-window?rq=3 +ae78f875-5b98-4907-bbb5-9c737fc68c03,https://stackoverflow.com/questions/22323746/thunderbird-extension-add-button-in-message-reader-toolbar?rq=3 +3299584d-8f11-4457-bf4c-ce98f7600250,https://superuser.com/questions/1643561/would-like-to-see-the-email-address-from-sender-in-the-column +030eeff7-b492-4218-b312-701ec99ee0cc,https://superuser.com/questions/1781004/how-do-i-remove-the-indentation-and-character-in-quoted-text-of-a-reply-mess +94760984-3ff5-41ee-8347-cf1af709fea0,https://superuser.com/questions/1757333/how-can-i-view-thunderbird-in-full-dark-mode +99146c54-4f37-4ab8-9327-5f3291665e1e,https://superuser.com/questions/1764409/how-to-send-email-with-thunderbird-without-configuring-an-incoming-email-service +9656a811-9b5b-4ddf-99c7-5117bcef0626,https://superuser.com/questions/205240/is-there-a-way-to-get-a-popup-confirmation-box-when-you-send-an-email-in-thunder?rq=1 +c9e7eaf2-b1a1-4efc-a982-721972fa9f02,https://superuser.com/questions/544480/how-to-apply-automatic-message-filters-to-subfolders-too?noredirect=1&lq=1 +bb5e4c0d-f964-439c-97b6-bdb9747de3f4,Set your default search engine & site search shortcuts - Computer - Google Chrome Help +7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,"Clear, allow and manage cookies in Chrome - Computer - Google Chrome Help" +12086550-11c0-466b-b367-1d9e75b3910e,(1) What are the cool tricks to use Google Chrome? 
- Quora +06fe7178-4491-4589-810f-2e2bc9502122,4 Ways to Switch Tabs in Chrome - wikiHow +6766f2b8-8a72-417f-a9e5-56fcaa735837,https://support.google.com/chrome/thread/205881926/it-s-possible-to-load-unpacked-extension-automatically-in-chrome?hl=en +e1e75309-3ddb-4d09-92ec-de869c928143,https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php +3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5,https://in5stepstutorials.com/google-chrome/add-change-delete-autofill-address.php +35253b65-1c19-4304-8aa4-6884b8218fc0,"https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome, https://www.reddit.com/r/chrome/comments/13xcbap/crete_shortcut_option_missing/" +d088f539-cab4-4f9a-ac92-9999fc3a656e,https://medium.com/@inkverseuk2/useful-tips-and-tricks-for-the-google-chrome-browser-ac7d0d24b3cc +2ad9387a-65d8-4e33-ad5b-7580065a27ca,https://www.youtube.com/watch?v=IN-Eq_UripQ +7a5a7856-f1b6-42a4-ade9-1ca81ca0f263,https://www.youtube.com/watch?v=ZaZ8GcTxjXA +3720f614-37fd-4d04-8a6b-76f54f8c222d,https://superuser.com/questions/984668/change-interface-language-of-chrome-to-english +b63059a2-53bc-4163-a89f-3ac948c74081,https://superuser.com/questions/1303418/how-do-i-make-chrome-block-absolutely-all-pop-ups?rq=1 +44ee5668-ecd5-4366-a6ce-c1c9b8d4e938,https://superuser.com/questions/1787991/clear-browsing-history-from-specific-site-on-chrome +b5ebc8c6-6329-4373-85b4-9421c97375e9,https://superuser.com/questions/364470/is-there-a-way-to-view-google-chrome-browsing-history-past-three-months-ago?rq=1 +93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9,https://superuser.com/questions/1417973/how-to-disable-google-chrome-dark-mode +2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3,https://superuser.com/questions/1393683/how-to-change-the-username-in-google-chrome-profiles?rq=1 +480bcfea-d68f-4aaa-a0a9-2589ef319381,https://bugartisan.medium.com/disable-the-new-chrome-ui-round-in-2023-db168271f71e +37b9808f-b2b4-4177-ab00-9ddfae4bad27,https://www.reddit.com/r/chrome/comments/17niw3h/tutorial_how_to_disable_the_download_bubble_in/ +af630914-714e-4a24-a7bb-f9af687d3b91,https://www.howtogeek.com/680260/how-to-change-chromes-default-text-size/ +ae78f875-5b98-4907-bbb5-9c737fc68c03,https://support.google.com/chrome/thread/219988391/increase-search-results-per-page?hl=en +0ed39f63-6049-43d4-ba4d-5fa2fe04a951,(1) How do you find and replace text in Visual Studio Code? - Quora +b421106e-b282-4c41-af72-37c95493f95f,visual studio code - Launch VScode with new.txt file - Stack Overflow +53ad5833-3455-407b-bbc6-45b4c79ab8fb,Visual Studio Code Tutorial for Beginners - Introduction (youtube.com) +eabc805a-bfcf-4460-b250-ac92135819f6,Visual Studio Code Tutorial for Beginners - Introduction - YouTube +3486f395-ad68-459c-8c39-ea07de934dd4,Visual Studio Code Tutorial for Beginners - Introduction - YouTube +982d12a5-beab-424f-8d38-d2a48429e511,VSCode Tutorial For Beginners - Getting Started With VSCode (youtube.com) +4e60007a-f5be-4bfc-9723-c39affa0a6d3,"https://campbell-muscle-lab.github.io/howtos_Python/pages/documentation/best_practices/vscode_docstring_extension/vscode_docstring_extension.html#:~:text=Type%2C%20Ctrl%20%2B%20Shift%20%2B%20P,select%20the%20NumPy%20docstring%20format." 
+e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2,https://superuser.com/questions/1386061/how-to-suppress-some-python-errors-warnings-in-vs-code +9439a27b-18ae-42d8-9778-5f68f891805e,https://stackoverflow.com/questions/75832474/how-to-keep-cursor-in-debug-console-when-debugging-in-visual-studio-code +ae506c68-352c-4094-9caa-ee9d42052317,https://superuser.com/questions/1460404/get-visual-studio-code-terminal-history?rq=1 +ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae,https://superuser.com/questions/1748097/vs-code-disable-tree-view-find-explorer-search +c714dcee-cad3-4e12-8f3c-12bdcfcdb048,https://superuser.com/questions/1417361/how-to-disable-file-filtering-in-vs-code-sidebar-explorer?rq=1 +930fdb3b-11a8-46fe-9bac-577332e2640e,https://superuser.com/questions/1270103/how-to-switch-the-cursor-between-terminal-and-code-in-vscode +276cc624-87ea-4f08-ab93-f770e3790175,https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code +9d425400-e9b2-4424-9a4b-d4c7abac4140,https://superuser.com/questions/1466771/is-there-a-way-to-make-editor-tabs-stack-in-vs-code +7a4deb26-d57d-4ea9-9a73-630f66a7b568,https://www.quora.com/How-do-I-edit-a-photo-in-GIMP +554785e9-4523-4e7a-b8e1-8016f565f56a,https://www.quora.com/How-do-I-edit-a-photo-in-GIMP +77b8ab4d-994f-43ac-8930-8ca087d7c4b4,https://superuser.com/questions/1636113/how-to-get-gimp-to-recognize-images-or-pictures-folder-as-the-default-folder-for +f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce,https://superuser.com/questions/612338/how-do-i-select-and-move-an-object-in-gimp +d52d6308-ec58-42b7-a2c9-de80e4837b2b,https://superuser.com/questions/1447106/how-to-get-rid-of-the-gimp-tool-options-box +2a729ded-3296-423d-aec4-7dd55ed5fbb3,https://www.youtube.com/watch?v=lOzSiOIipSM +b148e375-fe0b-4bec-90e7-38632b0d73c2,https://www.quora.com/How-do-I-add-layers-in-GIMP +a746add2-cab0-4740-ac36-c3769d9bfb46,https://www.youtube.com/watch?v=_L_MMU22bAw +7b7617bd-57cc-468e-9c91-40c4ec2bcb3d,https://www.youtube.com/watch?v=G_PjQAy0iiU +d16c99dc-2a1e-46f2-b350-d97c86c85c15,https://stackoverflow.com/questions/75185543/use-gimp-to-resize-image-in-one-layer-only +573f79b5-abfe-4507-b455-251d45fe6198,https://stackoverflow.com/questions/45196895/gimp-add-padding-to-multiple-images +06ca5602-62ca-47f6-ad4f-da151cde54cc,https://stackoverflow.com/questions/74664666/how-to-export-palette-based-png-in-gimp +fa9b1e10-4d2d-4a13-af76-7efa822b6a8b,https://stackoverflow.com/questions/24626608/how-to-combine-several-png-images-as-layers-in-a-single-xcf-image +6b2b72ed-3a10-4849-876a-750f7cdf3886,https://stackoverflow.com/questions/21018007/resize-image-to-fit-canvas-gimp +d0e42fd2-d290-46b3-b598-a6e2b7be9c85,https://stackoverflow.com/questions/56758689/stop-gimp-from-merging-layers-when-de-selecting +e2dd0213-26db-4349-abe5-d5667bfd725c,https://superuser.com/questions/839650/how-to-move-an-inserted-text-box-in-gimp +f723c744-e62c-4ae6-98d1-750d3cd7d79d,https://www.reddit.com/r/GIMP/comments/12e57w8/how_to_use_gimp_to_exaggerate_contrast/ +8d6b1c9c-1aab-47fe-9ba5-e84c838d0c57,https://www.quora.com/How-can-email-attachments-be-converted-into-a-word-document-using-Mozilla-Thunderbird +11e1e614-9696-4d94-88c9-8e556880d41a,https://ifttt.com/applets/L2A89geP-send-chrome-software-update-release-alerts-to-email +57956154-f0fe-486b-88b8-e7126da035a9,https://zapier.com/apps/email/integrations/google-sheets/547/get-email-notifications-for-new-rows-in-a-google-sheets-spreadsheet 
+ec14c524-b245-456d-abd6-ec12c746e9f8,https://zapier.com/apps/gmail/integrations/google-sheets/2618/save-new-gmail-emails-matching-certain-traits-to-a-google-spreadsheet +cbf5fbda-425e-4619-bcf2-0ea8d4c0bfa3,https://zapier.com/apps/google-sheets/integrations/google-slides/13919/refresh-charts-on-a-google-slides-presentation-when-rows-are-updated-on-google-sheets +a54284d0-7b93-4327-bfcc-3a421516dbdd,https://superuser.com/questions/655622/cannot-drag-images-from-thunderbird-to-word +58565672-7bfe-48ab-b828-db349231de6b,https://superuser.com/questions/1792660/open-link-from-other-application-does-not-open-the-url-in-firefox +6d72aad6-187a-4392-a4c4-ed87269c51cf,https://superuser.com/questions/923171/converting-openoffice-impress-presentation-to-video-without-screen-recording +937087b6-f668-4ba6-9110-60682ee33441,https://superuser.com/questions/187440/set-default-ubuntu-video-player-as-vlc +f8cfa149-d1c1-4215-8dac-4a0932bad3c2,https://superuser.com/questions/1803088/libreoffice-calc-clears-clipboard +5e974913-6905-4c3f-8b65-d7837f3931cc,https://stackoverflow.com/questions/61856141/how-can-i-start-thunderbird-and-minimize-the-window-on-startup-in-ubuntu +7c179dad-f1c7-4892-b53f-d1c4023d23c7,https://stackoverflow.com/questions/21155085/pasting-excel-tables-in-thunderbird-e-mail-client +4a68b2dd-70f2-4532-9bc1-d21878bd8cb2,https://stackoverflow.com/questions/65669955/thunderbird-how-to-send-a-mail-to-all-receivers-of-a-folder +c8457fde-b14b-4aba-b402-144842ea29e1,https://stackoverflow.com/questions/65788200/how-to-open-xlsx-files-in-ms-excel-from-vs-code +81c425f5-78f3-4771-afd6-3d2973825947,1) https://www.youtube.com/watch?v=PkITfDF-aOw 2) https://www.zyxware.com/articles/3770/how-to-transfer-data-in-libreoffice-calc-to-libreoffice-writer-in-table-format +bb83cab4-e5c7-42c7-a67b-e46068032b86,https://ask.libreoffice.org/t/save-impress-presentation-as-writer-document/5291/4 +227d2f97-562b-4ccb-ae47-a5ec9e142fbb,https://discourse.gnome.org/t/gimp-and-libre-office-writer/15430/4 +a6bbc08c-51e9-4ee4-9327-83d05075d960,https://forum.openoffice.org/en/forum/viewtopic.php?t=105055 +964e6e03-ba31-466b-8c15-5a351a81f675,https://www.maketecheasier.com/mail-merge-thunderbird-calc/ +2fe4b718-3bd7-46ec-bdce-b184f5653624,https://www.thewindowsclub.com/how-to-create-animated-gif-from-a-video-file-using-vlc-and-gimp +d02b9364-6bb0-4c7e-9dbd-4db62822bc26,https://stackoverflow.com/questions/38306910/simple-python-script-to-get-a-libreoffice-base-field-and-play-on-vlc +57fb469b-127a-46fa-8281-bbb3840efdf5,https://support.mozilla.org/en-US/questions/1150626 +3680a5ee-6870-426a-a997-eba929a0d25c,https://unix.stackexchange.com/questions/510850/how-to-open-calc-from-terminal-and-insert-files +2d8c8a20-6f54-4c2e-ad56-61fbe7af6b78,https://www.quora.com/How-do-I-force-LibreOffice-Calc-to-recalculate-a-spreadsheet-from-the-command-line +ee9a3c83-f437-4879-8918-be5efbb9fac7,https://stackoverflow.com/questions/64589140/convert-ods-to-csv-using-command-line-when-libreoffice-instance-is-running +f7dfbef3-7697-431c-883a-db8583a4e4f9,https://www.thegeekdiary.com/libreoffice-command-examples-in-linux/ +2b9493d7-49b8-493a-a71b-56cd1f4d6908,https://devicetests.com/kill-libreoffice-writer-command-line-ubuntu +51f5801c-18b3-4f25-b0c3-02f85507a078,https://github.com/danielrcollins1/ImpressExtractNotes +81de345e-5473-4cb6-a74d-b6abf3475a6a,https://stackoverflow.com/questions/45588952/how-can-i-compose-and-send-email-in-thunderbird-from-commandline 
+2c9fc0de-3ee7-45e1-a5df-c86206ad78b5,https://nikki-ricks.medium.com/how-to-use-git-add-commit-and-push-in-vs-code-and-command-line-35c0e8c47b62
+510f64c8-9bcc-4be1-8d30-638705850618,https://www.geeksforgeeks.org/how-to-start-vs-code-from-the-terminal-command-line/
+9ff484f7-5c09-4398-ae29-d5904e59e138,https://stackoverflow.com/questions/38606973/playing-opening-and-pausing-vlc-command-line-executed-from-python-scripts
+d9b7c649-c975-4f53-88f5-940b29c47247,Extract the first 1000 Gmail Emails from the current month in a new Google Sheets report - RPA Component | UiPath Marketplace | Overview
+be4ef0dc-0f70-4936-81d8-3cd2b04482f8,Table Data Extraction for Sales Opportunities to Excel workbook - RPA Component | UiPath Marketplace | Overview
+78aed49a-a710-4321-a793-b611a7c5b56b,Upload email attachments from Gmail to Google Drive - RPA Component | UiPath Marketplace | Overview
+897e3b53-5d4d-444b-85cb-2cdc8a97d903,Convert Word file to PDF and store in OneDrive - RPA Component | UiPath Marketplace | Overview
+4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc,Extract data from a new invoice file in Google Drive and store it in Google Sheets - RPA Component | UiPath Marketplace | Overview
+b52b40a5-ad70-4c53-b5b0-5650a8387052,Merge PDFs from Gmail email attachments and upload to Google Drive - RPA Component | UiPath Marketplace | Overview
+46407397-a7d5-4c6b-92c6-dbe038b1457b,Upload to Google Drive images from PDF attachments received via Gmail - RPA Component | UiPath Marketplace | Overview
+a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb,Backup important emails to OneDrive or SharePoint - RPA Component | UiPath Marketplace | Overview
+665f4af1-617d-4009-baff-84ff66071e6a,https://www.howtogeek.com/663927/how-to-open-google-chrome-using-command-prompt-on-windows-10/#open-chrome-straight-to-a-specific-website
+e6313b30-3903-4ed9-8c7d-4c47bf51fc96,https://stackoverflow.com/questions/12258086/how-do-i-run-google-chrome-as-root
\ No newline at end of file
diff --git a/resouce_collection/Source2Doc/get_Source_Doc.py b/resouce_collection/Source2Doc/get_Source_Doc.py
new file mode 100644
index 0000000..959e4cf
--- /dev/null
+++ b/resouce_collection/Source2Doc/get_Source_Doc.py
@@ -0,0 +1,89 @@
+import csv
+import os
+import yt_dlp as youtube_dl
+from docx import Document
+import requests
+from bs4 import BeautifulSoup
+from PIL import Image
+import pytesseract
+from io import BytesIO
+import re
+
+# convert .vtt file to .docx file
+def vtt_to_docx(vtt_filepath, docx_filepath):
+    doc = Document()
+
+    # open .vtt file
+    with open(vtt_filepath, 'r', encoding='utf-8') as file:
+        lines = file.readlines()
+
+    # regex that matches subtitle timecode lines
+    vtt_text_pattern = re.compile(r'^\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}')
+
+    # deal with each line
+    for line in lines:
+        # if it is a timecode or the WEBVTT header, skip it
+        if vtt_text_pattern.match(line) or 'WEBVTT' in line:
+            continue
+        # else, add it to the document
+        if line.strip():
+            doc.add_paragraph(line.strip())
+
+    doc.save(docx_filepath)
+
+
+# download youtube subtitles and convert them to .docx file
+def download_youtube_subtitles(video_url, doc_filename):
+    ydl_opts = {
+        'skip_download': True,
+        'writeautomaticsub': True,
+        'subtitleslangs': ['en'],
+        'outtmpl': f'{doc_filename}.%(ext)s',
+        'quiet': True,
+    }
+
+    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+        ydl.download([video_url])
+
+    # outtmpl above writes the subtitle file to the working directory,
+    # so convert it from there rather than from a hard-coded Colab path
+    vtt_to_docx(f'{doc_filename}.en.vtt', f'{doc_filename}.docx')
+
+# 
scrape and OCR a forum
+def scrape_and_ocr_forum(url, doc):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, 'html.parser')
+
+    text_elements = soup.find_all(['h1', 'h2', 'h3', 'p', 'li'])
+    for element in text_elements:
+        doc.add_paragraph(element.get_text())
+
+    image_elements = soup.find_all('img')
+    for image in image_elements:
+        image_url = image.get('src', '')
+        if image_url.startswith('http'):
+            img_response = requests.get(image_url, stream=True)
+            img = Image.open(BytesIO(img_response.content))
+            ocr_text = pytesseract.image_to_string(img)
+            # only keep images whose OCR actually produced some text
+            if ocr_text.strip():
+                doc.add_paragraph(ocr_text)
+
+# process a url
+def process_url(url, doc_id):
+    if 'youtube.com' in url or 'youtu.be' in url:
+        # download_youtube_subtitles already saves {doc_id}.docx itself,
+        # so don't overwrite it with an empty document afterwards
+        download_youtube_subtitles(url, doc_id)
+    else:
+        doc = Document()
+        scrape_and_ocr_forum(url, doc)
+        doc.save(f"{doc_id}.docx")
+
+# read csv file and process each row
+csv_filepath = './Get_Source_Doc - Sheet1.csv'
+with open(csv_filepath, 'r', newline='', encoding='utf-8') as csvfile:
+    reader = csv.DictReader(csvfile)
+    for row in reader:
+        process_url(row['Source'], row['id'])
+        print(row)
\ No newline at end of file

From 613a2199edc013fe5c9eee1f3572fcdfc5408f45 Mon Sep 17 00:00:00 2001
From: tsuky_chen <3107760494@qq.com>
Date: Sun, 21 Jan 2024 00:43:50 +0800
Subject: [PATCH 11/13] modify libreoffice impress examples

---
 .../39478d4a-1049-456f-aa77-407811393add.json |  2 +-
 .../550ce7e7-747b-495f-b122-acdc4d0b8e54.json |  2 +-
 .../a097acff-6266-4291-9fbd-137af7ecd439.json | 32 ++++++++++++++++---
 .../af23762e-2bfd-4a1d-aada-20fa8de9ce07.json |  2 +-
 4 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/evaluation_examples/examples/libreoffice_impress/39478d4a-1049-456f-aa77-407811393add.json b/evaluation_examples/examples/libreoffice_impress/39478d4a-1049-456f-aa77-407811393add.json
index e5ee5d6..6ba438f 100644
--- a/evaluation_examples/examples/libreoffice_impress/39478d4a-1049-456f-aa77-407811393add.json
+++ b/evaluation_examples/examples/libreoffice_impress/39478d4a-1049-456f-aa77-407811393add.json
@@ -9,7 +9,7 @@
       "parameters": {
         "files": [
           {
-            "url": "https://drive.usercontent.google.com/download?id=1WT1-L0iiIlF2kuIK77IDxTfBaQ0X0BbX&export=download&authuser=0&confirm=t&uuid=0b69767e-1f3e-49ce-88a7-1036ef25bcaf&at=APZUnTXZ_sqEZUrHNx1edWep017b:1705337750065",
+            "url": "https://drive.usercontent.google.com/download?id=1WT1-L0iiIlF2kuIK77IDxTfBaQ0X0BbX&export=download&authuser=0&confirm=t&uuid=3daac1dc-0f6e-449b-b6bc-09fd246697aa&at=APZUnTVgf_yEeeaARnUISIE4wr4E:1705768410739",
             "path": "Desktop/Ch5.pptx"
           }
         ]
diff --git a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json
index 9897ead..318b981 100644
--- a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json
+++ b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json
@@ -2,7 +2,7 @@
   "id": "550ce7e7-747b-495f-b122-acdc4d0b8e54",
   "snapshot": "libreoffice_impress",
   "instruction": "I am checking our soccer club's to-do list for the last semester and adding strike-through sign on the line we have already accomplished. 
diff --git a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json
index 9897ead..318b981 100644
--- a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json
+++ b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json
@@ -2,7 +2,7 @@
   "id": "550ce7e7-747b-495f-b122-acdc4d0b8e54",
   "snapshot": "libreoffice_impress",
   "instruction": "I am checking our soccer club's to-do list for the last semester and adding strike-through sign on the line we have already accomplished. Could you help me add a strike-through on the first and second line?",
-  "source": "https://superuser.com/questions/1211035/libreoffice-impress-animations-how-to-strikethrough-on-click?rq=1",
+  "source": "https://technical-tips.com/blog/software/text-in-libreoffice-strikethrough--6948#:~:text=To%20strikethrough%20Text%20in%20LibreOffice%201%20In%20your,effect%22%20can%20your%20additionally%2C%20for%20example%2C%20double%20underline.",
   "config": [
     {
       "type": "download",
diff --git a/evaluation_examples/examples/libreoffice_impress/a097acff-6266-4291-9fbd-137af7ecd439.json b/evaluation_examples/examples/libreoffice_impress/a097acff-6266-4291-9fbd-137af7ecd439.json
index 45330bf..3553fdd 100644
--- a/evaluation_examples/examples/libreoffice_impress/a097acff-6266-4291-9fbd-137af7ecd439.json
+++ b/evaluation_examples/examples/libreoffice_impress/a097acff-6266-4291-9fbd-137af7ecd439.json
@@ -1,12 +1,34 @@
 {
   "id": "a097acff-6266-4291-9fbd-137af7ecd439",
   "snapshot": "libreoffice_impress",
-  "instruction": "Could you help me save my slides to SAVE_PATH?",
+  "instruction": "Could you help me save my slides as pre.pptx on the Desktop?",
   "source": "https://www.youtube.com/watch?v=DDmEvjs4iBw",
-  "config": [],
+  "config": [
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "url": "https://drive.usercontent.google.com/download?id=1i_-m6mVrdesGJ392bulH5lveHarMwKk_&export=download&authuser=0&confirm=t&uuid=057973d3-52b7-45ac-8151-b2c6a1820f49&at=APZUnTU5SYajgO-YrxdDWSiJRfD4:1705768888387",
+            "path": "Desktop/Secrets-of-Monetizing-Video.pptx"
+          }
+        ]
+      }
+    },
+    {
+      "type": "open",
+      "parameters": {
+        "path": "Desktop/Secrets-of-Monetizing-Video.pptx"
+      }
+    }
+  ],
   "trajectory": "trajectories/",
   "related_apps": [
-    ""
+    "libreoffice_impress"
   ],
-  "evaluator": "evaluation_dir"
-}
+  "evaluator": {
+    "func": "check_file_exists",
+    "file_name": "pre.pptx",
+    "directory": "/home/user/Desktop/"
+  }
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json b/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json
index db71d72..af3d921 100644
--- a/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json
+++ b/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json
@@ -2,7 +2,7 @@
   "id": "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
   "snapshot": "libreoffice_impress",
   "instruction": "I am making PPT on LibreOffice Impress for presentation tomorrow. I need to summarize contents on one slide. Could you make a summary slide for me?",
-  "source": "https://superuser.com/questions/1059080/how-to-make-a-summary-slide-in-impress-listing-the-titles-of-all-slides-autom",
+  "source": "https://www.libreofficehelp.com/export-libreoffice-impress-slides-images/#:~:text=Exporting%20a%20single%20slide%20as.jpg%2C.png%2C%20etc%20image%20is,on%20the%20checkbox%20Selection.%20Provide%20jpg%20quality%20options.",
   "config": [
     {
       "type": "download",
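Several of the examples above now share the same setup pattern: a "download" step that stages the .pptx, followed by an "open" step that launches it. A minimal sketch of what the "open" step could boil down to inside the VM (the helper name and the use of xdg-open are assumptions, not code from this repo):

    import subprocess

    def setup_open(path):
        # defer to the desktop's default handler, which maps .pptx to LibreOffice Impress
        subprocess.Popen(["xdg-open", path])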
From ce51d16bb33ebd8c960f622fd588d93b7e94c70b Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Mon, 22 Jan 2024 02:41:17 +0800
Subject: [PATCH 12/13] Loading Impress v1 batch

---
 desktop_env/evaluators/metrics/__init__.py    |   5 +-
 desktop_env/evaluators/metrics/impress.py     | 122 +++++++++++++-----
 .../455d3c66-7dc6-4537-a39a-36d3e9119df7.json |  14 +-
 .../550ce7e7-747b-495f-b122-acdc4d0b8e54.json |  10 +-
 .../5d901039-a89c-4bfb-967b-bf66f4df075e.json |   8 +-
 .../ac9bb6cb-1888-43ab-81e4-a98a547918cd.json |   2 +-
 .../af23762e-2bfd-4a1d-aada-20fa8de9ce07.json |   2 +-
 .../59f21cfb-0120-4326-b255-a5b827b38967.json |   2 +-
 8 files changed, 121 insertions(+), 44 deletions(-)

diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 0d7c9cc..23e9a7e 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -14,5 +14,6 @@ from .gimp import increase_saturation, decrease_brightness, check_file_exists, c
 from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3, check_json
 from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
 from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed
-from .impress import check_slide_numbers_color, compare_pptx_files, check_for_two_lines, check_for_audio, check_formula_shape, check_file_exists
-from .impress import check_slide_orientation_Portrait, contains_mp4_video
\ No newline at end of file
+from .impress import check_image_stretch_and_center, check_slide_numbers_color, compare_pptx_files, check_strikethrough, \
+    check_for_audio, check_formula_shape
+from .impress import check_slide_orientation_Portrait, contains_mp4_video
diff --git a/desktop_env/evaluators/metrics/impress.py b/desktop_env/evaluators/metrics/impress.py
index 749bc0e..b68a6fe 100644
--- a/desktop_env/evaluators/metrics/impress.py
+++ b/desktop_env/evaluators/metrics/impress.py
@@ -1,24 +1,56 @@
 from pptx import Presentation
-import os
+from pptx.util import Inches
+
+
+def check_image_stretch_and_center(modified_ppt, original_ppt):
+    # fixme: this func is overfit to this example libreoffice_impress
+    # Load the presentations
+    original_pres = Presentation(original_ppt)
+    modified_pres = Presentation(modified_ppt)
+
+    # Get the first slide of each presentation
+    original_slide = original_pres.slides[0]
+    modified_slide = modified_pres.slides[0]
+
+    # Get the images on the first slide of each presentation (shape_type 13 is PICTURE)
+    original_slide_images = [shape for shape in original_slide.shapes if shape.shape_type == 13]
+    modified_slide_images = [shape for shape in modified_slide.shapes if shape.shape_type == 13]
+
+    the_image = original_slide_images[0]
+
+    # Find the same image (by blob) in the modified slide; fail if it was removed
+    the_modified_image = None
+    for modified_image in modified_slide_images:
+        if the_image.image.blob == modified_image.image.blob:
+            the_modified_image = modified_image
+    if the_modified_image is None:
+        return False
+
+    # The image must be stretched to (roughly) the slide size and centered on it
+    if (abs(the_modified_image.width - original_pres.slide_width) > Inches(0.1) or
+        abs(the_modified_image.height - original_pres.slide_height) > Inches(0.1)
or + abs(the_modified_image.left - (original_pres.slide_width - the_modified_image.width) / 2) > Inches(0.1) or + abs(the_modified_image.top - (original_pres.slide_height - the_modified_image.height) / 2) > Inches(0.1)): + return False + + return True + def is_red_color(color): - #judge if the color is red + # judge if the color is red print(color.rgb) return color and color.rgb == (255, 0, 0) + def get_master_placeholder_color(prs): # get the color of the placeholder masters = prs.slide_masters for idx, master in enumerate(masters): - for placeholder in master.placeholders: - if placeholder.has_text_frame and placeholder.text == "": + for placeholder in master.placeholders: + if placeholder.has_text_frame and placeholder.text == "": text_frame = placeholder.text_frame if text_frame.paragraphs: first_paragraph = text_frame.paragraphs[0] - return first_paragraph.font.color - return None - + return first_paragraph.font.color + return None + def check_slide_numbers_color(pptx_file_path): presentation = Presentation(pptx_file_path) @@ -34,42 +66,65 @@ def check_slide_numbers_color(pptx_file_path): print(font_color) return 1 if font_color is not None and is_red_color(font_color) else 0 -def compare_pptx_files(file1_path, file2_path): + +def compare_pptx_files(file1_path, file2_path, **options): + # todo: not strictly match since not all information is compared because we cannot get the info through pptx prs1 = Presentation(file1_path) prs2 = Presentation(file2_path) # compare the number of slides if len(prs1.slides) != len(prs2.slides): - return 0 + return False # compare the content of each slide for slide1, slide2 in zip(prs1.slides, prs2.slides): # check if the shapes are the same for shape1, shape2 in zip(slide1.shapes, slide2.shapes): + if shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height: + return False + if hasattr(shape1, "text") and hasattr(shape2, "text"): if shape1.text != shape2.text: - return 0 - return 1 + return False -def has_two_lines_on_page(slide): - line_count = 0 - for shape in slide.shapes: - if shape.shape_type == 1: # 1 表示 Line 形状 - line_count += 1 - if line_count >= 2: - return True - return False + # check if the paragraphs are the same + for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs): + # check if the runs are the same + for run1, run2 in zip(para1.runs, para2.runs): + if run1.text != run2.text: + return False -def check_for_two_lines(prs): - prs = Presentation(prs) - for i, slide in enumerate(prs.slides): - if has_two_lines_on_page(slide): - return 1 - return 0 + # check if the font properties are the same + if run1.font.name != run2.font.name or run1.font.size != run2.font.size or run1.font.bold != run2.font.bold or run1.font.italic != run2.font.italic or run1.font.color.rgb != run2.font.color.rgb: + return False + + return True + + +def check_strikethrough(pptx_path, rules): + # Load the presentation + presentation = Presentation(pptx_path) + + slide_index_s = rules["slide_index_s"] + shape_index_s = rules["shape_index_s"] + paragraph_index_s = rules["paragraph_index_s"] + + for slide_index in slide_index_s: + # Get the slide + slide = presentation.slides[slide_index] + + for shape_index in shape_index_s: + # Get the text box + paragraphs = slide.shapes[shape_index].text_frame.paragraphs + + for paragraph_index in paragraph_index_s: + paragraph = paragraphs[paragraph_index] + run = paragraph.runs[0] + if 'strike' not in run.font._element.attrib: + return 
False + + return True -def check_file_exists(directory, filename): - file_path = os.path.join(directory, filename) - return 1 if os.path.isfile(file_path) else 0 def has_audio_on_page(slide): for shape in slide.shapes: @@ -77,6 +132,7 @@ def has_audio_on_page(slide): return True return False + def check_for_audio(prs): prs = Presentation(prs) for i, slide in enumerate(prs.slides): @@ -84,26 +140,29 @@ def check_for_audio(prs): return 1 return 0 + def check_formula_shape(prs): prs = Presentation(prs) slide = prs.slides[13] - + for shape in slide.shapes: if shape.has_text_frame and shape.shape_type == 1: return 1 return 0 + def check_slide_orientation_Portrait(pptx_path): presentation = Presentation(pptx_path) - + slide_height = presentation.slide_height slide_width = presentation.slide_width - + if slide_width < slide_height: return 1 return 0 + def contains_mp4_video(pptx_path): prs = Presentation(pptx_path) for slide in prs.slides: @@ -113,6 +172,7 @@ def contains_mp4_video(pptx_path): return 1 return 0 + if __name__ == "__main__": path1 = "../../任务数据/LibreOffice Impress/Change_Color_Slide_Number_gold_textbox.pptx" presentation = Presentation(path1) diff --git a/evaluation_examples/examples/libreoffice_impress/455d3c66-7dc6-4537-a39a-36d3e9119df7.json b/evaluation_examples/examples/libreoffice_impress/455d3c66-7dc6-4537-a39a-36d3e9119df7.json index 0a5ebb9..db194b0 100644 --- a/evaluation_examples/examples/libreoffice_impress/455d3c66-7dc6-4537-a39a-36d3e9119df7.json +++ b/evaluation_examples/examples/libreoffice_impress/455d3c66-7dc6-4537-a39a-36d3e9119df7.json @@ -27,8 +27,16 @@ "libreoffice_impress" ], "evaluator": { - "func": "check_file_exists", - "file_name": "res.png", - "directory": "/home/user/Desktop/" + "func": "compare_images", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1XTDaQ2NlovrusKkuY6udi_BQfLwSP9th&export=download&authuser=0&confirm=t&uuid=d3c7883e-3cea-4bf3-8f83-d878622ee76d&at=APZUnTXQEnT0Gi4rB0oegvVGheyn:1705859805154", + "dest": "res_gold.png" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/res.png", + "dest": "res.png" + } } } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json index 318b981..6c0e229 100644 --- a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json +++ b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json @@ -27,7 +27,15 @@ "libreoffice_impress" ], "evaluator": { - "func": "check_for_two_lines", + "func": "check_strikethrough", + "expected": { + "type": "rule", + "rules": { + "slide_index_s": [4], + "shape_index_s": [1], + "paragraph_index_s": [1, 2] + } + }, "result": { "type": "vm_file", "path": "Desktop/New_Club_Spring_2018_Training.pptx", diff --git a/evaluation_examples/examples/libreoffice_impress/5d901039-a89c-4bfb-967b-bf66f4df075e.json b/evaluation_examples/examples/libreoffice_impress/5d901039-a89c-4bfb-967b-bf66f4df075e.json index 6c1f0f0..773ef8c 100644 --- a/evaluation_examples/examples/libreoffice_impress/5d901039-a89c-4bfb-967b-bf66f4df075e.json +++ b/evaluation_examples/examples/libreoffice_impress/5d901039-a89c-4bfb-967b-bf66f4df075e.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": 
"https://drive.usercontent.google.com/download?id=16K6TpGIRZpqOJUu-mtJQ_78kIwLcn-4D&export=download&authuser=0&confirm=t&uuid=945b6f33-53d2-4e87-ada9-efa8b938a499&at=APZUnTVw4fKyJPW0vAAJURruAJIP:1705250184439", + "url": "https://drive.usercontent.google.com/download?id=16K6TpGIRZpqOJUu-mtJQ_78kIwLcn-4D&export=download&authuser=0&confirm=t&uuid=41509e5c-eb95-453a-baad-4e12a839a120&at=APZUnTVygE_LL27vx1l6OEg_FRj0:1705849959413", "path": "Desktop/CPD_Background_Investigation_Process.pptx" } ] @@ -27,11 +27,11 @@ "libreoffice_impress" ], "evaluator": { - "func": "compare_pptx_files", + "func": "check_image_stretch_and_center", "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1rsvFPyHYiIPh1c8Nj8say0NJCG2VIDr7&export=download&authuser=0&confirm=t&uuid=aac08a92-6595-47d8-84dc-8f1ab1df987f&at=APZUnTXIWCn5B0CpLttvG2bsr_a7:1705250423565", - "dest": "CPD_Background_Investigation_Process_Gold.docx" + "path": "https://drive.usercontent.google.com/download?id=16K6TpGIRZpqOJUu-mtJQ_78kIwLcn-4D&export=download&authuser=0&confirm=t&uuid=41509e5c-eb95-453a-baad-4e12a839a120&at=APZUnTVygE_LL27vx1l6OEg_FRj0:1705849959413", + "dest": "CPD_Background_Investigation_Process_Original.pptx" }, "result": { "type": "vm_file", diff --git a/evaluation_examples/examples/libreoffice_impress/ac9bb6cb-1888-43ab-81e4-a98a547918cd.json b/evaluation_examples/examples/libreoffice_impress/ac9bb6cb-1888-43ab-81e4-a98a547918cd.json index 7a8cb20..8b5dc98 100644 --- a/evaluation_examples/examples/libreoffice_impress/ac9bb6cb-1888-43ab-81e4-a98a547918cd.json +++ b/evaluation_examples/examples/libreoffice_impress/ac9bb6cb-1888-43ab-81e4-a98a547918cd.json @@ -6,7 +6,7 @@ "config": [], "trajectory": "trajectories/", "related_apps": [ - "" + "libreoffice_impress" ], "evaluator": "evaluation_dir" } diff --git a/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json b/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json index af3d921..a5213fe 100644 --- a/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json +++ b/evaluation_examples/examples/libreoffice_impress/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json @@ -1,7 +1,7 @@ { "id": "af23762e-2bfd-4a1d-aada-20fa8de9ce07", "snapshot": "libreoffice_impress", - "instruction": "I am making PPT on LibreOffice Impress for presentation tomorrow. I need to summarize contents on one slide. Could you make a summary slide for me?", + "instruction": "I am making PPT on LibreOffice Impress for presentation tomorrow. I need to summarize contents on one slide use Impress \"Summary Slide\" feature. 
diff --git a/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json b/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
index 32d7570..14ce3cd 100644
--- a/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
+++ b/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
@@ -1,7 +1,7 @@
 {
   "id": "59f21cfb-0120-4326-b255-a5b827b38967",
   "snapshot": "base_setup",
-  "instruction": "Could you play the music video that's saved on my desktop for me?",
+  "instruction": "Could you play the music video that's saved on my desktop for me via VLC?",
   "source": "https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/media.html#playing-a-file",
   "config": [
     {
From ec23be3ecbf8e8bb7412e174392984af42d26703 Mon Sep 17 00:00:00 2001
From: tsuky_chen <3107760494@qq.com>
Date: Mon, 22 Jan 2024 05:07:12 +0800
Subject: [PATCH 13/13] update get source to docs; process youtube,stackoverflow,superuser,ubuntu and normal web respectively

---
 .../Source2Doc/Get_Source_Doc - Sheet1.csv    | 540 +++++++++---------
 resouce_collection/Source2Doc/get_Source_Doc.py | 249 ++++++--
 2 files changed, 465 insertions(+), 324 deletions(-)

diff --git a/resouce_collection/Source2Doc/Get_Source_Doc - Sheet1.csv b/resouce_collection/Source2Doc/Get_Source_Doc - Sheet1.csv
index 1b06f2c..86838a6 100644
--- "a/resouce_collection/Source2Doc/Get_Source_Doc - Sheet1.csv"
+++ "b/resouce_collection/Source2Doc/Get_Source_Doc - Sheet1.csv"
@@ -1,272 +1,268 @@
-id,Source
-94d95f96-9699-4208-98ba-3c3119edf9c2,https://help.ubuntu.com/lts/ubuntu-help/addremove-install.html.en
-bedcedc4-4d72-425e-ad62-21960b11fe0d,https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s
-43c2d64c-bab5-4dcb-a30c-b888321c319a,https://ubuntu.com/tutorials/command-line-for-beginners#4-creating-folders-and-files
-7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82,https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files
-ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3,https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s
-a462a795-fdc7-4b23-b689-e8b6df786b78,https://help.ubuntu.com/lts/ubuntu-help/shell-exit.html.en
-f9be0997-4b7c-45c5-b05c-4612b44a6118,https://help.ubuntu.com/lts/ubuntu-help/shell-notifications.html.en
-ae039631-2b12-4637-84f6-c67d51511be3,https://help.ubuntu.com/lts/ubuntu-help/net-default-browser.html.en
-e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15,https://help.ubuntu.com/lts/ubuntu-help/contacts-add-remove.html.en
-28cc3b7e-b194-4bc9-8353-d04c0f4d56d2,https://help.ubuntu.com/lts/ubuntu-help/sound-volume.html.en
-5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57,https://help.ubuntu.com/lts/ubuntu-help/files-recover.html.en
-e0df059f-28a6-4169-924f-b9623e7184cc,https://help.ubuntu.com/lts/ubuntu-help/files-rename.html.en
-ddc75b62-7311-4af8-bfb3-859558542b36,https://help.ubuntu.com/lts/ubuntu-help/addremove-remove.html.en
-5c433d22-ed9a-4e31-91f5-54cf3e8acd63,https://help.ubuntu.com/lts/ubuntu-help/session-language.html.zh-CN
-b6781586-6346-41cd-935a-a6b1487918fc,https://help.ubuntu.com/lts/ubuntu-help/clock-timezone.html.en
-b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa,https://help.ubuntu.com/lts/ubuntu-help/bluetooth-turn-on-off.html.en
-3ce045a0-877b-42aa-8d2c-b4a863336ab8,https://help.ubuntu.com/lts/ubuntu-help/a11y-font-size.html.en
-fe41f596-a71b-4c2f-9b2f-9dcd40b568c3,https://help.ubuntu.com/lts/ubuntu-help/power-percentage.html.en -a4d98375-215b-4a4d-aee9-3d4370fccc41,https://help.ubuntu.com/lts/ubuntu-help/privacy-screen-lock.html.en -765d2b74-88a7-4d50-bf51-34e4106fd24a,https://help.ubuntu.com/lts/ubuntu-help/files-delete.html.en -cc9d4f34-1ca0-4a1b-8ff2-09302696acb9,https://superuser.com/questions/178587/how-do-i-detach-a-process-from-terminal-entirely -5812b315-e7bd-4265-b51f-863c02174c28,https://superuser.com/questions/149404/create-an-ssh-user-who-only-has-permission-to-access-specific-folders -c56de254-a3ec-414e-81a6-83d2ce8c41fa,https://superuser.com/questions/28426/how-to-extract-text-with-ocr-from-a-pdf-on-linux -6ebbfb01-ea72-4226-a2a6-dc428e111ed2,https://superuser.com/questions/46748/how-do-i-make-bash-my-default-shell-on-ubuntu -4d2b519e-e872-4100-8ea3-fe71ab0f9133,https://stackoverflow.com/questions/11530090/adding-a-new-entry-to-the-path-variable-in-zsh -c288e301-e626-4b98-a1ab-159dcb162af5,https://stackoverflow.com/questions/41986507/unable-to-set-default-python-version-to-python3-in-ubuntu -13584542-872b-42d8-b299-866967b5c3ef,https://superuser.com/questions/72176/linux-set-default-terminal-size-and-screen-position -23393935-50c7-4a86-aeea-2b78fd089c5c,https://superuser.com/questions/91307/copying-only-jpg-from-a-directory-structure-to-another-location-linux -f10b16e1-c160-4cb3-989f-7b2ec89bc073,https://www.wikihow.com/Install-Gnome-on-Ubuntu -eb03d19a-b88d-4de4-8a64-ca0ac66f426b,https://www.youtube.com/shorts/t9JLUaT55UQ -0bf05a7d-b28b-44d2-955a-50b41e24012a,https://www.youtube.com/shorts/FPAQaDTS8VY -7b802dad-6e0f-4204-9815-d4e3f57627d8,https://www.youtube.com/shorts/Of-lzeP1usE -7a4e4bc8-922c-4c84-865c-25ba34136be1,https://www.youtube.com/shorts/bvUhr1AHs44 -2bd59342-0664-4ccb-ba87-79379096cc08,https://www.youtube.com/shorts/L3Z-F1QTQFY -a9f325aa-8c05-4e4f-8341-9e4358565f4f,https://www.youtube.com/shorts/A0gmEBRKXWs -ecb0df7a-4e8d-4a03-b162-053391d3afaf,https://www.youtube.com/shorts/tXOovKn0H68 -7efeb4b1-3d19-4762-b163-63328d66303b,https://www.youtube.com/shorts/4jzXfZNhfmk -4e6fcf72-daf3-439f-a232-c434ce416af6,https://www.youtube.com/shorts/0uxJccNCKcE -6054afcb-5bab-4702-90a0-b259b5d3217c,https://www.youtube.com/shorts/JTbZ8sRxkdU -abed40dc-063f-4598-8ba5-9fe749c0615d,https://www.youtube.com/shorts/xgf4ZpsEx5M -01b269ae-2111-4a07-81fd-3fcd711993b0,https://www.youtube.com/shorts/VrUzPTIwQ04 -8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14,https://www.youtube.com/shorts/Hbcwu6IQ1ns -af2b02f7-acee-4be4-8b66-499fab394915,https://www.youtube.com/shorts/AwKsb5VmtBI -da1d63b8-fa12-417b-ba18-f748e5f770f3,https://www.youtube.com/shorts/hquscnbz2-U -636380ea-d5f6-4474-b6ca-b2ed578a20f1,https://www.youtube.com/shorts/_BYL6VOHLGw -5ba77536-05c5-4aae-a9ff-6e298d094c3e,https://www.youtube.com/shorts/CuBC1evUS5I -4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b,https://www.youtube.com/shorts/1adQWfjN-tI -672a1b02-c62f-4ae2-acf0-37f5fb3052b0,https://www.youtube.com/shorts/2rhdQXI4Lng -648fe544-16ba-44af-a587-12ccbe280ea6,https://www.youtube.com/shorts/sOPBMWaC6Uc -8985d1e4-5b99-4711-add4-88949ebb2308,https://www.youtube.com/shorts/J5ts2Acv9Pc -9e606842-2e27-43bf-b1d1-b43289c9589b,https://www.youtube.com/shorts/B-mGYDFOyUs -fcb6e45b-25c4-4087-9483-03d714f473a9,https://www.youtube.com/shorts/GZipp7nOZS0 -68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2,https://www.youtube.com/shorts/JEH5TsK-cCk -fff629ea-046e-4793-8eec-1a5a15c3eb35,https://www.youtube.com/shorts/8WybtCdUT6w 
-5c9a206c-bb00-4fb6-bb46-ee675c187df5,https://www.youtube.com/shorts/VbQtMNnq9i4 -e975ae74-79bd-4672-8d1c-dc841a85781d,https://www.youtube.com/shorts/GjT7gGe5Sr8 -34a6938a-58da-4897-8639-9b90d6db5391,https://www.youtube.com/shorts/gW37x2TkzOY -b5a22759-b4eb-4bf2-aeed-ad14e8615f19,https://www.youtube.com/shorts/3xLa-D0C7Ic -2f9913a1-51ed-4db6-bfe0-7e1c95b3139e,https://www.youtube.com/shorts/dGLRcmfVO6Q -2558031e-401d-4579-8e00-3ecf540fb492,https://www.mrexcel.com/board/threads/sales-for-the-first-6-weeks.1249213/ -39aa4e37-dc91-482e-99af-132a612d40f3,"Add, Insert, Delete, Copy, Move, Rename a Worksheet in LibreOffice Calc (libreofficehelp.com)" -0cecd4f3-74de-457b-ba94-29ad6b5dafb6,"Add, Insert, Delete, Copy, Move, Rename a Worksheet in LibreOffice Calc (libreofficehelp.com)" -4188d3a4-077d-46b7-9c86-23e1a036f6c1,How to Freeze and Unfreeze Rows & columns in LibreOffice Calc - libreofficehelp.com -51b11269-2ca8-4b2a-9163-f21758420e78,How to arrange numbers in a column from minimum to maximum or vice versa ? : LibreOfficeCalc (reddit.com) -7e429b8d-a3f0-4ed0-9b58-08957d00b127,https://medium.com/@divyangichaudhari17/how-to-use-vlookup-and-hlookup-in-libre-calc-3370698bb3ff -f5a90742-3fa2-40fc-a564-f29b054e0337,LibreOffice Calc: How to apply functions to columns? - Super User -22df9241-f8d7-4509-b7f1-37e501a823f7,How do you move cells in LibreOffice Calc - Super User -1434ca3e-f9e3-4db8-9ca7-b4c653be7d17,How to Remove Duplicates in Open Office Calc: 5 Steps (wikihow.com) -347ef137-7eeb-4c80-a3bb-0951f26a8aff,Libre Office 7 Calc Spreadsheets Beginners Tutorial (youtube.com) -6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5,Learn LibreOffice Calc 6 in Under 30 MInutes | Complete Tutorial for Beginners (youtube.com) -3aaa4e37-dc91-482e-99af-132a612d40f3,How to import/export CSV files with LibreOffice Calc (or OpenOffice) - Quora -0decd4f3-74de-457b-ba94-29ad6b5dafb6,https://justclickhere.co.uk/resources/checkboxes-tick-boxes-libreoffice-calc/ -37608790-6147-45d0-9f20-1137bb35703d,https://www.youtube.com/shorts/uzPo_CPCHH8 -f9584479-3d0d-4c79-affa-9ad7afdd8850,https://youtube.com/shorts/feldd-Pn48c?si=9xJiem2uAHm6Jshb -d681960f-7bc3-4286-9913-a8812ba3261a,https://www.youtube.com/shorts/d7U1S_IsTVM -f6a90742-3fa2-40fc-a564-f29b054e0337,https://www.excel-easy.com/examples/drop-down-list.html -21df9241-f8d7-4509-b7f1-37e501a823f7,https://www.youtube.com/watch?v=p5C4V_AO1UU -1334ca3e-f9e3-4db8-9ca7-b4c653be7d17,https://techcommunity.microsoft.com/t5/excel/excel-workbook-top-way-too-big-can-t-see-rows-and-columns/m-p/4014694 -357ef137-7eeb-4c80-a3bb-0951f26a8aff,https://www.reddit.com/r/excel/comments/17zny8u/calculating_total_amount_earned_from_total_hours/ -6f99a1ad-07d2-4b66-a1ce-ece6d99c20a5,https://techcommunity.microsoft.com/t5/excel/sumarize-the-sheetnames/m-p/4014716 -aa3a8974-2e85-438b-b29e-a64df44deb4b,https://www.quora.com/Libre-Office-Calc-How-do-I-resize-all-cells-in-a-sheet-to-make-them-fit-to-1-page-for-printing-and-exporting-as-PDF -a01fbce3-2793-461f-ab86-43680ccbae25,https://superuser.com/questions/1250677/how-to-set-decimal-separator-in-libre-office-calc -4f07fbe9-70de-4927-a4d5-bb28bc12c52c,https://superuser.com/questions/1081048/libreoffice-calc-how-to-pad-number-to-fixed-decimals-when-used-within-formula -e3b1d5fa-ed00-4129-bda1-1452bd2b6772,https://www.reddit.com/r/libreoffice/comments/tel112/calc_how_to_calculate_sum_by_categories/ -ca6a9524-f8e9-4d2f-9364-ab0cad567739,https://www.reddit.com/r/libreoffice/comments/113gmyc/how_to_remove_certain_text_from_cells_in_calc/ 
-a455e8d0-930f-40d2-9575-5e8d2d222f58,https://superuser.com/questions/562944/quickly-fill-blank-cells-in-a-list-in-libreoffice-calc -83ee22c6-7737-49ce-9b5a-138c3e92af04,https://superuser.com/questions/661102/currency-conversion-in-libreoffice-calc -819f61c2-ec77-4d3f-9996-0838ae5aacc8,https://superuser.com/questions/381696/creating-a-column-of-working-days-in-libreoffice-calc -69d577b3-004e-4bca-89b2-0d7c2f6049e3,https://superuser.com/questions/387106/libreoffice-calc-how-to-get-total-for-hhmmss-cells -0a1bf4ca-d4ea-4618-baa5-6e8dc1b46d82,https://superuser.com/questions/571915/sum-up-to-n-highest-value-out-of-a-series -ac9bb6cb-1888-43ab-81e4-a98a547918cd,https://superuser.com/questions/1674211/how-to-change-colour-of-slide-number-in-libre-office -5d901039-a89c-4bfb-967b-bf66f4df075e,https://superuser.com/questions/986776/how-can-i-stretch-an-image-in-a-libreoffice-impress-presentation-to-fill-the-pag -071d4ace-091a-4ec3-886e-f4be55ae375d,https://superuser.com/questions/706860/hide-slide-numbers-and-slide-footer-on-first-and-second-slide-in-libreoffice-imp?rq=1 -550ce7e7-747b-495f-b122-acdc4d0b8e54,https://superuser.com/questions/1211035/libreoffice-impress-animations-how-to-strikethrough-on-click?rq=1 -455d3c66-7dc6-4537-a39a-36d3e9119df7,https://stackoverflow.com/questions/75626383/how-export-libreoffice-impress-to-image -af23762e-2bfd-4a1d-aada-20fa8de9ce07,https://superuser.com/questions/1059080/how-to-make-a-summary-slide-in-impress-listing-the-titles-of-all-slides-autom -c59742c0-4323-4b9d-8a02-723c251deaa0,https://www.reddit.com/r/libreoffice/comments/17lcdrp/audio_not_supported_in_libreoffice_impress/ -39478d4a-1049-456f-aa77-407811393add,https://www.reddit.com/r/libreoffice/comments/jul3o8/putting_cap_or_hat_or_carat_symbol_in_libre/ -82615ce3-95fb-4bba-abcd-daa85753e282,https://stackoverflow.com/questions/72898114/how-to-change-the-appearance-of-a-button-once-it-has-been-clicked-in-libre-impre -c3ad4442-499f-4e58-bc4e-1a1417ea9b8c,http://maharajacollege.ac.in/material/Libreofficeimpresspdf.pdf -ef9d12bd-bcee-4ba0-a40e-918400f43ddf,(1) I closed the slide pannel on the left and idk how to get it back please help : libreoffice (reddit.com) -9ec204e4-f0a3-42f8-8458-b772a6797cab,How to Copy a LibreOffice Impress Master Slide|TikTok 搜索 -35761501-3a99-48d1-8ec6-cea8242349b6,(1) How do I compress a selection of images in LibreOffice Impress? - Quora -0f84bef9-9790-432e-92b7-eece357603fb,ubuntu - how to disable LibreOffice Impress to use multiple display? 
- Stack Overflow -ce88f674-ab7a-43da-9201-468d38539e4a,How to Change Slides to Portrait in LibreOffice Impress - Just Click Here - IT Training -f0a334af-f91b-4c03-b578-aac9bec2b543,Insert Video in Impress Presentation on LibreOffice (libreofficehelp.com) -3b27600c-3668-4abd-8f84-7bcdebbccbdb,Change Slide Background Color in LibreOffice Impress (libreofficehelp.com) -a097acff-6266-4291-9fbd-137af7ecd439,https://www.youtube.com/watch?v=DDmEvjs4iBw -21760ecb-8f62-40d2-8d85-0cee5725cb72,Add Animations and Transitions in LibreOffice Impress Slides - libreofficehelp.com -3cc4f35d-fa2e-4555-afb9-741b7c062a74,Impress Guide 7.6 (libreoffice.org) -6ada715d-3aae-4a32-a6a7-429b2e43fb93,https://www.quora.com/How-do-you-insert-images-into-a-LibreOffice-Writer-document -ecc2413d-8a48-416e-a3a2-d30106ca36cb,https://www.quora.com/How-can-I-insert-a-blank-page-on-libreoffice -0e47de2a-32e0-456c-a366-8c607ef7a9d2,https://ask.libreoffice.org/t/how-to-start-page-numbering-on-a-certain-page/39931/4 -4bcb1253-a636-4df4-8cb0-a35c04dfef31,https://www.libreofficehelp.com/save-export-writer-documents-in-pdf-epub-format/ -0810415c-bde4-4443-9047-d5f70165a697,https://www.youtube.com/watch?v=Q_AaL6ljudU -e528b65e-1107-4b8c-8988-490e4fece599,https://www.youtube.com/watch?v=l25Evu4ohKg -66399b0d-8fda-4618-95c4-bfc6191617e9,https://www.youtube.com/watch?v=l25Evu4ohKg -936321ce-5236-426a-9a20-e0e3c5dc536f,https://www.youtube.com/watch?v=l25Evu4ohKg -663876c7-3471-43db-ba51-f410b13d9d7d,https://askubuntu.com/questions/319593/how-to-type-science-equations-in-libre-office -3ef2b351-8a84-4ff2-8724-d86eae9b842e,https://askubuntu.com/questions/1066351/how-do-you-center-align-in-libreoffice#:~:text=Ctrl%20%2B%20e%20will%20Center%20align%20the%20cursor%20for%20you. -45d61a06-6545-4422-97b7-bc76cfa964c1,https://stackoverflow.com/questions/71685737/how-to-replace-all-newlines-with-paragraph-marks-in-libreoffice-write -0b17a146-2934-46c7-8727-73ff6b6483e8,https://askubuntu.com/questions/245695/how-do-you-insert-subscripts-and-superscripts-into-ordinary-non-formula-text-i -0e763496-b6bb-4508-a427-fad0b6c3e195,https://ask.libreoffice.org/t/how-do-i-change-the-font-for-the-whole-document-in-writer/9220 -f178a4a9-d090-4b56-bc4c-4b72a61a035d,https://ask.libreoffice.org/t/how-do-i-make-times-new-roman-the-default-font-in-lo/64604 -0a0faba3-5580-44df-965d-f562a99b291c,https://stackoverflow.com/questions/64528055/how-to-make-part-of-my-sentence-left-aligned-and-rest-as-right-aligned -e246f6d8-78d7-44ac-b668-fcf47946cb50,https://ask.libreoffice.org/t/how-to-change-text-size-color-of-italic-font/77712 -8472fece-c7dd-4241-8d65-9b3cd1a0b568,https://stackoverflow.com/questions/37259827/libreoffice-writer-how-to-set-different-colors-to-each-letter -88fe4b2d-3040-4c70-9a70-546a47764b48,https://stackoverflow.com/questions/56554555/libreoffice-writer-how-to-create-empty-line-space-after-every-period-in-a-par -6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2,https://superuser.com/questions/762500/how-do-i-find-all-highlighted-text-in-libreoffice-writer -d53ff5ee-3b1a-431e-b2be-30ed2673079b,https://ask.libreoffice.org/t/how-to-convert-all-uppercase-to-lowercase/53341 -72b810ef-4156-4d09-8f08-a0cf57e7cefe,https://superuser.com/questions/657792/libreoffice-writer-how-to-apply-strikethrough-text-formatting?rq=1 -6f81754e-285d-4ce0-b59e-af7edb02d108,https://superuser.com/questions/789473/remove-duplicate-lines-in-libreoffice-openoffice-writer -41c621f7-3544-49e1-af8d-dafd0f834f75,https://superuser.com/questions/1668018/how-to-auto-format-lines-in-libre-office-writer 
-b21acd93-60fd-4127-8a43-2f5178f4a830,https://superuser.com/questions/1097199/how-can-i-double-space-a-document-in-libreoffice?rq=1 -59f21cfb-0120-4326-b255-a5b827b38967,Media — VLC Desktop User Documentation 3.0 documentation (videolan.me) -8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89,Recording what your are playing — VLC Desktop User Documentation 3.0 documentation (videolan.me) -8f080098-ddb1-424c-b438-4e96e5e4786e,https://medium.com/@jetscribe_ai/how-to-extract-mp3-audio-from-videos-using-vlc-media-player-beeef644ebfb -bba3381f-b5eb-4439-bd9e-80c22218d5a7,(1) How do I play online videos using the VLC media player? - Quora -a1c3ab35-02de-4999-a7ed-2fd12c972c6e,(1) How do I compress a video with VLC? - Quora -fba2c100-79e8-42df-ae74-b592418d54f4,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s -d70666e4-7348-42c7-a06a-664094c5df3c,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s -efcf0d81-0835-4880-b2fd-d866e8bc2294,"https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s, https://help.ubuntu.com/stable/ubuntu-help/look-background.html.en" -8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s -aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6,https://videoconverter.wondershare.com/vlc/how-to-rotate-a-video-using-vlc.html?gad_source=1&gclid=CjwKCAiA-vOsBhAAEiwAIWR0TaGSOLkYiBeVQGZSyfeUP3g-tIvYxffl5RFIu0-zrUL1IF41eCw1JRoCnCMQAvD_BwE -386dbd0e-0241-4a0a-b6a2-6704fba26b1c,https://superuser.com/questions/1708415/pause-and-play-vlc-in-background?rq=1 -9195653c-f4aa-453d-aa95-787f6ccfaae9,https://superuser.com/questions/1513285/how-can-i-increase-the-maximum-volume-output-by-vlc?rq=1 -5ac2891a-eacd-4954-b339-98abba077adb,"https://superuser.com/questions/1412810/how-to-prevent-vlc-media-player-from-auto-closing-after-video-end#:%7E:text=Click%20on%20%22Media%22on%20the,VLC%20player%20after%20video%20ending" -2f24d255-a3af-4ab3-9dc3-66ff792417e7,https://superuser.com/questions/1753588/vlc-crop-hide-part-of-screen -0d95d28a-9587-433b-a805-1fbe5467d598,https://superuser.com/questions/1299036/vlc-how-to-open-the-folder-of-the-current-playing-video?noredirect=1&lq=1 -d06f0d4d-2cd5-4ede-8de9-598629438c6e,https://superuser.com/questions/1039392/changing-colour-of-vlc-volume-slider -a5bbbcd5-b398-4c91-83d4-55e1e31bbb81,https://superuser.com/questions/776056/how-to-hide-bottom-toolbar-in-vlc -f3977615-2b45-4ac5-8bba-80c17dbe2a37,https://www.reddit.com/r/Fedora/comments/rhljzd/how_to_run_multiple_instances_of_vlc_media_player/ -c669a35f-d45a-450e-b1f2-f473748337bb,https://www.quora.com/How-do-I-fast-forward-a-video-in-VLC-player -d1ba14d0-fef8-4026-8418-5b581dc68ca0,https://superuser.com/questions/306154/how-to-use-a-b-repeat-feature-of-vlc -215dfd39-f493-4bc3-a027-8a97d72c61bf,https://superuser.com/questions/1224784/how-to-change-vlcs-splash-screen -bb5e4c0d-f964-439c-97b6-bdb9747de3f4,How to Remove an Email Account from Thunderbird: 4 Steps (wikihow.com) -7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,How to Access Gmail With Mozilla Thunderbird: 7 Steps (wikihow.com) -b188fe10-ae67-4db8-a154-26a0b8ff8f1e,(1) Restore readability in message list pane : Thunderbird (reddit.com) -12086550-11c0-466b-b367-1d9e75b3910e,https://www.bitrecover.com/blog/manage-thunderbird-profiles/ -06fe7178-4491-4589-810f-2e2bc9502122,(1) How do I backup email files in Mozilla Thunderbird? 
- Quora -6766f2b8-8a72-417f-a9e5-56fcaa735837,"https://www.adsigner.com/user-manual/signatures/setup-email-client-thunderbird/#:~:text=is%20probably%20hidden.-,Right%20click%20on%20the%20empty%20space%20at%20the%20top%20of,signature%20from%20a%20file%20instead." -e1e75309-3ddb-4d09-92ec-de869c928143,https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters -3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5,https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters -35253b65-1c19-4304-8aa4-6884b8218fc0,https://support.mozilla.org/en-US/questions/1259354 -d088f539-cab4-4f9a-ac92-9999fc3a656e,https://support.mozilla.org/en-US/kb/how-use-attachments -2ad9387a-65d8-4e33-ad5b-7580065a27ca,"https://support.mozilla.org/bm/questions/1027435, https://www.wikihow.tech/Create-Folders-in-Mozilla-Thunderbird" -480bcfea-d68f-4aaa-a0a9-2589ef319381,https://www.reddit.com/r/Thunderbird/comments/182dg5p/unified_inbox_howto/ -37b9808f-b2b4-4177-ab00-9ddfae4bad27,https://www.quora.com/How-can-I-schedule-Mozilla-Thunderbird-to-turn-off-automatically -af630914-714e-4a24-a7bb-f9af687d3b91,https://stackoverflow.com/questions/11333148/adding-a-toolbar-button-to-a-thundebird-compose-message-window?rq=3 -ae78f875-5b98-4907-bbb5-9c737fc68c03,https://stackoverflow.com/questions/22323746/thunderbird-extension-add-button-in-message-reader-toolbar?rq=3 -3299584d-8f11-4457-bf4c-ce98f7600250,https://superuser.com/questions/1643561/would-like-to-see-the-email-address-from-sender-in-the-column -030eeff7-b492-4218-b312-701ec99ee0cc,https://superuser.com/questions/1781004/how-do-i-remove-the-indentation-and-character-in-quoted-text-of-a-reply-mess -94760984-3ff5-41ee-8347-cf1af709fea0,https://superuser.com/questions/1757333/how-can-i-view-thunderbird-in-full-dark-mode -99146c54-4f37-4ab8-9327-5f3291665e1e,https://superuser.com/questions/1764409/how-to-send-email-with-thunderbird-without-configuring-an-incoming-email-service -9656a811-9b5b-4ddf-99c7-5117bcef0626,https://superuser.com/questions/205240/is-there-a-way-to-get-a-popup-confirmation-box-when-you-send-an-email-in-thunder?rq=1 -c9e7eaf2-b1a1-4efc-a982-721972fa9f02,https://superuser.com/questions/544480/how-to-apply-automatic-message-filters-to-subfolders-too?noredirect=1&lq=1 -bb5e4c0d-f964-439c-97b6-bdb9747de3f4,Set your default search engine & site search shortcuts - Computer - Google Chrome Help -7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,"Clear, allow and manage cookies in Chrome - Computer - Google Chrome Help" -12086550-11c0-466b-b367-1d9e75b3910e,(1) What are the cool tricks to use Google Chrome? 
- Quora -06fe7178-4491-4589-810f-2e2bc9502122,4 Ways to Switch Tabs in Chrome - wikiHow -6766f2b8-8a72-417f-a9e5-56fcaa735837,https://support.google.com/chrome/thread/205881926/it-s-possible-to-load-unpacked-extension-automatically-in-chrome?hl=en -e1e75309-3ddb-4d09-92ec-de869c928143,https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php -3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5,https://in5stepstutorials.com/google-chrome/add-change-delete-autofill-address.php -35253b65-1c19-4304-8aa4-6884b8218fc0,"https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome, https://www.reddit.com/r/chrome/comments/13xcbap/crete_shortcut_option_missing/" -d088f539-cab4-4f9a-ac92-9999fc3a656e,https://medium.com/@inkverseuk2/useful-tips-and-tricks-for-the-google-chrome-browser-ac7d0d24b3cc -2ad9387a-65d8-4e33-ad5b-7580065a27ca,https://www.youtube.com/watch?v=IN-Eq_UripQ -7a5a7856-f1b6-42a4-ade9-1ca81ca0f263,https://www.youtube.com/watch?v=ZaZ8GcTxjXA -3720f614-37fd-4d04-8a6b-76f54f8c222d,https://superuser.com/questions/984668/change-interface-language-of-chrome-to-english -b63059a2-53bc-4163-a89f-3ac948c74081,https://superuser.com/questions/1303418/how-do-i-make-chrome-block-absolutely-all-pop-ups?rq=1 -44ee5668-ecd5-4366-a6ce-c1c9b8d4e938,https://superuser.com/questions/1787991/clear-browsing-history-from-specific-site-on-chrome -b5ebc8c6-6329-4373-85b4-9421c97375e9,https://superuser.com/questions/364470/is-there-a-way-to-view-google-chrome-browsing-history-past-three-months-ago?rq=1 -93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9,https://superuser.com/questions/1417973/how-to-disable-google-chrome-dark-mode -2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3,https://superuser.com/questions/1393683/how-to-change-the-username-in-google-chrome-profiles?rq=1 -480bcfea-d68f-4aaa-a0a9-2589ef319381,https://bugartisan.medium.com/disable-the-new-chrome-ui-round-in-2023-db168271f71e -37b9808f-b2b4-4177-ab00-9ddfae4bad27,https://www.reddit.com/r/chrome/comments/17niw3h/tutorial_how_to_disable_the_download_bubble_in/ -af630914-714e-4a24-a7bb-f9af687d3b91,https://www.howtogeek.com/680260/how-to-change-chromes-default-text-size/ -ae78f875-5b98-4907-bbb5-9c737fc68c03,https://support.google.com/chrome/thread/219988391/increase-search-results-per-page?hl=en -0ed39f63-6049-43d4-ba4d-5fa2fe04a951,(1) How do you find and replace text in Visual Studio Code? - Quora -b421106e-b282-4c41-af72-37c95493f95f,visual studio code - Launch VScode with new.txt file - Stack Overflow -53ad5833-3455-407b-bbc6-45b4c79ab8fb,Visual Studio Code Tutorial for Beginners - Introduction (youtube.com) -eabc805a-bfcf-4460-b250-ac92135819f6,Visual Studio Code Tutorial for Beginners - Introduction - YouTube -3486f395-ad68-459c-8c39-ea07de934dd4,Visual Studio Code Tutorial for Beginners - Introduction - YouTube -982d12a5-beab-424f-8d38-d2a48429e511,VSCode Tutorial For Beginners - Getting Started With VSCode (youtube.com) -4e60007a-f5be-4bfc-9723-c39affa0a6d3,"https://campbell-muscle-lab.github.io/howtos_Python/pages/documentation/best_practices/vscode_docstring_extension/vscode_docstring_extension.html#:~:text=Type%2C%20Ctrl%20%2B%20Shift%20%2B%20P,select%20the%20NumPy%20docstring%20format." 
-e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2,https://superuser.com/questions/1386061/how-to-suppress-some-python-errors-warnings-in-vs-code -9439a27b-18ae-42d8-9778-5f68f891805e,https://stackoverflow.com/questions/75832474/how-to-keep-cursor-in-debug-console-when-debugging-in-visual-studio-code -ae506c68-352c-4094-9caa-ee9d42052317,https://superuser.com/questions/1460404/get-visual-studio-code-terminal-history?rq=1 -ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae,https://superuser.com/questions/1748097/vs-code-disable-tree-view-find-explorer-search -c714dcee-cad3-4e12-8f3c-12bdcfcdb048,https://superuser.com/questions/1417361/how-to-disable-file-filtering-in-vs-code-sidebar-explorer?rq=1 -930fdb3b-11a8-46fe-9bac-577332e2640e,https://superuser.com/questions/1270103/how-to-switch-the-cursor-between-terminal-and-code-in-vscode -276cc624-87ea-4f08-ab93-f770e3790175,https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code -9d425400-e9b2-4424-9a4b-d4c7abac4140,https://superuser.com/questions/1466771/is-there-a-way-to-make-editor-tabs-stack-in-vs-code -7a4deb26-d57d-4ea9-9a73-630f66a7b568,https://www.quora.com/How-do-I-edit-a-photo-in-GIMP -554785e9-4523-4e7a-b8e1-8016f565f56a,https://www.quora.com/How-do-I-edit-a-photo-in-GIMP -77b8ab4d-994f-43ac-8930-8ca087d7c4b4,https://superuser.com/questions/1636113/how-to-get-gimp-to-recognize-images-or-pictures-folder-as-the-default-folder-for -f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce,https://superuser.com/questions/612338/how-do-i-select-and-move-an-object-in-gimp -d52d6308-ec58-42b7-a2c9-de80e4837b2b,https://superuser.com/questions/1447106/how-to-get-rid-of-the-gimp-tool-options-box -2a729ded-3296-423d-aec4-7dd55ed5fbb3,https://www.youtube.com/watch?v=lOzSiOIipSM -b148e375-fe0b-4bec-90e7-38632b0d73c2,https://www.quora.com/How-do-I-add-layers-in-GIMP -a746add2-cab0-4740-ac36-c3769d9bfb46,https://www.youtube.com/watch?v=_L_MMU22bAw -7b7617bd-57cc-468e-9c91-40c4ec2bcb3d,https://www.youtube.com/watch?v=G_PjQAy0iiU -d16c99dc-2a1e-46f2-b350-d97c86c85c15,https://stackoverflow.com/questions/75185543/use-gimp-to-resize-image-in-one-layer-only -573f79b5-abfe-4507-b455-251d45fe6198,https://stackoverflow.com/questions/45196895/gimp-add-padding-to-multiple-images -06ca5602-62ca-47f6-ad4f-da151cde54cc,https://stackoverflow.com/questions/74664666/how-to-export-palette-based-png-in-gimp -fa9b1e10-4d2d-4a13-af76-7efa822b6a8b,https://stackoverflow.com/questions/24626608/how-to-combine-several-png-images-as-layers-in-a-single-xcf-image -6b2b72ed-3a10-4849-876a-750f7cdf3886,https://stackoverflow.com/questions/21018007/resize-image-to-fit-canvas-gimp -d0e42fd2-d290-46b3-b598-a6e2b7be9c85,https://stackoverflow.com/questions/56758689/stop-gimp-from-merging-layers-when-de-selecting -e2dd0213-26db-4349-abe5-d5667bfd725c,https://superuser.com/questions/839650/how-to-move-an-inserted-text-box-in-gimp -f723c744-e62c-4ae6-98d1-750d3cd7d79d,https://www.reddit.com/r/GIMP/comments/12e57w8/how_to_use_gimp_to_exaggerate_contrast/ -8d6b1c9c-1aab-47fe-9ba5-e84c838d0c57,https://www.quora.com/How-can-email-attachments-be-converted-into-a-word-document-using-Mozilla-Thunderbird -11e1e614-9696-4d94-88c9-8e556880d41a,https://ifttt.com/applets/L2A89geP-send-chrome-software-update-release-alerts-to-email -57956154-f0fe-486b-88b8-e7126da035a9,https://zapier.com/apps/email/integrations/google-sheets/547/get-email-notifications-for-new-rows-in-a-google-sheets-spreadsheet 
-ec14c524-b245-456d-abd6-ec12c746e9f8,https://zapier.com/apps/gmail/integrations/google-sheets/2618/save-new-gmail-emails-matching-certain-traits-to-a-google-spreadsheet -cbf5fbda-425e-4619-bcf2-0ea8d4c0bfa3,https://zapier.com/apps/google-sheets/integrations/google-slides/13919/refresh-charts-on-a-google-slides-presentation-when-rows-are-updated-on-google-sheets -a54284d0-7b93-4327-bfcc-3a421516dbdd,https://superuser.com/questions/655622/cannot-drag-images-from-thunderbird-to-word -58565672-7bfe-48ab-b828-db349231de6b,https://superuser.com/questions/1792660/open-link-from-other-application-does-not-open-the-url-in-firefox -6d72aad6-187a-4392-a4c4-ed87269c51cf,https://superuser.com/questions/923171/converting-openoffice-impress-presentation-to-video-without-screen-recording -937087b6-f668-4ba6-9110-60682ee33441,https://superuser.com/questions/187440/set-default-ubuntu-video-player-as-vlc -f8cfa149-d1c1-4215-8dac-4a0932bad3c2,https://superuser.com/questions/1803088/libreoffice-calc-clears-clipboard -5e974913-6905-4c3f-8b65-d7837f3931cc,https://stackoverflow.com/questions/61856141/how-can-i-start-thunderbird-and-minimize-the-window-on-startup-in-ubuntu -7c179dad-f1c7-4892-b53f-d1c4023d23c7,https://stackoverflow.com/questions/21155085/pasting-excel-tables-in-thunderbird-e-mail-client -4a68b2dd-70f2-4532-9bc1-d21878bd8cb2,https://stackoverflow.com/questions/65669955/thunderbird-how-to-send-a-mail-to-all-receivers-of-a-folder -c8457fde-b14b-4aba-b402-144842ea29e1,https://stackoverflow.com/questions/65788200/how-to-open-xlsx-files-in-ms-excel-from-vs-code -81c425f5-78f3-4771-afd6-3d2973825947,1) https://www.youtube.com/watch?v=PkITfDF-aOw 2) https://www.zyxware.com/articles/3770/how-to-transfer-data-in-libreoffice-calc-to-libreoffice-writer-in-table-format -bb83cab4-e5c7-42c7-a67b-e46068032b86,https://ask.libreoffice.org/t/save-impress-presentation-as-writer-document/5291/4 -227d2f97-562b-4ccb-ae47-a5ec9e142fbb,https://discourse.gnome.org/t/gimp-and-libre-office-writer/15430/4 -a6bbc08c-51e9-4ee4-9327-83d05075d960,https://forum.openoffice.org/en/forum/viewtopic.php?t=105055 -964e6e03-ba31-466b-8c15-5a351a81f675,https://www.maketecheasier.com/mail-merge-thunderbird-calc/ -2fe4b718-3bd7-46ec-bdce-b184f5653624,https://www.thewindowsclub.com/how-to-create-animated-gif-from-a-video-file-using-vlc-and-gimp -d02b9364-6bb0-4c7e-9dbd-4db62822bc26,https://stackoverflow.com/questions/38306910/simple-python-script-to-get-a-libreoffice-base-field-and-play-on-vlc -57fb469b-127a-46fa-8281-bbb3840efdf5,https://support.mozilla.org/en-US/questions/1150626 -3680a5ee-6870-426a-a997-eba929a0d25c,https://unix.stackexchange.com/questions/510850/how-to-open-calc-from-terminal-and-insert-files -2d8c8a20-6f54-4c2e-ad56-61fbe7af6b78,https://www.quora.com/How-do-I-force-LibreOffice-Calc-to-recalculate-a-spreadsheet-from-the-command-line -ee9a3c83-f437-4879-8918-be5efbb9fac7,https://stackoverflow.com/questions/64589140/convert-ods-to-csv-using-command-line-when-libreoffice-instance-is-running -f7dfbef3-7697-431c-883a-db8583a4e4f9,https://www.thegeekdiary.com/libreoffice-command-examples-in-linux/ -2b9493d7-49b8-493a-a71b-56cd1f4d6908,https://devicetests.com/kill-libreoffice-writer-command-line-ubuntu -51f5801c-18b3-4f25-b0c3-02f85507a078,https://github.com/danielrcollins1/ImpressExtractNotes -81de345e-5473-4cb6-a74d-b6abf3475a6a,https://stackoverflow.com/questions/45588952/how-can-i-compose-and-send-email-in-thunderbird-from-commandline 
-2c9fc0de-3ee7-45e1-a5df-c86206ad78b5,https://nikki-ricks.medium.com/how-to-use-git-add-commit-and-push-in-vs-code-and-command-line-35c0e8c47b62 -510f64c8-9bcc-4be1-8d30-638705850618,https://www.geeksforgeeks.org/how-to-start-vs-code-from-the-terminal-command-line/ -9ff484f7-5c09-4398-ae29-d5904e59e138,https://stackoverflow.com/questions/38606973/playing-opening-and-pausing-vlc-command-line-executed-from-python-scripts -d9b7c649-c975-4f53-88f5-940b29c47247,Extract the first 1000 Gmail Emails from the current month in a new Google Sheets report - RPA Component | UiPath Marketplace | Overview -be4ef0dc-0f70-4936-81d8-3cd2b04482f8,Table Data Extraction for Sales Opportunities to Excel workbook - RPA Component | UiPath Marketplace | Overview -78aed49a-a710-4321-a793-b611a7c5b56b,Upload email attachments from Gmail to Google Drive - RPA Component | UiPath Marketplace | Overview -897e3b53-5d4d-444b-85cb-2cdc8a97d903,Convert Word file to PDF and store in OneDrive - RPA Component | UiPath Marketplace | Overview -4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc,Extract data from a new invoice file in Google Drive and store it in Google Sheets - RPA Component | UiPath Marketplace | Overview -b52b40a5-ad70-4c53-b5b0-5650a8387052,Merge PDFs from Gmail email attachments and upload to Gogle Drive - RPA Component | UiPath Marketplace | Overview -46407397-a7d5-4c6b-92c6-dbe038b1457b,Upload to Google Drive images from PDF attachments received via Gmail - RPA Component | UiPath Marketplace | Overview -a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb,Backup important emails to OneDrive or SharePoint - RPA Component | UiPath Marketplace | Overview -665f4af1-617d-4009-baff-84ff66071e6a,https://www.howtogeek.com/663927/how-to-open-google-chrome-using-command-prompt-on-windows-10/#open-chrome-straight-to-a-specific-website -e6313b30-3903-4ed9-8c7d-4c47bf51fc96,https://stackoverflow.com/questions/12258086/how-do-i-run-google-chrome-as-root \ No newline at end of file +id,Source,InvolvedApp +94d95f96-9699-4208-98ba-3c3119edf9c2,https://help.ubuntu.com/lts/ubuntu-help/addremove-install.html.en,OS +bedcedc4-4d72-425e-ad62-21960b11fe0d,https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s,OS +43c2d64c-bab5-4dcb-a30c-b888321c319a,https://ubuntu.com/tutorials/command-line-for-beginners#4-creating-folders-and-files,OS +7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82,https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files,OS +ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3,https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s,OS +a462a795-fdc7-4b23-b689-e8b6df786b78,https://help.ubuntu.com/lts/ubuntu-help/shell-exit.html.en,OS +f9be0997-4b7c-45c5-b05c-4612b44a6118,https://help.ubuntu.com/lts/ubuntu-help/shell-notifications.html.en,OS +ae039631-2b12-4637-84f6-c67d51511be3,https://help.ubuntu.com/lts/ubuntu-help/net-default-browser.html.en,OS +e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15,https://help.ubuntu.com/lts/ubuntu-help/contacts-add-remove.html.en,OS +28cc3b7e-b194-4bc9-8353-d04c0f4d56d2,https://help.ubuntu.com/lts/ubuntu-help/sound-volume.html.en,OS +5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57,https://help.ubuntu.com/lts/ubuntu-help/files-recover.html.en,OS +e0df059f-28a6-4169-924f-b9623e7184cc,https://help.ubuntu.com/lts/ubuntu-help/files-rename.html.en,OS +ddc75b62-7311-4af8-bfb3-859558542b36,https://help.ubuntu.com/lts/ubuntu-help/addremove-remove.html.en,OS +5c433d22-ed9a-4e31-91f5-54cf3e8acd63,https://help.ubuntu.com/lts/ubuntu-help/session-language.html.zh-CN,OS 
+b6781586-6346-41cd-935a-a6b1487918fc,https://help.ubuntu.com/lts/ubuntu-help/clock-timezone.html.en,OS
+b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa,https://help.ubuntu.com/lts/ubuntu-help/bluetooth-turn-on-off.html.en,OS
+3ce045a0-877b-42aa-8d2c-b4a863336ab8,https://help.ubuntu.com/lts/ubuntu-help/a11y-font-size.html.en,OS
+fe41f596-a71b-4c2f-9b2f-9dcd40b568c3,https://help.ubuntu.com/lts/ubuntu-help/power-percentage.html.en,OS
+a4d98375-215b-4a4d-aee9-3d4370fccc41,https://help.ubuntu.com/lts/ubuntu-help/privacy-screen-lock.html.en,OS
+765d2b74-88a7-4d50-bf51-34e4106fd24a,https://help.ubuntu.com/lts/ubuntu-help/files-delete.html.en,OS
+cc9d4f34-1ca0-4a1b-8ff2-09302696acb9,https://superuser.com/questions/178587/how-do-i-detach-a-process-from-terminal-entirely,OS
+5812b315-e7bd-4265-b51f-863c02174c28,https://superuser.com/questions/149404/create-an-ssh-user-who-only-has-permission-to-access-specific-folders,OS
+c56de254-a3ec-414e-81a6-83d2ce8c41fa,https://superuser.com/questions/28426/how-to-extract-text-with-ocr-from-a-pdf-on-linux,OS
+6ebbfb01-ea72-4226-a2a6-dc428e111ed2,https://superuser.com/questions/46748/how-do-i-make-bash-my-default-shell-on-ubuntu,OS
+4d2b519e-e872-4100-8ea3-fe71ab0f9133,https://stackoverflow.com/questions/11530090/adding-a-new-entry-to-the-path-variable-in-zsh,OS
+c288e301-e626-4b98-a1ab-159dcb162af5,https://stackoverflow.com/questions/41986507/unable-to-set-default-python-version-to-python3-in-ubuntu,OS
+13584542-872b-42d8-b299-866967b5c3ef,https://superuser.com/questions/72176/linux-set-default-terminal-size-and-screen-position,OS
+23393935-50c7-4a86-aeea-2b78fd089c5c,https://superuser.com/questions/91307/copying-only-jpg-from-a-directory-structure-to-another-location-linux,OS
+f10b16e1-c160-4cb3-989f-7b2ec89bc073,https://www.wikihow.com/Install-Gnome-on-Ubuntu,OS
+eb03d19a-b88d-4de4-8a64-ca0ac66f426b,https://www.youtube.com/shorts/t9JLUaT55UQ,MS Excel
+0bf05a7d-b28b-44d2-955a-50b41e24012a,https://www.youtube.com/shorts/FPAQaDTS8VY,MS Excel
+7b802dad-6e0f-4204-9815-d4e3f57627d8,https://www.youtube.com/shorts/Of-lzeP1usE,MS Excel
+7a4e4bc8-922c-4c84-865c-25ba34136be1,https://www.youtube.com/shorts/bvUhr1AHs44,MS Excel
+2bd59342-0664-4ccb-ba87-79379096cc08,https://www.youtube.com/shorts/L3Z-F1QTQFY,MS Excel
+a9f325aa-8c05-4e4f-8341-9e4358565f4f,https://www.youtube.com/shorts/A0gmEBRKXWs,MS Excel
+ecb0df7a-4e8d-4a03-b162-053391d3afaf,https://www.youtube.com/shorts/tXOovKn0H68,MS Excel
+7efeb4b1-3d19-4762-b163-63328d66303b,https://www.youtube.com/shorts/4jzXfZNhfmk,MS Excel
+4e6fcf72-daf3-439f-a232-c434ce416af6,https://www.youtube.com/shorts/0uxJccNCKcE,MS Excel
+6054afcb-5bab-4702-90a0-b259b5d3217c,https://www.youtube.com/shorts/JTbZ8sRxkdU,MS Excel
+abed40dc-063f-4598-8ba5-9fe749c0615d,https://www.youtube.com/shorts/xgf4ZpsEx5M,MS Excel
+01b269ae-2111-4a07-81fd-3fcd711993b0,https://www.youtube.com/shorts/VrUzPTIwQ04,MS Excel
+8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14,https://www.youtube.com/shorts/Hbcwu6IQ1ns,MS Excel
+af2b02f7-acee-4be4-8b66-499fab394915,https://www.youtube.com/shorts/AwKsb5VmtBI,MS Excel
+da1d63b8-fa12-417b-ba18-f748e5f770f3,https://www.youtube.com/shorts/hquscnbz2-U,MS Excel
+636380ea-d5f6-4474-b6ca-b2ed578a20f1,https://www.youtube.com/shorts/_BYL6VOHLGw,"MS Excel, Edge"
+5ba77536-05c5-4aae-a9ff-6e298d094c3e,https://www.youtube.com/shorts/CuBC1evUS5I,MS Excel
+4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b,https://www.youtube.com/shorts/1adQWfjN-tI,MS Excel
+672a1b02-c62f-4ae2-acf0-37f5fb3052b0,https://www.youtube.com/shorts/2rhdQXI4Lng,MS Excel
+648fe544-16ba-44af-a587-12ccbe280ea6,https://www.youtube.com/shorts/sOPBMWaC6Uc,MS Excel
+8985d1e4-5b99-4711-add4-88949ebb2308,https://www.youtube.com/shorts/J5ts2Acv9Pc,MS Excel
+9e606842-2e27-43bf-b1d1-b43289c9589b,https://www.youtube.com/shorts/B-mGYDFOyUs,MS Excel
+fcb6e45b-25c4-4087-9483-03d714f473a9,https://www.youtube.com/shorts/GZipp7nOZS0,MS Excel
+68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2,https://www.youtube.com/shorts/JEH5TsK-cCk,"MS Excel, Edge"
+fff629ea-046e-4793-8eec-1a5a15c3eb35,https://www.youtube.com/shorts/8WybtCdUT6w,MS Excel
+5c9a206c-bb00-4fb6-bb46-ee675c187df5,https://www.youtube.com/shorts/VbQtMNnq9i4,MS Excel
+e975ae74-79bd-4672-8d1c-dc841a85781d,https://www.youtube.com/shorts/GjT7gGe5Sr8,MS Excel
+34a6938a-58da-4897-8639-9b90d6db5391,https://www.youtube.com/shorts/gW37x2TkzOY,MS Excel
+b5a22759-b4eb-4bf2-aeed-ad14e8615f19,https://www.youtube.com/shorts/3xLa-D0C7Ic,MS Excel
+2f9913a1-51ed-4db6-bfe0-7e1c95b3139e,https://www.youtube.com/shorts/dGLRcmfVO6Q,MS Excel
+2558031e-401d-4579-8e00-3ecf540fb492,https://www.mrexcel.com/board/threads/sales-for-the-first-6-weeks.1249213/,MS Excel
+39aa4e37-dc91-482e-99af-132a612d40f3,https://www.libreofficehelp.com/add-insert-delete-copy-move-rename-a-worksheet-in-libreoffice-calc/,LibreOffice Calc
+0cecd4f3-74de-457b-ba94-29ad6b5dafb6,https://www.libreofficehelp.com/add-insert-delete-copy-move-rename-a-worksheet-in-libreoffice-calc/,LibreOffice Calc
+4188d3a4-077d-46b7-9c86-23e1a036f6c1,https://www.libreofficehelp.com/freeze-unfreeze-rows-columns-ranges-calc/,LibreOffice Calc
+51b11269-2ca8-4b2a-9163-f21758420e78,https://www.reddit.com/r/LibreOfficeCalc/comments/186pcc6/how_to_arrange_numbers_in_a_column_from_minimum/,LibreOffice Calc
+7e429b8d-a3f0-4ed0-9b58-08957d00b127,https://medium.com/@divyangichaudhari17/how-to-use-vlookup-and-hlookup-in-libre-calc-3370698bb3ff,LibreOffice Calc
+f5a90742-3fa2-40fc-a564-f29b054e0337,https://superuser.com/questions/1236149/libreoffice-calc-how-to-apply-functions-to-columns,LibreOffice Calc
+22df9241-f8d7-4509-b7f1-37e501a823f7,https://superuser.com/questions/1767185/how-do-you-move-cells-in-libreoffice-calc,LibreOffice Calc
+1434ca3e-f9e3-4db8-9ca7-b4c653be7d17,https://www.wikihow.com/Remove-Duplicates-in-Open-Office-Calc,LibreOffice Calc
+347ef137-7eeb-4c80-a3bb-0951f26a8aff,https://www.youtube.com/watch?v=bgO40-CjYNY,LibreOffice Calc
+6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5,https://www.youtube.com/watch?v=nl-nXjJurhQ,LibreOffice Calc
+3aaa4e37-dc91-482e-99af-132a612d40f3,https://www.quora.com/How-can-you-import-export-CSV-files-with-LibreOffice-Calc-or-OpenOffice,LibreOffice Calc
+0decd4f3-74de-457b-ba94-29ad6b5dafb6,https://justclickhere.co.uk/resources/checkboxes-tick-boxes-libreoffice-calc/,LibreOffice Calc
+37608790-6147-45d0-9f20-1137bb35703d,https://www.youtube.com/shorts/uzPo_CPCHH8,MS Excel
+f9584479-3d0d-4c79-affa-9ad7afdd8850,https://youtube.com/shorts/feldd-Pn48c?si=9xJiem2uAHm6Jshb,LibreOffice Calc
+d681960f-7bc3-4286-9913-a8812ba3261a,https://www.youtube.com/shorts/d7U1S_IsTVM,LibreOffice Calc
+f6a90742-3fa2-40fc-a564-f29b054e0337,https://www.excel-easy.com/examples/drop-down-list.html,LibreOffice Calc
+21df9241-f8d7-4509-b7f1-37e501a823f7,https://www.youtube.com/watch?v=p5C4V_AO1UU,LibreOffice Calc
+1334ca3e-f9e3-4db8-9ca7-b4c653be7d17,https://techcommunity.microsoft.com/t5/excel/excel-workbook-top-way-too-big-can-t-see-rows-and-columns/m-p/4014694,LibreOffice Calc
+357ef137-7eeb-4c80-a3bb-0951f26a8aff,https://www.reddit.com/r/excel/comments/17zny8u/calculating_total_amount_earned_from_total_hours/,LibreOffice Calc
+6f99a1ad-07d2-4b66-a1ce-ece6d99c20a5,https://techcommunity.microsoft.com/t5/excel/sumarize-the-sheetnames/m-p/4014716,LibreOffice Calc
+aa3a8974-2e85-438b-b29e-a64df44deb4b,https://www.quora.com/Libre-Office-Calc-How-do-I-resize-all-cells-in-a-sheet-to-make-them-fit-to-1-page-for-printing-and-exporting-as-PDF,LibreOffice Calc
+a01fbce3-2793-461f-ab86-43680ccbae25,https://superuser.com/questions/1250677/how-to-set-decimal-separator-in-libre-office-calc,LibreOffice Calc
+4f07fbe9-70de-4927-a4d5-bb28bc12c52c,https://superuser.com/questions/1081048/libreoffice-calc-how-to-pad-number-to-fixed-decimals-when-used-within-formula,LibreOffice Calc
+e3b1d5fa-ed00-4129-bda1-1452bd2b6772,https://www.reddit.com/r/libreoffice/comments/tel112/calc_how_to_calculate_sum_by_categories/,LibreOffice Calc
+ca6a9524-f8e9-4d2f-9364-ab0cad567739,https://www.reddit.com/r/libreoffice/comments/113gmyc/how_to_remove_certain_text_from_cells_in_calc/,LibreOffice Calc
+a455e8d0-930f-40d2-9575-5e8d2d222f58,https://superuser.com/questions/562944/quickly-fill-blank-cells-in-a-list-in-libreoffice-calc,LibreOffice Calc
+83ee22c6-7737-49ce-9b5a-138c3e92af04,https://superuser.com/questions/661102/currency-conversion-in-libreoffice-calc,LibreOffice Calc
+819f61c2-ec77-4d3f-9996-0838ae5aacc8,https://superuser.com/questions/381696/creating-a-column-of-working-days-in-libreoffice-calc,LibreOffice Calc
+69d577b3-004e-4bca-89b2-0d7c2f6049e3,https://superuser.com/questions/387106/libreoffice-calc-how-to-get-total-for-hhmmss-cells,LibreOffice Calc
+0a1bf4ca-d4ea-4618-baa5-6e8dc1b46d82,https://superuser.com/questions/571915/sum-up-to-n-highest-value-out-of-a-series,LibreOffice Calc
+ac9bb6cb-1888-43ab-81e4-a98a547918cd,https://superuser.com/questions/1674211/how-to-change-colour-of-slide-number-in-libre-office,LibreOffice Impress
+5d901039-a89c-4bfb-967b-bf66f4df075e,https://superuser.com/questions/986776/how-can-i-stretch-an-image-in-a-libreoffice-impress-presentation-to-fill-the-pag,LibreOffice Impress
+071d4ace-091a-4ec3-886e-f4be55ae375d,https://superuser.com/questions/706860/hide-slide-numbers-and-slide-footer-on-first-and-second-slide-in-libreoffice-imp?rq=1,LibreOffice Impress
+550ce7e7-747b-495f-b122-acdc4d0b8e54,"https://technical-tips.com/blog/software/text-in-libreoffice-strikethrough--6948#:~:text=To%20strikethrough%20Text%20in%20LibreOffice%201%20In%20your,effect%22%20can%20your%20additionally%2C%20for%20example%2C%20double%20underline.",LibreOffice Impress
+455d3c66-7dc6-4537-a39a-36d3e9119df7,"https://www.libreofficehelp.com/export-libreoffice-impress-slides-images/#:~:text=Exporting%20a%20single%20slide%20as.jpg%2C.png%2C%20etc%20image%20is,on%20the%20checkbox%20Selection.%20Provide%20jpg%20quality%20options.",LibreOffice Impress
+af23762e-2bfd-4a1d-aada-20fa8de9ce07,https://superuser.com/questions/1059080/how-to-make-a-summary-slide-in-impress-listing-the-titles-of-all-slides-autom,LibreOffice Impress
+c59742c0-4323-4b9d-8a02-723c251deaa0,https://www.reddit.com/r/libreoffice/comments/17lcdrp/audio_not_supported_in_libreoffice_impress/,LibreOffice Impress
+39478d4a-1049-456f-aa77-407811393add,https://www.reddit.com/r/libreoffice/comments/jul3o8/putting_cap_or_hat_or_carat_symbol_in_libre/,LibreOffice Impress
+c3ad4442-499f-4e58-bc4e-1a1417ea9b8c,http://maharajacollege.ac.in/material/Libreofficeimpresspdf.pdf,LibreOffice Impress
+ef9d12bd-bcee-4ba0-a40e-918400f43ddf,https://www.reddit.com/r/libreoffice/comments/18elh3y/i_closed_the_slide_pannel_on_the_left_and_idk_how/,LibreOffice Impress
+9ec204e4-f0a3-42f8-8458-b772a6797cab,https://www.tiktok.com/@lil.d1rt_/video/7247574148887629083,LibreOffice Impress
+0f84bef9-9790-432e-92b7-eece357603fb,https://stackoverflow.com/questions/29036788/how-to-disable-libreoffice-impress-to-use-multiple-display,LibreOffice Impress
+ce88f674-ab7a-43da-9201-468d38539e4a,https://justclickhere.co.uk/resources/change-slides-in-impress-to-portrait/,LibreOffice Impress
+f0a334af-f91b-4c03-b578-aac9bec2b543,https://www.libreofficehelp.com/insert-video-impress-presentation/#Inserting_a_Video_in_Impress,LibreOffice Impress
+3b27600c-3668-4abd-8f84-7bcdebbccbdb,https://www.libreofficehelp.com/change-slide-background-impress/#All_Slides,LibreOffice Impress
+a097acff-6266-4291-9fbd-137af7ecd439,https://www.youtube.com/watch?v=DDmEvjs4iBw,LibreOffice Impress
+21760ecb-8f62-40d2-8d85-0cee5725cb72,https://www.libreofficehelp.com/add-animations-transitions-libreoffice-impress-slides/,LibreOffice Impress
+3cc4f35d-fa2e-4555-afb9-741b7c062a74,https://documentation.libreoffice.org/assets/Uploads/Documentation/en/IG7.6/IG76-ImpressGuide.pdf,LibreOffice Impress
+6ada715d-3aae-4a32-a6a7-429b2e43fb93,https://www.quora.com/How-do-you-insert-images-into-a-LibreOffice-Writer-document,LibreOffice Writer
+ecc2413d-8a48-416e-a3a2-d30106ca36cb,https://www.quora.com/How-can-I-insert-a-blank-page-on-libreoffice,LibreOffice Writer
+0e47de2a-32e0-456c-a366-8c607ef7a9d2,https://ask.libreoffice.org/t/how-to-start-page-numbering-on-a-certain-page/39931/4,LibreOffice Writer
+4bcb1253-a636-4df4-8cb0-a35c04dfef31,https://www.libreofficehelp.com/save-export-writer-documents-in-pdf-epub-format/,LibreOffice Writer
+0810415c-bde4-4443-9047-d5f70165a697,https://www.youtube.com/watch?v=Q_AaL6ljudU,LibreOffice Writer
+e528b65e-1107-4b8c-8988-490e4fece599,https://www.youtube.com/watch?v=l25Evu4ohKg,LibreOffice Writer
+66399b0d-8fda-4618-95c4-bfc6191617e9,https://www.youtube.com/watch?v=l25Evu4ohKg,LibreOffice Writer
+936321ce-5236-426a-9a20-e0e3c5dc536f,https://www.youtube.com/watch?v=l25Evu4ohKg,LibreOffice Writer
+663876c7-3471-43db-ba51-f410b13d9d7d,https://askubuntu.com/questions/319593/how-to-type-science-equations-in-libre-office,LibreOffice Writer
+3ef2b351-8a84-4ff2-8724-d86eae9b842e,https://askubuntu.com/questions/1066351/how-do-you-center-align-in-libreoffice#:~:text=Ctrl%20%2B%20e%20will%20Center%20align%20the%20cursor%20for%20you.,LibreOffice Writer
+45d61a06-6545-4422-97b7-bc76cfa964c1,https://stackoverflow.com/questions/71685737/how-to-replace-all-newlines-with-paragraph-marks-in-libreoffice-write,LibreOffice Writer
+0b17a146-2934-46c7-8727-73ff6b6483e8,https://askubuntu.com/questions/245695/how-do-you-insert-subscripts-and-superscripts-into-ordinary-non-formula-text-i,LibreOffice Writer
+0e763496-b6bb-4508-a427-fad0b6c3e195,https://ask.libreoffice.org/t/how-do-i-change-the-font-for-the-whole-document-in-writer/9220,LibreOffice Writer
+f178a4a9-d090-4b56-bc4c-4b72a61a035d,https://ask.libreoffice.org/t/how-do-i-make-times-new-roman-the-default-font-in-lo/64604,LibreOffice Writer
+0a0faba3-5580-44df-965d-f562a99b291c,https://stackoverflow.com/questions/64528055/how-to-make-part-of-my-sentence-left-aligned-and-rest-as-right-aligned,LibreOffice Writer
+e246f6d8-78d7-44ac-b668-fcf47946cb50,https://ask.libreoffice.org/t/how-to-change-text-size-color-of-italic-font/77712,LibreOffice Writer
+8472fece-c7dd-4241-8d65-9b3cd1a0b568,https://stackoverflow.com/questions/37259827/libreoffice-writer-how-to-set-different-colors-to-each-letter,LibreOffice Writer
+88fe4b2d-3040-4c70-9a70-546a47764b48,https://stackoverflow.com/questions/56554555/libreoffice-writer-how-to-create-empty-line-space-after-every-period-in-a-par,LibreOffice Writer
+6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2,https://superuser.com/questions/762500/how-do-i-find-all-highlighted-text-in-libreoffice-writer,LibreOffice Writer
+d53ff5ee-3b1a-431e-b2be-30ed2673079b,https://ask.libreoffice.org/t/how-to-convert-all-uppercase-to-lowercase/53341,LibreOffice Writer
+72b810ef-4156-4d09-8f08-a0cf57e7cefe,https://superuser.com/questions/657792/libreoffice-writer-how-to-apply-strikethrough-text-formatting?rq=1,LibreOffice Writer
+6f81754e-285d-4ce0-b59e-af7edb02d108,https://superuser.com/questions/789473/remove-duplicate-lines-in-libreoffice-openoffice-writer,LibreOffice Writer
+41c621f7-3544-49e1-af8d-dafd0f834f75,https://superuser.com/questions/1668018/how-to-auto-format-lines-in-libre-office-writer,LibreOffice Writer
+b21acd93-60fd-4127-8a43-2f5178f4a830,https://superuser.com/questions/1097199/how-can-i-double-space-a-document-in-libreoffice?rq=1,LibreOffice Writer
+59f21cfb-0120-4326-b255-a5b827b38967,https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/media.html#playing-a-file,VLC player
+8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89,https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/recording/playing.html#choose-your-recordings-folder,VLC player
+8f080098-ddb1-424c-b438-4e96e5e4786e,https://medium.com/@jetscribe_ai/how-to-extract-mp3-audio-from-videos-using-vlc-media-player-beeef644ebfb,VLC player
+bba3381f-b5eb-4439-bd9e-80c22218d5a7,https://www.quora.com/How-do-I-play-online-videos-using-the-VLC-media-player,VLC player
+a1c3ab35-02de-4999-a7ed-2fd12c972c6e,https://www.quora.com/How-do-I-compress-a-video-with-VLC,VLC player
+fba2c100-79e8-42df-ae74-b592418d54f4,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s,VLC player
+d70666e4-7348-42c7-a06a-664094c5df3c,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s,VLC player
+efcf0d81-0835-4880-b2fd-d866e8bc2294,"https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s, https://help.ubuntu.com/stable/ubuntu-help/look-background.html.en",VLC player
+8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s,VLC player
+aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6,https://videoconverter.wondershare.com/vlc/how-to-rotate-a-video-using-vlc.html?gad_source=1&gclid=CjwKCAiA-vOsBhAAEiwAIWR0TaGSOLkYiBeVQGZSyfeUP3g-tIvYxffl5RFIu0-zrUL1IF41eCw1JRoCnCMQAvD_BwE,VLC player
+386dbd0e-0241-4a0a-b6a2-6704fba26b1c,https://superuser.com/questions/1708415/pause-and-play-vlc-in-background?rq=1,VLC player
+9195653c-f4aa-453d-aa95-787f6ccfaae9,https://superuser.com/questions/1513285/how-can-i-increase-the-maximum-volume-output-by-vlc?rq=1,VLC player
+5ac2891a-eacd-4954-b339-98abba077adb,"https://superuser.com/questions/1412810/how-to-prevent-vlc-media-player-from-auto-closing-after-video-end#:%7E:text=Click%20on%20%22Media%22on%20the,VLC%20player%20after%20video%20ending",VLC player
+0d95d28a-9587-433b-a805-1fbe5467d598,https://superuser.com/questions/1299036/vlc-how-to-open-the-folder-of-the-current-playing-video?noredirect=1&lq=1,VLC player
+d06f0d4d-2cd5-4ede-8de9-598629438c6e,https://superuser.com/questions/1039392/changing-colour-of-vlc-volume-slider,VLC player
+a5bbbcd5-b398-4c91-83d4-55e1e31bbb81,https://superuser.com/questions/776056/how-to-hide-bottom-toolbar-in-vlc,VLC player
+f3977615-2b45-4ac5-8bba-80c17dbe2a37,https://www.reddit.com/r/Fedora/comments/rhljzd/how_to_run_multiple_instances_of_vlc_media_player/,VLC player
+c669a35f-d45a-450e-b1f2-f473748337bb,https://www.quora.com/How-do-I-fast-forward-a-video-in-VLC-player,VLC player
+d1ba14d0-fef8-4026-8418-5b581dc68ca0,https://superuser.com/questions/306154/how-to-use-a-b-repeat-feature-of-vlc,VLC player
+215dfd39-f493-4bc3-a027-8a97d72c61bf,https://superuser.com/questions/1224784/how-to-change-vlcs-splash-screen,VLC player
+bb5e4c0d-f964-439c-97b6-bdb9747de3f4,https://www.wikihow.com/Remove-an-Email-Account-from-Thunderbird,ThunderBird
+7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,https://www.wikihow.com/Access-Gmail-With-Mozilla-Thunderbird,ThunderBird
+b188fe10-ae67-4db8-a154-26a0b8ff8f1e,https://www.reddit.com/r/Thunderbird/comments/17vv2os/restore_readability_in_message_list_pane/,ThunderBird
+12086550-11c0-466b-b367-1d9e75b3910e,https://www.bitrecover.com/blog/manage-thunderbird-profiles/,ThunderBird
+06fe7178-4491-4589-810f-2e2bc9502122,https://www.quora.com/How-do-I-backup-email-files-in-Mozilla-Thunderbird,ThunderBird
+6766f2b8-8a72-417f-a9e5-56fcaa735837,"https://www.adsigner.com/user-manual/signatures/setup-email-client-thunderbird/#:~:text=is%20probably%20hidden.-,Right%20click%20on%20the%20empty%20space%20at%20the%20top%20of,signature%20from%20a%20file%20instead.",ThunderBird
+e1e75309-3ddb-4d09-92ec-de869c928143,https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters,ThunderBird
+3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5,https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters,ThunderBird
+35253b65-1c19-4304-8aa4-6884b8218fc0,https://support.mozilla.org/en-US/questions/1259354,ThunderBird
+d088f539-cab4-4f9a-ac92-9999fc3a656e,https://support.mozilla.org/en-US/kb/how-use-attachments,ThunderBird
+2ad9387a-65d8-4e33-ad5b-7580065a27ca,"https://support.mozilla.org/bm/questions/1027435, https://www.wikihow.tech/Create-Folders-in-Mozilla-Thunderbird",ThunderBird
+480bcfea-d68f-4aaa-a0a9-2589ef319381,https://www.reddit.com/r/Thunderbird/comments/182dg5p/unified_inbox_howto/,ThunderBird
+37b9808f-b2b4-4177-ab00-9ddfae4bad27,https://www.quora.com/How-can-I-schedule-Mozilla-Thunderbird-to-turn-off-automatically,ThunderBird
+af630914-714e-4a24-a7bb-f9af687d3b91,https://stackoverflow.com/questions/11333148/adding-a-toolbar-button-to-a-thundebird-compose-message-window?rq=3,ThunderBird
+3299584d-8f11-4457-bf4c-ce98f7600250,https://superuser.com/questions/1643561/would-like-to-see-the-email-address-from-sender-in-the-column,ThunderBird
+030eeff7-b492-4218-b312-701ec99ee0cc,https://superuser.com/questions/1781004/how-do-i-remove-the-indentation-and-character-in-quoted-text-of-a-reply-mess,ThunderBird
+94760984-3ff5-41ee-8347-cf1af709fea0,https://superuser.com/questions/1757333/how-can-i-view-thunderbird-in-full-dark-mode,ThunderBird
+99146c54-4f37-4ab8-9327-5f3291665e1e,https://superuser.com/questions/1764409/how-to-send-email-with-thunderbird-without-configuring-an-incoming-email-service,ThunderBird
+9656a811-9b5b-4ddf-99c7-5117bcef0626,https://superuser.com/questions/205240/is-there-a-way-to-get-a-popup-confirmation-box-when-you-send-an-email-in-thunder?rq=1,ThunderBird
+c9e7eaf2-b1a1-4efc-a982-721972fa9f02,https://superuser.com/questions/544480/how-to-apply-automatic-message-filters-to-subfolders-too?noredirect=1&lq=1,ThunderBird
+bb5e4c0d-f964-439c-97b6-bdb9747de3f4,https://support.google.com/chrome/answer/95426?sjid=16867045591165135686-AP,Chrome
+7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,https://support.google.com/chrome/answer/95647?hl=en&ref_topic=7438325&sjid=16867045591165135686-AP#zippy=%2Cdelete-cookies-from-a-site,Chrome
+12086550-11c0-466b-b367-1d9e75b3910e,https://www.quora.com/What-are-the-cool-tricks-to-use-Google-Chrome,Chrome
+06fe7178-4491-4589-810f-2e2bc9502122,https://www.wikihow.com/Switch-Tabs-in-Chrome,Chrome
+6766f2b8-8a72-417f-a9e5-56fcaa735837,https://support.google.com/chrome/thread/205881926/it-s-possible-to-load-unpacked-extension-automatically-in-chrome?hl=en,Chrome
+e1e75309-3ddb-4d09-92ec-de869c928143,https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php,Chrome
+3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5,https://in5stepstutorials.com/google-chrome/add-change-delete-autofill-address.php,Chrome
+35253b65-1c19-4304-8aa4-6884b8218fc0,"https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome, https://www.reddit.com/r/chrome/comments/13xcbap/crete_shortcut_option_missing/",Chrome
+d088f539-cab4-4f9a-ac92-9999fc3a656e,https://medium.com/@inkverseuk2/useful-tips-and-tricks-for-the-google-chrome-browser-ac7d0d24b3cc,Chrome
+2ad9387a-65d8-4e33-ad5b-7580065a27ca,https://www.youtube.com/watch?v=IN-Eq_UripQ,Chrome
+7a5a7856-f1b6-42a4-ade9-1ca81ca0f263,https://www.youtube.com/watch?v=ZaZ8GcTxjXA,Chrome
+3720f614-37fd-4d04-8a6b-76f54f8c222d,https://superuser.com/questions/984668/change-interface-language-of-chrome-to-english,Chrome
+b63059a2-53bc-4163-a89f-3ac948c74081,https://superuser.com/questions/1303418/how-do-i-make-chrome-block-absolutely-all-pop-ups?rq=1,Chrome
+44ee5668-ecd5-4366-a6ce-c1c9b8d4e938,https://superuser.com/questions/1787991/clear-browsing-history-from-specific-site-on-chrome,Chrome
+b5ebc8c6-6329-4373-85b4-9421c97375e9,https://superuser.com/questions/364470/is-there-a-way-to-view-google-chrome-browsing-history-past-three-months-ago?rq=1,Chrome
+93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9,https://superuser.com/questions/1417973/how-to-disable-google-chrome-dark-mode,Chrome
+2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3,https://superuser.com/questions/1393683/how-to-change-the-username-in-google-chrome-profiles?rq=1,Chrome
+480bcfea-d68f-4aaa-a0a9-2589ef319381,https://bugartisan.medium.com/disable-the-new-chrome-ui-round-in-2023-db168271f71e,Chrome
+37b9808f-b2b4-4177-ab00-9ddfae4bad27,https://www.reddit.com/r/chrome/comments/17niw3h/tutorial_how_to_disable_the_download_bubble_in/,Chrome
+af630914-714e-4a24-a7bb-f9af687d3b91,https://www.howtogeek.com/680260/how-to-change-chromes-default-text-size/,Chrome
+ae78f875-5b98-4907-bbb5-9c737fc68c03,https://support.google.com/chrome/thread/219988391/increase-search-results-per-page?hl=en,Chrome
+0ed39f63-6049-43d4-ba4d-5fa2fe04a951,https://www.quora.com/How-do-you-find-and-replace-text-in-Visual-Studio-Code,VS Code
+b421106e-b282-4c41-af72-37c95493f95f,https://stackoverflow.com/questions/74153883/launch-vscode-with-new-txt-file,VS Code
+53ad5833-3455-407b-bbc6-45b4c79ab8fb,https://www.youtube.com/watch?v=VqCgcpAypFQ,VS Code
+eabc805a-bfcf-4460-b250-ac92135819f6,https://www.youtube.com/watch?v=VqCgcpAypFQ,VS Code
+3486f395-ad68-459c-8c39-ea07de934dd4,https://www.youtube.com/watch?v=VqCgcpAypFQ,VS Code
+982d12a5-beab-424f-8d38-d2a48429e511,https://www.youtube.com/watch?v=ORrELERGIHs,VS Code
+4e60007a-f5be-4bfc-9723-c39affa0a6d3,"https://campbell-muscle-lab.github.io/howtos_Python/pages/documentation/best_practices/vscode_docstring_extension/vscode_docstring_extension.html#:~:text=Type%2C%20Ctrl%20%2B%20Shift%20%2B%20P,select%20the%20NumPy%20docstring%20format.",VS Code
+e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2,https://superuser.com/questions/1386061/how-to-suppress-some-python-errors-warnings-in-vs-code,VS Code
+9439a27b-18ae-42d8-9778-5f68f891805e,https://stackoverflow.com/questions/75832474/how-to-keep-cursor-in-debug-console-when-debugging-in-visual-studio-code,VS Code
+ae506c68-352c-4094-9caa-ee9d42052317,https://superuser.com/questions/1460404/get-visual-studio-code-terminal-history?rq=1,VS Code
+ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae,https://superuser.com/questions/1748097/vs-code-disable-tree-view-find-explorer-search,VS Code
+c714dcee-cad3-4e12-8f3c-12bdcfcdb048,https://superuser.com/questions/1417361/how-to-disable-file-filtering-in-vs-code-sidebar-explorer?rq=1,VS Code
+930fdb3b-11a8-46fe-9bac-577332e2640e,https://superuser.com/questions/1270103/how-to-switch-the-cursor-between-terminal-and-code-in-vscode,VS Code
+276cc624-87ea-4f08-ab93-f770e3790175,https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code,VS Code
+9d425400-e9b2-4424-9a4b-d4c7abac4140,https://superuser.com/questions/1466771/is-there-a-way-to-make-editor-tabs-stack-in-vs-code,VS Code
+7a4deb26-d57d-4ea9-9a73-630f66a7b568,https://www.quora.com/How-do-I-edit-a-photo-in-GIMP,GIMP
+554785e9-4523-4e7a-b8e1-8016f565f56a,https://www.quora.com/How-do-I-edit-a-photo-in-GIMP,GIMP
+77b8ab4d-994f-43ac-8930-8ca087d7c4b4,https://superuser.com/questions/1636113/how-to-get-gimp-to-recognize-images-or-pictures-folder-as-the-default-folder-for,GIMP
+f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce,https://superuser.com/questions/612338/how-do-i-select-and-move-an-object-in-gimp,GIMP
+d52d6308-ec58-42b7-a2c9-de80e4837b2b,https://superuser.com/questions/1447106/how-to-get-rid-of-the-gimp-tool-options-box,GIMP
+2a729ded-3296-423d-aec4-7dd55ed5fbb3,https://www.youtube.com/watch?v=lOzSiOIipSM,GIMP
+b148e375-fe0b-4bec-90e7-38632b0d73c2,https://www.quora.com/How-do-I-add-layers-in-GIMP,GIMP
+a746add2-cab0-4740-ac36-c3769d9bfb46,https://www.youtube.com/watch?v=_L_MMU22bAw,GIMP
+7b7617bd-57cc-468e-9c91-40c4ec2bcb3d,https://www.youtube.com/watch?v=G_PjQAy0iiU,GIMP
+d16c99dc-2a1e-46f2-b350-d97c86c85c15,https://stackoverflow.com/questions/75185543/use-gimp-to-resize-image-in-one-layer-only,GIMP
+573f79b5-abfe-4507-b455-251d45fe6198,https://stackoverflow.com/questions/45196895/gimp-add-padding-to-multiple-images,GIMP
+06ca5602-62ca-47f6-ad4f-da151cde54cc,https://stackoverflow.com/questions/74664666/how-to-export-palette-based-png-in-gimp,GIMP
+fa9b1e10-4d2d-4a13-af76-7efa822b6a8b,https://stackoverflow.com/questions/24626608/how-to-combine-several-png-images-as-layers-in-a-single-xcf-image,GIMP
+6b2b72ed-3a10-4849-876a-750f7cdf3886,https://stackoverflow.com/questions/21018007/resize-image-to-fit-canvas-gimp,GIMP
+d0e42fd2-d290-46b3-b598-a6e2b7be9c85,https://stackoverflow.com/questions/56758689/stop-gimp-from-merging-layers-when-de-selecting,GIMP
+e2dd0213-26db-4349-abe5-d5667bfd725c,https://superuser.com/questions/839650/how-to-move-an-inserted-text-box-in-gimp,GIMP
+f723c744-e62c-4ae6-98d1-750d3cd7d79d,https://www.reddit.com/r/GIMP/comments/12e57w8/how_to_use_gimp_to_exaggerate_contrast/,GIMP
+8d6b1c9c-1aab-47fe-9ba5-e84c838d0c57,https://www.quora.com/How-can-email-attachments-be-converted-into-a-word-document-using-Mozilla-Thunderbird,multiple
+11e1e614-9696-4d94-88c9-8e556880d41a,https://ifttt.com/applets/L2A89geP-send-chrome-software-update-release-alerts-to-email,multiple
+57956154-f0fe-486b-88b8-e7126da035a9,https://zapier.com/apps/email/integrations/google-sheets/547/get-email-notifications-for-new-rows-in-a-google-sheets-spreadsheet,multiple
+ec14c524-b245-456d-abd6-ec12c746e9f8,https://zapier.com/apps/gmail/integrations/google-sheets/2618/save-new-gmail-emails-matching-certain-traits-to-a-google-spreadsheet,multiple
+cbf5fbda-425e-4619-bcf2-0ea8d4c0bfa3,https://zapier.com/apps/google-sheets/integrations/google-slides/13919/refresh-charts-on-a-google-slides-presentation-when-rows-are-updated-on-google-sheets,multiple
+a54284d0-7b93-4327-bfcc-3a421516dbdd,https://superuser.com/questions/655622/cannot-drag-images-from-thunderbird-to-word,multiple
+58565672-7bfe-48ab-b828-db349231de6b,https://superuser.com/questions/1792660/open-link-from-other-application-does-not-open-the-url-in-firefox,multiple
+6d72aad6-187a-4392-a4c4-ed87269c51cf,https://superuser.com/questions/923171/converting-openoffice-impress-presentation-to-video-without-screen-recording,multiple
+937087b6-f668-4ba6-9110-60682ee33441,https://superuser.com/questions/187440/set-default-ubuntu-video-player-as-vlc,multiple
+f8cfa149-d1c1-4215-8dac-4a0932bad3c2,https://superuser.com/questions/1803088/libreoffice-calc-clears-clipboard,multiple
+5e974913-6905-4c3f-8b65-d7837f3931cc,https://stackoverflow.com/questions/61856141/how-can-i-start-thunderbird-and-minimize-the-window-on-startup-in-ubuntu,multiple
+7c179dad-f1c7-4892-b53f-d1c4023d23c7,https://stackoverflow.com/questions/21155085/pasting-excel-tables-in-thunderbird-e-mail-client,multiple
+4a68b2dd-70f2-4532-9bc1-d21878bd8cb2,https://stackoverflow.com/questions/65669955/thunderbird-how-to-send-a-mail-to-all-receivers-of-a-folder,multiple
+c8457fde-b14b-4aba-b402-144842ea29e1,https://stackoverflow.com/questions/65788200/how-to-open-xlsx-files-in-ms-excel-from-vs-code,multiple
+81c425f5-78f3-4771-afd6-3d2973825947,https://www.zyxware.com/articles/3770/how-to-transfer-data-in-libreoffice-calc-to-libreoffice-writer-in-table-format,multiple
+bb83cab4-e5c7-42c7-a67b-e46068032b86,https://ask.libreoffice.org/t/save-impress-presentation-as-writer-document/5291/4,multiple
+227d2f97-562b-4ccb-ae47-a5ec9e142fbb,https://discourse.gnome.org/t/gimp-and-libre-office-writer/15430/4,multiple
+a6bbc08c-51e9-4ee4-9327-83d05075d960,https://forum.openoffice.org/en/forum/viewtopic.php?t=105055,multiple
+964e6e03-ba31-466b-8c15-5a351a81f675,https://www.maketecheasier.com/mail-merge-thunderbird-calc/,multiple
+2fe4b718-3bd7-46ec-bdce-b184f5653624,https://www.thewindowsclub.com/how-to-create-animated-gif-from-a-video-file-using-vlc-and-gimp,multiple
+d02b9364-6bb0-4c7e-9dbd-4db62822bc26,https://stackoverflow.com/questions/38306910/simple-python-script-to-get-a-libreoffice-base-field-and-play-on-vlc,multiple
+57fb469b-127a-46fa-8281-bbb3840efdf5,https://support.mozilla.org/en-US/questions/1150626,multiple
+3680a5ee-6870-426a-a997-eba929a0d25c,https://unix.stackexchange.com/questions/510850/how-to-open-calc-from-terminal-and-insert-files,multiple
+2d8c8a20-6f54-4c2e-ad56-61fbe7af6b78,https://www.quora.com/How-do-I-force-LibreOffice-Calc-to-recalculate-a-spreadsheet-from-the-command-line,multiple
+ee9a3c83-f437-4879-8918-be5efbb9fac7,https://stackoverflow.com/questions/64589140/convert-ods-to-csv-using-command-line-when-libreoffice-instance-is-running,multiple
+f7dfbef3-7697-431c-883a-db8583a4e4f9,https://www.thegeekdiary.com/libreoffice-command-examples-in-linux/,multiple
+2b9493d7-49b8-493a-a71b-56cd1f4d6908,https://devicetests.com/kill-libreoffice-writer-command-line-ubuntu,multiple
+51f5801c-18b3-4f25-b0c3-02f85507a078,https://github.com/danielrcollins1/ImpressExtractNotes,multiple
+81de345e-5473-4cb6-a74d-b6abf3475a6a,https://stackoverflow.com/questions/45588952/how-can-i-compose-and-send-email-in-thunderbird-from-commandline,multiple
+2c9fc0de-3ee7-45e1-a5df-c86206ad78b5,https://nikki-ricks.medium.com/how-to-use-git-add-commit-and-push-in-vs-code-and-command-line-35c0e8c47b62,multiple
+510f64c8-9bcc-4be1-8d30-638705850618,https://www.geeksforgeeks.org/how-to-start-vs-code-from-the-terminal-command-line/,multiple
+9ff484f7-5c09-4398-ae29-d5904e59e138,https://stackoverflow.com/questions/38606973/playing-opening-and-pausing-vlc-command-line-executed-from-python-scripts,multiple
+d9b7c649-c975-4f53-88f5-940b29c47247,https://marketplace.uipath.com/listings/extract-the-first-1000-gmail-emails-from-the-current-month-in-a-new-google-sheets-report,multiple
+be4ef0dc-0f70-4936-81d8-3cd2b04482f8,https://marketplace.uipath.com/listings/table-data-extraction-for-sales-opportunities-to-excel-workbook,multiple
+78aed49a-a710-4321-a793-b611a7c5b56b,https://marketplace.uipath.com/listings/upload-email-attachments-from-gmail-to-google-drive,multiple
+897e3b53-5d4d-444b-85cb-2cdc8a97d903,https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive,multiple
+4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc,https://marketplace.uipath.com/listings/extract-data-from-a-new-invoice-file-in-google-drive-and-store-it-in-google-sheets4473,multiple
+b52b40a5-ad70-4c53-b5b0-5650a8387052,https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive,multiple
+46407397-a7d5-4c6b-92c6-dbe038b1457b,https://marketplace.uipath.com/listings/upload-to-google-drive-images-from-pdf-attachments-received-via-gmail,multiple
+a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb,https://marketplace.uipath.com/listings/backup-important-emails-to-onedrive-or-sharepoint,multiple
+665f4af1-617d-4009-baff-84ff66071e6a,https://www.howtogeek.com/663927/how-to-open-google-chrome-using-command-prompt-on-windows-10/#open-chrome-straight-to-a-specific-website,multiple
+e6313b30-3903-4ed9-8c7d-4c47bf51fc96,https://stackoverflow.com/questions/12258086/how-do-i-run-google-chrome-as-root,multiple
\ No newline at end of file
diff --git a/resouce_collection/Source2Doc/get_Source_Doc.py b/resouce_collection/Source2Doc/get_Source_Doc.py
index 959e4cf..5f5e207 100644
--- a/resouce_collection/Source2Doc/get_Source_Doc.py
+++ b/resouce_collection/Source2Doc/get_Source_Doc.py
@@ -1,6 +1,6 @@
 import csv
 import os
-import yt_dlp as youtube_dl
+import yt_dlp
 from docx import Document
 import requests
 from bs4 import BeautifulSoup
@@ -9,81 +9,226 @@ import pytesseract
 from io import BytesIO
 from docx import Document
 import re
+import markdownify
+from markdownify import markdownify as md
 
-# convert .vtt file to .docx file
-def vtt_to_docx(vtt_filepath, docx_filepath):
-    doc = Document()
-
-    # open .vtt file
-    with open(vtt_filepath, 'r', encoding='utf-8') as file:
-        lines = file.readlines()
-
-    # apply regex to each line to check if it is a timecode
-    vtt_text_pattern = re.compile(r'^\d{2}:\d{2}:\d{2}.\d{3} --> \d{2}:\d{2}:\d{2}.\d{3}')
-
-    # deal with each line
-    for line in lines:
-        # if it is a timecode, skip it
-        if vtt_text_pattern.match(line) or 'WEBVTT' in line:
-            continue
-        # else, add it to the document
-        if line.strip():
-            doc.add_paragraph(line.strip())
+def valid_xml_char_ordinal(c):
+    codepoint = ord(c)
+    # conditions ordered by presumed frequency
+    return (
+        0x20 <= codepoint <= 0xD7FF or
+        codepoint in (0x9, 0xA, 0xD) or
+        0xE000 <= codepoint <= 0xFFFD or
+        0x10000 <= codepoint <= 0x10FFFF
+    )
 
-    doc.save(docx_filepath)
-
-
-# download youtube subtitles and convert them to .docx file
-def download_youtube_subtitles(video_url, doc_filename):
+def download_and_clean_youtube_subtitles(video_url, txt_filepath):
+    # set up yt-dlp options for downloading the subtitles
+    subtitles_path = txt_filepath[0:-4]  # strip the ".txt" extension
     ydl_opts = {
         'skip_download': True,
-        'writeautomaticsub': True,
-        'subtitleslangs': ['en'],
-        'outtmpl': f'{doc_filename}.%(ext)s',
+        'writesubtitles': True,
+        'writeautomaticsub': True,  # if no subtitles are available, try auto-generated ones
+        'subtitleslangs': ['en'],
+        'outtmpl': f'{subtitles_path}.%(ext)s',
        'quiet': True,
     }
-
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        ydl.download([video_url])
-
-    # call vtt_to_docx function to convert .vtt file to .docx file
-    vtt_to_docx(f'/content/{doc_filename}.en.vtt', f'/content/{doc_filename}.docx')
-# scrape and OCR a forum
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        # download the subtitles
+        ydl.download([video_url])
+    subtitle_file = f'{subtitles_path}.en.vtt'
+
+    # read the subtitle file
+    subtitles = []
+    try:
+        with open(subtitle_file, 'r', encoding='utf-8') as file:
+            lines = file.readlines()
+
+        # define a pattern that matches timing lines and VTT header lines
+        pattern = re.compile(r'(\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3})|(^WEBVTT)|(^Kind: captions)|(^Language: .*)')
+
+        # clean the subtitles
+        for line in lines:
+            # if this line is a timing line or it is blank, skip it
+            if pattern.match(line) or line.strip() == '':
+                continue
+            # add this subtitle line to the subtitles list, stripping trailing spaces and the line break
+            subtitles.append(line.strip())
+
+        # remove duplicated subtitles while preserving order
+        subtitles = list(dict.fromkeys(subtitles))
+
+        # save the subtitles as a txt file
+        with open(txt_filepath, 'w', encoding='utf-8') as f:
+            for line in subtitles:
+                if line:
+                    f.write(line + '\n')
+
+    except IOError:
+        print(f"Could not read file: {subtitle_file}")
+
+# scrape a webpage and perform OCR on images
 def scrape_and_ocr_forum(url, doc):
     response = requests.get(url)
     soup = BeautifulSoup(response.content, 'html.parser')
-    
+
     text_elements = soup.find_all(['h1', 'h2', 'h3', 'p', 'li'])
     for element in text_elements:
         doc.add_paragraph(element.get_text())
-    
+
     image_elements = soup.find_all('img')
     for image in image_elements:
+        if 'src' not in image.attrs:
+            continue
         image_url = image['src']
        if image_url.startswith('http'):
+            if not image_url.endswith('.svg') and not image_url.endswith('.png'):
+                continue
+            if 'neveragain.allstatics.com/2019/assets/icon/logo' in image_url:
+                continue
             img_response = requests.get(image_url, stream=True)
             img = Image.open(BytesIO(img_response.content))
             ocr_text = pytesseract.image_to_string(img)
-            if not ocr_text:
-                doc.add_paragraph(ocr_text)
-# process a url
-def process_url(url, doc_id):
-    doc_filepath = f"{doc_id}.docx"
+            if ocr_text != ' ' and ocr_text != '':
+                cleaned_string = ''.join(c for c in ocr_text if valid_xml_char_ordinal(c))
+                doc.add_paragraph(cleaned_string)
+
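+# Both Q&A scrapers below run the prettified post-body HTML through
+# markdownify. Roughly, for an illustrative fragment (not from a real page),
+#   md("<h2>Steps</h2><ul><li>Open Impress</li></ul>", heading_style="ATX")
+# returns Markdown along the lines of "## Steps" followed by "* Open Impress".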
+def superuser_to_markdown(url, doc_filepath):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, 'html.parser')
+
+    # set up the markdown document
+    markdown_content = ""
+
+    # get the question title and body
+    question_title = soup.find('h1').get_text(strip=True)
+    question = soup.find('div', {'id': 'question'})
+    if question:
+        question_body = question.find('div', {'class': 's-prose js-post-body'}).prettify()
+        markdown_content += f"# {question_title}\n\n" + markdownify.markdownify(question_body, heading_style="ATX") + "\n\n"
+
+    # get all answers
+    answers = soup.find_all('div', {'class': 'answer'})
+    for answer in answers:
+        answer_body = answer.find('div', {'class': 's-prose js-post-body'}).prettify()
+        markdown_content += markdownify.markdownify(answer_body, heading_style="ATX") + "\n\n"
+
+    # deal with images and perform OCR (guard against a missing question block)
+    all_img_tags = (question.find_all('img') if question else []) + [img for answer in answers for img in answer.find_all('img')]
+    for img_tag in all_img_tags:
+        image_src = img_tag.get('src') or img_tag.get('data-src')  # Superuser uses lazy loading
+        if image_src and image_src.startswith('http'):
+            img_response = requests.get(image_src, stream=True)
+            img = Image.open(BytesIO(img_response.content))
+            ocr_text = pytesseract.image_to_string(img)
+            if ocr_text.strip():  # if the OCR result is not empty, add it to the markdown content
+                markdown_content += "```\n" + ocr_text.strip() + "\n```\n\n"
+
+    with open(doc_filepath, 'w', encoding='utf-8') as f:
+        f.write(markdown_content)
+
+
+def stack_overflow_to_markdown(url, doc_filepath):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, 'html.parser')
+
+    # set up the markdown document
+    markdown_content = ""
+
+    # get the question title and body
+    question = soup.find('div', {'id': 'question'})
+    question_title = soup.find('h1').get_text(strip=True)
+    if question:
+        question_body = question.find('div', {'class': 's-prose js-post-body'}).prettify()
+        markdown_content += f"# {question_title}\n\n" + markdownify.markdownify(question_body, heading_style="ATX") + "\n\n"
+
+    # get all answers
+    answers = soup.find_all('div', {'class': 'answer'})
+    for answer in answers:
+        answer_body = answer.find('div', {'class': 's-prose js-post-body'}).prettify()
+        markdown_content += markdownify.markdownify(answer_body, heading_style="ATX") + "\n\n"
+
+    # deal with images and perform OCR
+    all_img_tags = soup.find_all('img')
+    for img_tag in all_img_tags:
+        image_url = img_tag.get('src', '')
+        if image_url.startswith('http') and (image_url.endswith('.svg') or image_url.endswith('.png')):  # make sure the image URL is valid
+            img_response = requests.get(image_url, stream=True)
+            img = Image.open(BytesIO(img_response.content))
+            ocr_text = pytesseract.image_to_string(img)
+            if ocr_text.strip():
+                markdown_content += "```\n" + ocr_text.strip() + "\n```\n\n"
+
+    with open(doc_filepath, 'w', encoding='utf-8') as f:
+        f.write(markdown_content)
+
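+# Fallback for sources that are neither Superuser nor Stack Overflow: try
+# <article>, then <main>, then divs with class 'lia-message-body-content'
+# (the container used on, e.g., the techcommunity.microsoft.com threads in
+# the CSV above), OCR any images, and convert the remaining tags to Markdown.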
+def scrape_webpage_to_markdown(url, doc_filepath):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, 'html.parser')
+
+    articles = soup.find_all('article') or soup.find_all('main') or soup.find_all('div', {'class': 'lia-message-body-content'})
+
+    if not articles:
+        return
+
+    markdown_content = ''
+
+    # scrape the webpage and perform OCR on images
+    for article in articles:
+        for child in article.recursiveChildGenerator():
+            # if this is an image, perform OCR
+            if child.name == 'img':
+                img_url = child.get('src') or ''  # may be missing for lazy-loaded images
+                if not img_url.startswith(('http:', 'https:')):
+                    img_url = '{}{}'.format(url, img_url)
+                if not img_url.endswith('.svg') and not img_url.endswith('.png'):
+                    continue
+                if 'neveragain.allstatics.com/2019/assets/icon/logo' in img_url:
+                    continue
+                img_response = requests.get(img_url, stream=True)
+                img = Image.open(BytesIO(img_response.content))
+                ocr_text = pytesseract.image_to_string(img)
+                if ocr_text.strip():
+                    markdown_content += '\n```plaintext\n{}\n```\n'.format(ocr_text.strip())
+                continue
+            # not an image: skip bare text nodes and convert tags to Markdown
+            if child.name is None:
+                continue
+
+            html_str = str(child)
+            markdown_content += md(html_str) + '\n\n'
+
+    with open(doc_filepath, 'w', encoding='utf-8') as f:
+        f.write(markdown_content)
+
+
+# process a URL and save the file
+def process_url(url, doc_id, app):
+    doc_filepath = f"/content/drive/MyDrive/SourceDoc/{doc_id}_{app}.md"
+    txt_filepath = f"/content/drive/MyDrive/SourceDoc/{doc_id}_{app}.txt"
     doc = Document()
-
-    if 'youtube.com' in url or 'youtu.be' in url:
-        download_youtube_subtitles(url, doc_id)
-    else:
-        scrape_and_ocr_forum(url, doc)
-
-    doc.save(doc_filepath)
-# read csv file and process each row
-csv_filepath = './Get_Source_Doc - Sheet1.csv'
+    if 'youtube.com' in url or 'youtu.be' in url:
+        download_and_clean_youtube_subtitles(url, txt_filepath)
+    elif 'superuser.com' in url:
+        superuser_to_markdown(url, doc_filepath)
+    elif 'stackoverflow.com' in url:
+        stack_overflow_to_markdown(url, doc_filepath)
+    else:
+        scrape_webpage_to_markdown(url, doc_filepath)
+
+# read the CSV file and process each URL
+csv_filepath = './Get_Source_Doc - Sheet1.csv'
 with open(csv_filepath, 'r', newline='', encoding='utf-8') as csvfile:
     reader = csv.DictReader(csvfile)
+    cnt = 55  # resume support: skip rows already processed in an earlier run
     for row in reader:
-        process_url(row['Source'], row['id'])
+        if cnt > 0:
+            cnt -= 1
+            continue
+        process_url(row['Source'], row['id'], row['InvolvedApp'])
         print(row)
\ No newline at end of file
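
For reference, a minimal sketch of how one row of the CSV above flows through
the new driver; the sample row is copied verbatim from the data file in this
patch, and the output path follows from the f-string in process_url():

    # one row as csv.DictReader yields it
    row = {
        'id': 'ac9bb6cb-1888-43ab-81e4-a98a547918cd',
        'Source': 'https://superuser.com/questions/1674211/how-to-change-colour-of-slide-number-in-libre-office',
        'InvolvedApp': 'LibreOffice Impress',
    }
    # a superuser.com URL is dispatched to superuser_to_markdown(), which writes
    # /content/drive/MyDrive/SourceDoc/ac9bb6cb-1888-43ab-81e4-a98a547918cd_LibreOffice Impress.md
    process_url(row['Source'], row['id'], row['InvolvedApp'])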