From ab71ebb2ba6e6e0165f399166da6129e381693dc Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Thu, 4 Jan 2024 17:05:17 +0800
Subject: [PATCH 1/4] Initialize VLC getters and metrics, fix some bugs in
 infra logic, needs to be refactored later on

---
 desktop_env/controllers/python.py             | 19 +++++-
 desktop_env/controllers/setup.py              | 66 ++++++++++++-------
 desktop_env/envs/desktop_env.py               |  3 +-
 desktop_env/evaluators/getters/__init__.py    |  1 +
 desktop_env/evaluators/getters/file.py        |  5 +-
 desktop_env/evaluators/metrics/__init__.py    |  3 +-
 .../59f21cfb-0120-4326-b255-a5b827b38967.json | 36 +++++++++-
 7 files changed, 101 insertions(+), 32 deletions(-)

diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py
index 2b0dfb7..5469800 100644
--- a/desktop_env/controllers/python.py
+++ b/desktop_env/controllers/python.py
@@ -80,7 +80,8 @@ class PythonController:
                 y = parameters["y"]
                 if "num_clicks" in parameters:
                     num_clicks = parameters["num_clicks"]
-                    self.execute_python_command(f"pyautogui.click(button='{button}', x={x}, y={y}, clicks={num_clicks})")
+                    self.execute_python_command(
+                        f"pyautogui.click(button='{button}', x={x}, y={y}, clicks={num_clicks})")
                 else:
                     self.execute_python_command(f"pyautogui.click(button='{button}', x={x}, y={y})")
             elif "button" in parameters and "x" not in parameters and "y" not in parameters:
@@ -143,7 +144,8 @@ class PythonController:
             if "x" in parameters and "y" in parameters:
                 x = parameters["x"]
                 y = parameters["y"]
-                self.execute_python_command(f"pyautogui.dragTo({x}, {y}, duration=1.0, button='left', mouseDownUp=True)")
+                self.execute_python_command(
+                    f"pyautogui.dragTo({x}, {y}, duration=1.0, button='left', mouseDownUp=True)")
 
         elif action_type == "SCROLL":
             # todo: check if it is related to the operating system, as https://github.com/TheDuckAI/DuckTrack/blob/main/ducktrack/playback.py pointed out
@@ -206,3 +208,16 @@ class PythonController:
 
         else:
             raise Exception(f"Unknown action type: {action_type}")
+
+
+    def get_vlc_status(self, host='localhost', port=8080, password='password'):
+        """Fetch status.xml from VLC's HTTP interface; returns the raw body bytes, or None on a non-200 reply."""
+        url = f'http://{host}:{port}/requests/status.xml'
+        # Basic auth with an empty user name; the timeout keeps an unresponsive VLC from blocking forever.
+        response = requests.get(url, auth=('', password), timeout=10)
+
+        if response.status_code == 200:
+            print("VLC status fetched successfully")
+            return response.content
+        print("Failed to get vlc status. Status code:", response.status_code)
+        return None
diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py
index e28287b..2320e99 100644
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -10,12 +10,11 @@ from typing import Any
 
 
 class SetupController:
-    def __init__( self
-                , http_server: str
-                , cache_dir: str
-                ):
-        self.http_server = http_server + "/setup"
+    def __init__(self, http_server: str, cache_dir: str):
+        self.http_server: str = http_server
+        self.http_server_setup_root = http_server + "/setup"
         self.cache_dir: str = cache_dir
+
     def reset_cache_dir(self, cache_dir: str):
         self.cache_dir = cache_dir
 
@@ -48,6 +47,32 @@ class SetupController:
         # self._open_setup(config)
         # can add other setup steps
 
+    def _command_setup(self, command: str):
+        """
+        Directly send a command into the virtual machine os for setting up.
+        The command is posted as JSON to the server's "/execute" endpoint and the outcome is printed.
+        """
+        payload = json.dumps({"command": command})
+        headers = {'Content-Type': 'application/json'}
+        timeout = 5
+        # A timeout on one of these commands is reported as success rather than as an error
+        # (presumably because the launched program keeps running - TODO confirm).
+        timeout_whitelist = ["vlc"]
+
+        try:
+            response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
+            if response.status_code == 200:
+                print("Command executed successfully:", response.text)
+            else:
+                print("Failed to execute command. Status code:", response.status_code)
+        except requests.exceptions.Timeout as e:
+            if command in timeout_whitelist:
+                print("Command executed successfully:", command)
+            else:
+                print("An error occurred while trying to execute the command:", e)
+        except requests.exceptions.RequestException as e:
+            print("An error occurred while trying to execute the command:", e)
+
     def _download_setup(self, files: List[Dict[str, str]]):
         """
         Args:
@@ -66,12 +91,9 @@ class SetupController:
         for f in files:
             url: str = f["url"]
             path: str = f["path"]
-            cache_path: str = os.path.join( self.cache_dir
-                                          , "{:}_{:}".format(
-                                              uuid.uuid5(uuid.NAMESPACE_URL, url)
-                                            , os.path.basename(path)
-                                            )
-                                          )
+            cache_path: str = os.path.join(self.cache_dir, "{:}_{:}".format(
+                uuid.uuid5(uuid.NAMESPACE_URL, url),
+                os.path.basename(path)))
 
             if not url or not path:
                 raise Exception(f"Setup Download - Invalid URL ({url}) or path ({path}).")
@@ -97,21 +119,21 @@ class SetupController:
                 if not downloaded:
                     raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")
 
-            #payload = json.dumps({"url": url, "path": path})
-            #headers = {
-                #'Content-Type': 'application/json'
-            #}
+            # payload = json.dumps({"url": url, "path": path})
+            # headers = {
+            # 'Content-Type': 'application/json'
+            # }
 
-            form = MultipartEncoder( { "file_path": path
-                                     , "file_data": (os.path.basename(path), open(cache_path, "rb"))
-                                     }
-                                   )
+            form = MultipartEncoder({
+                "file_path": path,
+                "file_data": (os.path.basename(path), open(cache_path, "rb"))
+            })
             headers = {"Content-Type": form.content_type}
             print(form.content_type)
 
             # send request to server to upload file
             try:
-                response = requests.post(self.http_server + "/upload", headers=headers, data=form)
+                response = requests.post(self.http_server_setup_root + "/upload", headers=headers, data=form)
                 if response.status_code == 200:
                     print("Command executed successfully:", response.text)
                 else:
@@ -136,7 +158,7 @@ class SetupController:
 
         # send request to server to change wallpaper
         try:
-            response = requests.post(self.http_server + "/change_wallpaper", headers=headers, data=payload)
+            response = requests.post(self.http_server_setup_root + "/change_wallpaper", headers=headers, data=payload)
             if response.status_code == 200:
                 print("Command executed successfully:", response.text)
             else:
@@ -163,7 +185,7 @@ class SetupController:
 
         # send request to server to open file
         try:
-            response = requests.post(self.http_server + "/open_file", headers=headers, data=payload)
+            response = requests.post(self.http_server_setup_root + "/open_file", headers=headers, data=payload)
             if response.status_code == 200:
                 print("Command executed successfully:", response.text)
             else:
diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py
index 03086ff..c953f3c 100644
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -85,7 +85,8 @@ class DesktopEnv(gym.Env):
         # Initialize emulator and controller
         print("Initializing...")
         self._start_emulator()
-        self.host = f"http://{self._get_vm_ip()}:5000"
+        self.vm_ip = self._get_vm_ip()
+        self.host = f"http://{self.vm_ip}:5000"
         self.controller = PythonController(http_server=self.host)
         self.setup_controller = SetupController(http_server=self.host, cache_dir=self.cache_dir)
 
diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py
index 81a23fd..770c30c 100644
--- a/desktop_env/evaluators/getters/__init__.py
+++ b/desktop_env/evaluators/getters/__init__.py
@@ -1,2 +1,3 @@
 from .file import get_cloud_file, get_vm_file
 from .misc import get_rule
+from .vlc import get_vlc_playing_info
diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py
index a9be430..25fd081 100644
--- a/desktop_env/evaluators/getters/file.py
+++ b/desktop_env/evaluators/getters/file.py
@@ -3,6 +3,7 @@ from typing import Dict
 import os
 import requests
 
+
 def get_cloud_file(env, config: Dict[str, str]) -> str:
     """
     Config:
@@ -25,6 +26,7 @@ def get_cloud_file(env, config: Dict[str, str]) -> str:
 
     return _path
 
+
 def get_vm_file(env, config: Dict[str, str]) -> str:
     """
     Config:
@@ -33,12 +35,9 @@ def get_vm_file(env, config: Dict[str, str]) -> str:
     """
 
     _path = os.path.join(env.cache_dir, config["dest"])
-    if os.path.exists(_path):
-        return _path
 
     file = env.controller.get_file(config["path"])
     with open(_path, "wb") as f:
         f.write(file)
 
     return _path
-
diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 498df17..80a26cf 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -2,4 +2,5 @@ from .table import compare_table
 from .table import check_sheet_list, check_xlsx_freeze, check_zoom
 from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, compare_insert_equation
 from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
-from .docs import is_first_line_centered, check_file_exists, compare_contains_image
\ No newline at end of file
+from .docs import is_first_line_centered, check_file_exists, compare_contains_image
+from .vlc import is_vlc_playing
diff --git a/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json b/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
index 512427e..fabd42b 100644
--- a/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
+++ b/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
@@ -1,12 +1,42 @@
 {
   "id": "59f21cfb-0120-4326-b255-a5b827b38967",
   "snapshot": "base_setup",
-  "instruction": "Could you help me play the file at FILE_PATH?",
+  "instruction": "Play the music video on my desktop",
   "source": "https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/media.html#playing-a-file",
-  "config": [],
+  "config": [
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "url": "https://drive.usercontent.google.com/download?id=14-vhVMVw53e0l-MDVBFbngFAE1jMqvgm&export=download&authuser=0&confirm=t&uuid=d31607ed-0075-4fe5-b68c-b24b6eec356e&at=APZUnTV0Wy0672VFGrQChgHmd1Ba:1704337791613",
+            "path": "Desktop/Rick Astley - Never Gonna Give You Up (Official Music Video).mp4"
+          }
+        ]
+      }
+    },
+    {
+      "type": "command",
+      "parameters": {
+        "command": "vlc"
+      }
+    }
+  ],
   "trajectory": "trajectories/",
   "related_apps": [
     "vlc"
   ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "func": "is_vlc_playing",
+    "expected": {
+      "type": "rule",
+      "rules": {
+        "file_path": "Desktop/Rick Astley - Never Gonna Give You Up (Official Music Video).mp4"
+      }
+    },
+    "result": {
+      "type": "vlc_playing_info",
+      "dest": "status.xml"
+    }
+  }
 }

From 2401513c1961ec7c976c3eae28347081d028012c Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Thu, 4 Jan 2024 17:55:07 +0800
Subject: [PATCH 2/4] Initialize VLC getters and metrics, fix some bugs in
 infra logic, needs to be refactored later on

---
 desktop_env/evaluators/getters/vlc.py  | 20 +++++++++++++++
 desktop_env/evaluators/metrics/gimp.py | 22 ++++++++++++++++
 desktop_env/evaluators/metrics/vlc.py  | 35 ++++++++++++--------------
 3 files changed, 58 insertions(+), 19 deletions(-)
 create mode 100644 desktop_env/evaluators/getters/vlc.py
 create mode 100644 desktop_env/evaluators/metrics/gimp.py

diff --git a/desktop_env/evaluators/getters/vlc.py b/desktop_env/evaluators/getters/vlc.py
new file mode 100644
index 0000000..e00ce61
--- /dev/null
+++ b/desktop_env/evaluators/getters/vlc.py
@@ -0,0 +1,20 @@
+import os
+from typing import Dict
+
+
+def get_vlc_playing_info(env, config: Dict[str, str]):
+    """
+    Gets the current playing information from VLC's HTTP interface.
+
+    Config:
+        dest (str): file name under env.cache_dir that receives the status.xml content
+    """
+    _path = os.path.join(env.cache_dir, config["dest"])
+    content = env.controller.get_vlc_status(env.vm_ip, 8080, 'password')
+    print("content: ", content)
+    if content is None:
+        # get_vlc_status returns None on a non-200 reply; fail clearly before creating an empty file.
+        raise RuntimeError("Failed to get VLC status; nothing written to " + _path)
+    with open(_path, "wb") as f:
+        f.write(content)
+    return _path
diff --git a/desktop_env/evaluators/metrics/gimp.py b/desktop_env/evaluators/metrics/gimp.py
new file mode 100644
index 0000000..fbc328c
--- /dev/null
+++ b/desktop_env/evaluators/metrics/gimp.py
@@ -0,0 +1,22 @@
+import os
+
+
+def get_gimp_export_path():
+    """
+    Return the default-export-path setting from the user's gimprc (path assumes GIMP 2.10).
+
+    Returns the quoted path of the first matching line, None when no line
+    matches, or False when the configuration file does not exist.
+    """
+    gimp_config_file = os.path.expanduser("~/.config/GIMP/2.10/gimprc")
+    try:
+        with open(gimp_config_file, 'r') as file:
+            for line in file:
+                parts = line.split('"')
+                # Ignore commented-out settings ('#') and lines without a quoted value.
+                if "default-export-path" in line and not line.lstrip().startswith("#") and len(parts) > 2:
+                    return parts[1]
+    except FileNotFoundError:
+        print("GIMP configuration file not found")
+        return False
+    return None
diff --git a/desktop_env/evaluators/metrics/vlc.py b/desktop_env/evaluators/metrics/vlc.py
index d3083c2..b5272eb 100644
--- a/desktop_env/evaluators/metrics/vlc.py
+++ b/desktop_env/evaluators/metrics/vlc.py
@@ -1,11 +1,12 @@
 import os
 import platform
-import requests
 from xml.etree import ElementTree
 import pygetwindow as gw
 import pyautogui
+from typing import Dict
 
-def read_vlc_config(setting_name):
+
+def get_vlc_config(setting_name):
     """
     Reads the VLC configuration file to check for a specific setting.
 
@@ -38,24 +39,22 @@ def read_vlc_config(setting_name):
     return None
 
 
-def get_vlc_playing_info(host='localhost', port=8080, password='password'):
+def is_vlc_playing(actual: str, rule: Dict[str, str]) -> float:
     """
-    Gets the current playing information from VLC's HTTP interface.
+    Checks if VLC is playing the expected file: actual is the saved status.xml path; returns 1 or 0.
     """
-    url = f'http://{host}:{port}/requests/status.xml'
-    try:
-        response = requests.get(url, auth=('', password))
-        if response.status_code == 200:
-            tree = ElementTree.fromstring(response.content)
-            status = tree.find('state').text
-            if status == 'playing':
-                file_info = tree.find('information/category[@name="meta"]/info[@name="filename"]').text
-                return status, file_info
-            return status, None
-    except Exception as e:
-        print(f"Error: {e}")
+    with open(actual, 'rb') as file:
+        actual_status = file.read().decode('utf-8')
 
-    return None, None
+    tree = ElementTree.fromstring(actual_status)
+    if tree.findtext('state') != 'playing':
+        return 0
+    file_info = tree.findtext('information/category[@name="meta"]/info[@name="filename"]')
+    print("file_info: ", file_info)
+    if not file_info:
+        return 0
+    # The example task's rules carry "file_path"; "expected" stays accepted as the original key.
+    return 1 if file_info.endswith(rule.get('file_path') or rule['expected']) else 0
 
 
 def is_vlc_fullscreen():
@@ -83,5 +82,3 @@ def is_vlc_fullscreen():
     except Exception as e:
         print(f"An error occurred: {e}")
         return False
-
-

From 3cbb57f24c4a041dd1c29c975abefe7ef51f5daf Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Fri, 5 Jan 2024 11:00:31 +0800
Subject: [PATCH 3/4] Add the GUI set-of-mark object detector data collection
 script

---
 mm_agents/gui_som/__init__.py                 |   0
 mm_agents/gui_som/data_preparation/README.md  |   8 +
 .../gui_som/data_preparation/__init__.py      |   0
 .../data_preparation/get_tag_elem_dict.js     | 158 ++++++++++++++++++
 .../majestic_million_download.py              |  39 +++++
 .../data_preparation/scrape_crawler.py        | 119 +++++++++++++
 6 files changed, 324 insertions(+)
 create mode 100644 mm_agents/gui_som/__init__.py
 create mode 100644 mm_agents/gui_som/data_preparation/README.md
 create mode 100644 mm_agents/gui_som/data_preparation/__init__.py
 create mode 100644 mm_agents/gui_som/data_preparation/get_tag_elem_dict.js
 create mode 100644 mm_agents/gui_som/data_preparation/majestic_million_download.py
 create mode 100644 mm_agents/gui_som/data_preparation/scrape_crawler.py

diff --git a/mm_agents/gui_som/__init__.py b/mm_agents/gui_som/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mm_agents/gui_som/data_preparation/README.md b/mm_agents/gui_som/data_preparation/README.md
new file mode 100644
index 0000000..cf95798
--- /dev/null
+++ b/mm_agents/gui_som/data_preparation/README.md
@@ -0,0 +1,8 @@
+1. Get the URLs from majestic_million and save them to `majestic_million.csv`
+```bash
+python3 majestic_million_download.py
+```
+2. Run the Playwright crawler to collect screenshots and element metadata from the URLs
+```bash
+python scrape_crawler.py
+```
\ No newline at end of file
diff --git a/mm_agents/gui_som/data_preparation/__init__.py b/mm_agents/gui_som/data_preparation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mm_agents/gui_som/data_preparation/get_tag_elem_dict.js b/mm_agents/gui_som/data_preparation/get_tag_elem_dict.js
new file mode 100644
index 0000000..f838ada
--- /dev/null
+++ b/mm_agents/gui_som/data_preparation/get_tag_elem_dict.js
@@ -0,0 +1,158 @@
+(() => {
+    let labels = [];
+    let selector_id_table = {};
+    var generateQuerySelector = function (el) {
+        function cssEscape(value) {
+            if (!value) return '';
+            // Escape all CSS special characters, including the colon.
+            return value.replace(/([!"#$%&'()*+,./:;<=>?@[\]^`{|}~])/g, '\\$&');
+        }
+
+        function getChildIndex(el) {
+            var siblings = Array.from(el.parentNode.children);
+            var sameTagSiblings = siblings.filter(sibling => sibling.tagName === el.tagName);
+            return sameTagSiblings.indexOf(el);
+        }
+
+        if (el.tagName.toLowerCase() === "html") {
+            return "HTML";
+        }
+
+        var str = el.tagName;
+        var idPresent = false; // Add a flag to check if an ID is present
+
+        if (el.id !== "") {
+            str += "#" + cssEscape(el.id);
+            idPresent = true; // Set the flag to true if there's an ID
+        }
+
+        if (el.className) {
+            var classes = el.className.split(/\s+/).filter(Boolean); // Filter out empty strings
+            for (var i = 0; i < classes.length; i++) {
+                str += "." + cssEscape(classes[i]);
+            }
+        }
+
+        // Only add :nth-of-type() if no ID is present
+        if (!idPresent) {
+            str += ":nth-of-type(" + (getChildIndex(el) + 1) + ")";
+        }
+
+        // Use '>' combinator if parent is not 'HTML'
+        var parentSelector = generateQuerySelector(el.parentNode);
+        return parentSelector === "HTML" ? str : parentSelector + " > " + str;
+    }
+
+
+    function unmarkPage() {
+        for (const label of labels) {
+            document.body.removeChild(label);
+        }
+        labels = [];
+    }
+
+    // Expose the unmarkPage function globally
+    window.unmarkPage = unmarkPage;
+
+    function markPage() {
+        unmarkPage();
+
+        var bodyRect = document.body.getBoundingClientRect();
+
+        var items = Array.prototype.slice.call(
+            document.querySelectorAll('*')
+        ).map(function (element) {
+            var vw = Math.max(document.documentElement.clientWidth || 0, window.innerWidth || 0);
+            var vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
+
+            var rects = [...element.getClientRects()].filter(bb => {
+                var center_x = bb.left + bb.width / 2;
+                var center_y = bb.top + bb.height / 2;
+                var elAtCenter = document.elementFromPoint(center_x, center_y);
+
+                return elAtCenter === element || element.contains(elAtCenter)
+            }).map(bb => {
+                const rect = {
+                    left: Math.max(0, bb.left),
+                    top: Math.max(0, bb.top),
+                    right: Math.min(vw, bb.right),
+                    bottom: Math.min(vh, bb.bottom)
+                };
+                return {
+                    ...rect,
+                    width: rect.right - rect.left,
+                    height: rect.bottom - rect.top
+                }
+            });
+
+            var area = rects.reduce((acc, rect) => acc + rect.width * rect.height, 0);
+
+            return {
+                element: element,
+                include:
+                    (element.tagName === "INPUT" || element.tagName === "TEXTAREA" || element.tagName === "SELECT") ||
+                    (element.tagName === "BUTTON" || element.tagName === "A" || (element.onclick != null) || window.getComputedStyle(element).cursor == "pointer") ||
+                    (element.tagName === "IFRAME" || element.tagName === "VIDEO")
+                ,
+                area,
+                rects,
+                text: element.textContent.trim().replace(/\s{2,}/g, ' ')
+            };
+        }).filter(item =>
+            item.include && (item.area >= 20)
+        );
+
+        // Only keep inner clickable items
+        items = items.filter(x => !items.some(y => x.element.contains(y.element) && !(x == y)))
+
+        // Function to generate random colors
+        function getRandomColor() {
+            var letters = '0123456789ABCDEF';
+            var color = '#';
+            for (var i = 0; i < 6; i++) {
+                color += letters[Math.floor(Math.random() * 16)];
+            }
+            return color;
+        }
+
+        // Overlay a fixed-position dashed outline with a numbered label on every rect of every
+        // kept item, and record each item's rects in selector_id_table under its index.
+        items.forEach(function (item, index) {
+            selector_id_table[index.toString()] = item.rects;
+            item.rects.forEach((bbox) => {
+                // Declared with const: a bare assignment here would create a global on the page.
+                const newElement = document.createElement("div");
+                const borderColor = getRandomColor();
+                newElement.style.outline = `2px dashed ${borderColor}`;
+                newElement.style.position = "fixed";
+                newElement.style.left = bbox.left + "px";
+                newElement.style.top = bbox.top + "px";
+                newElement.style.width = bbox.width + "px";
+                newElement.style.height = bbox.height + "px";
+                newElement.style.pointerEvents = "none";
+                newElement.style.boxSizing = "border-box";
+                newElement.style.zIndex = 2147483647;
+
+                // Add floating label at the corner
+                const label = document.createElement("span");
+                label.textContent = index;
+                label.style.position = "absolute";
+                label.style.top = "-19px";
+                label.style.left = "0px";
+                label.style.background = borderColor;
+                label.style.color = "white";
+                label.style.padding = "2px 4px";
+                label.style.fontSize = "12px";
+                label.style.borderRadius = "2px";
+                newElement.appendChild(label);
+
+                document.body.appendChild(newElement);
+                labels.push(newElement);
+            });
+        })
+        return selector_id_table;
+    }
+
+    return markPage();
+})()
+
diff --git a/mm_agents/gui_som/data_preparation/majestic_million_download.py b/mm_agents/gui_som/data_preparation/majestic_million_download.py
new file mode 100644
index 0000000..b76d934
--- /dev/null
+++ b/mm_agents/gui_som/data_preparation/majestic_million_download.py
@@ -0,0 +1,39 @@
+import csv
+
+import requests
+
+
+# Latest run on 2024.1.4
+def download_csv(url, file_path):
+    """Download url and write its text to file_path; raises requests.HTTPError on an error status."""
+    response = requests.get(url)
+    response.raise_for_status()  # never save an HTTP error page as the CSV
+    with open(file_path, 'w', newline='', encoding='utf-8') as file:
+        file.write(response.text)
+
+
+def read_csv(file_path):
+    """Return the third column of every data row (header skipped, rows shorter than three cells ignored)."""
+    with open(file_path, newline='', encoding='utf-8') as csvfile:
+        reader = csv.reader(csvfile)
+        next(reader, None)  # Skip the header
+        return [row[2] for row in reader if len(row) > 2]
+
+
+def main():
+    url = 'http://downloads.majestic.com/majestic_million.csv'
+    file_path = 'majestic_million.csv'
+
+    print("Downloading Majestic Million CSV...")
+    download_csv(url, file_path)
+
+    print("Reading URLs from CSV...")
+    urls = read_csv(file_path)
+
+    # Print the first 10 URLs as a sample
+    for url in urls[:10]:
+        print(url)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mm_agents/gui_som/data_preparation/scrape_crawler.py b/mm_agents/gui_som/data_preparation/scrape_crawler.py
new file mode 100644
index 0000000..4ba93c5
--- /dev/null
+++ b/mm_agents/gui_som/data_preparation/scrape_crawler.py
@@ -0,0 +1,119 @@
+import json
+import os
+import random
+import time
+import uuid
+from multiprocessing import Pool
+
+from playwright.sync_api import sync_playwright
+
+from majestic_million_download import read_csv
+
+# JavaScript code as a string
+with open('get_tag_elem_dict.js', 'r') as f:
+    get_tag_elem_dict_js_code = f.read()
+
+
+def scrape_data(website_url, action_depth=10):
+    """
+    Crawl one website with Playwright and save screenshots plus element metadata.
+
+    For the landing page, and after each random link click that changed the page body (at most
+    action_depth clicks), this writes into collected_data/<site>/<uuid>/ a plain screenshot, a
+    meta_data_<i>.json with the table returned by get_tag_elem_dict.js, and a marked screenshot.
+    Sites that already have a directory under collected_data are skipped.
+    """
+    site_dir = os.path.join('collected_data', website_url.split("//")[1])
+    # if file exists, skip
+    if os.path.exists(site_dir):
+        print("Data already exists, skipping...")
+        return
+
+    def click_random_link(page):
+        """Click a random <a> element; return True only if the body HTML changed afterwards."""
+        links = page.query_selector_all("a")
+        if not links:
+            print("No links found on the page.")
+            return False
+        random_link = random.choice(links)
+        try:
+            # Remove the overlay drawn by the marking script before clicking
+            page.evaluate("window.unmarkPage()")
+            # Capture the initial HTML content of the body
+            initial_content = page.inner_html("body")
+            # Click the link and wait for potential navigation
+            random_link.click()
+            page.wait_for_timeout(5000)  # wait for 5 seconds to allow page changes to occur
+            # Capture the new HTML content of the body
+            new_content = page.inner_html("body")
+        except Exception as e:
+            print("Error occurred:", e)
+            return False
+
+        # Compare the contents
+        if new_content != initial_content:
+            print("Content change detected.")
+            return True
+        print("No content change detected.")
+        return False
+
+    def save_step(page, data_dir, data_id, step):
+        """Save screenshot_<step>.png, meta_data_<step>.json and screenshot_som_<step>.png."""
+        page.screenshot(path=os.path.join(data_dir, f'screenshot_{step}.png'))
+        # Evaluating the script draws the marks and returns the index -> rects table
+        tag_elem_dict = page.evaluate(get_tag_elem_dict_js_code)
+        with open(os.path.join(data_dir, f'meta_data_{step}.json'), 'w') as f:
+            json.dump({
+                'timestamp': time.time(),
+                'url': website_url,
+                'data_id': data_id,
+                'tag_elem_dict': tag_elem_dict
+            }, f, indent=4)
+        page.screenshot(path=os.path.join(data_dir, f'screenshot_som_{step}.png'))
+
+    with sync_playwright() as p:
+        # Launch the browser
+        browser = p.chromium.launch()
+        try:
+            context = browser.new_context(viewport={'width': 1920, 'height': 1080}, locale='en-US')
+            context.set_extra_http_headers({'Accept-Language': 'en-US'})
+            page = context.new_page()
+
+            # Navigate to the target website
+            page.goto(website_url, timeout=60000, wait_until='networkidle')
+
+            data_id = str(uuid.uuid4())
+            data_dir = os.path.join(site_dir, data_id)
+            os.makedirs(data_dir, exist_ok=True)
+            save_step(page, data_dir, data_id, 0)
+
+            for i in range(action_depth):
+                if not click_random_link(page):
+                    print("Invalid click or no navigation, stopping random clicks.")
+                    break
+                save_step(page, data_dir, data_id, i + 1)
+        finally:
+            # Close the browser even when a step raises
+            browser.close()
+
+
+def run_one(url):
+    try:
+        scrape_data("https://" + url, action_depth=5)
+    except Exception as e:
+        print("Error scraping data:", e)
+        print("Start next one...")
+
+
+def main():
+    urls = read_csv("majestic_million.csv")[:20000]
+
+    # Number of processes
+    num_processes = 50  # Adjust based on your system's capability, on my i9-13900k, 50 processes can be used
+
+    with Pool(num_processes) as pool:
+        pool.map(run_one, urls)
+
+
+if __name__ == '__main__':
+    main()

From fbb49187343b2361481db835684949d6e14603b6 Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Fri, 5 Jan 2024 16:08:29 +0800
Subject: [PATCH 4/4] ver Jan5thv2

tested correctness of merging
---
 .gitignore                                 | 1 +
 desktop_env/evaluators/metrics/__init__.py | 2 +-
 requirements.txt                           | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 37c4ba6..1672492 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,3 +172,4 @@ snapshots
 branch_flag
 branch-config
 *.syncthing.*.tmp
+cache
diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 05bbcbd..18c9477 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -1,5 +1,5 @@
 from .table import compare_table
-from .table import check_sheet_list, check_xlsx_freeze, check_zoom
+from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom
 from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, compare_insert_equation
 from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
 from .docs import is_first_line_centered, check_file_exists, compare_contains_image
diff --git a/requirements.txt b/requirements.txt
index 5a3707b..d97aedd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,3 +22,4 @@ openpyxl
 python-docx
 python-pptx
 pypdf
+PyGetWindow