From a1c3e4c294974515ab7fcdd206f19bd8b8a069b5 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 13 Jan 2024 22:56:50 +0800
Subject: [PATCH] Finish Chrome example loading v1

---
 README.md                                     |   4 +-
 desktop_env/controllers/python.py             |  28 +++++
 desktop_env/evaluators/getters/__init__.py    |   2 +-
 desktop_env/evaluators/getters/chrome.py      | 113 +++++++++++++++---
 desktop_env/evaluators/getters/file.py        |   2 +-
 desktop_env/evaluators/metrics/__init__.py    |   2 +-
 desktop_env/evaluators/metrics/chrome.py      |  80 +++++++++++--
 .../2ad9387a-65d8-4e33-ad5b-7580065a27ca.json |   3 +-
 .../35253b65-1c19-4304-8aa4-6884b8218fc0.json |  38 +++++-
 .../7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json |  41 ++++++-
 .../7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json |  42 ++++++-
 .../e1e75309-3ddb-4d09-92ec-de869c928143.json |  38 +++++-
 requirements.txt                              |   1 +
 13 files changed, 351 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index 60d8add..b7d56df 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ todo
 - [x] Set up a pipeline and build agents implementation (zero-shot) for the task
 - [x] Start to design on which tasks inside the DesktopENv to focus on, start to wrap up the environment to be public
 - [x] Start to annotate the examples for ~~training~~ and testing
-- [ ] Error handling during file passing and file opening, etc.
-- [ ] Add accessibility tree from the OS into the observation space
+- [x] Error handling during file passing and file opening, etc.
+- [x] Add accessibility tree from the OS into the observation space
 - [ ] Add pre-process and post-process action support for benchmarking setup and evaluation
 - [ ] Multiprocess support, this can enable the reinforcement learning to be more efficient
\ No newline at end of file
diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py
index 081b76d..5d1dec5 100644
--- a/desktop_env/controllers/python.py
+++ b/desktop_env/controllers/python.py
@@ -280,3 +280,31 @@ class PythonController:
         else:
             logger.error("Failed to get wallpaper. Status code: %d", response.status_code)
             return None
+
+    def get_vm_desktop_path(self):
+        """
+        Gets the desktop path of the vm ("desktop_path" in the JSON reply); returns None on a non-200 status.
+        """
+        response = requests.post(self.http_server + "/desktop_path")
+        if response.status_code == 200:
+            logger.info("Desktop path downloaded successfully")
+            return response.json()["desktop_path"]
+        else:
+            logger.error("Failed to get desktop path. Status code: %d", response.status_code)
+            return None
+
+    def get_vm_directory_tree(self, path):
+        """
+        Gets the directory tree of the vm for `path` ("directory_tree" in the JSON reply); returns None on a non-200 status.
+        """
+        payload = json.dumps({"path": path})
+        headers = {
+            'Content-Type': 'application/json'
+        }
+        response = requests.post(self.http_server + "/list_directory", headers=headers, data=payload)
+        if response.status_code == 200:
+            logger.info("Directory tree downloaded successfully")
+            return response.json()["directory_tree"]
+        else:
+            logger.error("Failed to get directory tree. Status code: %d", response.status_code)
+            return None
\ No newline at end of file
diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py
index 40b1726..e9c2319 100644
--- a/desktop_env/evaluators/getters/__init__.py
+++ b/desktop_env/evaluators/getters/__init__.py
@@ -2,4 +2,4 @@ from .file import get_cloud_file, get_vm_file, get_cache_file
 from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper
 from .misc import get_rule, get_accessibility_tree
 from .vlc import get_vlc_playing_info, get_vlc_config
-from .chrome import get_default_search_engine, get_bookmarks, get_open_tabs_info
+from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, get_shortcuts_on_desktop
diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py
index 62838d7..1b77016 100644
--- a/desktop_env/evaluators/getters/chrome.py
+++ b/desktop_env/evaluators/getters/chrome.py
@@ -46,6 +46,10 @@ def get_default_search_engine(env, config: Dict[str, str]):
 
 
 def get_cookie_data(env, config: Dict[str, str]):
+    """
+    Get the cookies from the Chrome browser.
+    Assume the cookies are stored in the default location, not encrypted and not large in size.
+    """
     os_type = env.vm_platform
     if os_type == 'Windows':
         chrome_cookie_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
@@ -61,21 +65,23 @@ def get_cookie_data(env, config: Dict[str, str]):
     else:
         raise Exception('Unsupported operating system')
 
-    # todo: add a new controller function to connect the cookie database
-    #############
     try:
-        conn = sqlite3.connect(chrome_cookie_file_path)
+        content = env.controller.get_file(chrome_cookie_file_path)
+        _path = os.path.join(env.cache_dir, config["dest"])
+
+        with open(_path, "wb") as f:
+            f.write(content)
+
+        conn = sqlite3.connect(_path)
         cursor = conn.cursor()
 
         # Query to check for OpenAI cookies
         cursor.execute("SELECT * FROM cookies")
         cookies = cursor.fetchall()
-
         return cookies
     except Exception as e:
         logger.error(f"Error: {e}")
         return None
-    #############
 
 
 def get_bookmarks(env, config: Dict[str, str]):
@@ -94,17 +100,12 @@ def get_bookmarks(env, config: Dict[str, str]):
     else:
         raise Exception('Unsupported operating system')
 
-    try:
-        content = env.controller.get_file(preference_file_path)
-        # make content json variable
-        data = json.load(content)
-
-        bookmarks = data.get('roots', {})
-        return bookmarks
-
-    except Exception as e:
-        logger.error(f"Error: {e}")
-        return None
+    content = env.controller.get_file(preference_file_path)
+    if not content:
+        return []
+    data = json.loads(content)
+    bookmarks = data.get('roots', {})
+    return bookmarks
 
 
 # todo: move this to the main.py
@@ -190,3 +191,83 @@ def get_active_tab_info(env, config: Dict[str, str]):
 
         browser.close()
         return active_tab_info
+
+
+def get_pdf_from_url(env, config: Dict[str, str]) -> str:
+    """
+    Render config["path"] to a PDF via the Chrome at env.vm_ip (CDP), save it to cache_dir/config["dest"], return that path.
+    """
+    _url = config["path"]
+    _path = os.path.join(env.cache_dir, config["dest"])
+
+    host = env.vm_ip
+    port = 9222  # fixme: this port is hard-coded, need to be changed from config file
+
+    remote_debugging_url = f"http://{host}:{port}"
+
+    with sync_playwright() as p:
+        browser = p.chromium.connect_over_cdp(remote_debugging_url)
+        page = browser.new_page()
+        page.goto(_url)
+        page.pdf(path=_path)
+        browser.close()
+
+    return _path
+
+
+# fixme: needs to be changed (maybe through post-processing) since it's not working
+def get_chrome_saved_address(env, config: Dict[str, str]):
+    """Return the HTML of chrome://settings/addresses as loaded by the Chrome reachable at env.vm_ip (CDP)."""
+    host = env.vm_ip  # use the environment's VM address rather than a fixed developer IP
+    port = 9222  # fixme: this port is hard-coded, need to be changed from config file
+
+    remote_debugging_url = f"http://{host}:{port}"
+    with sync_playwright() as p:
+        # connect to remote Chrome instance
+        browser = p.chromium.connect_over_cdp(remote_debugging_url)
+
+        page = browser.new_page()
+
+        # Navigate to Chrome's settings page for autofill
+        page.goto("chrome://settings/addresses")
+
+        # Get the HTML content of the page
+        content = page.content()
+
+        browser.close()
+
+    return content
+
+
+def get_shortcuts_on_desktop(env, config: Dict[str, str]):
+    """Return {file name: UTF-8 text} for each shortcut file on the VM desktop; {} when none can be listed."""
+    os_name = env.vm_platform
+
+    # Depending on the OS, define the shortcut file extension
+    if os_name == 'Windows':
+        # Windows shortcuts are typically .url or .lnk files; only .lnk is collected here
+        shortcut_extension = '.lnk'
+    elif os_name == 'Darwin':
+        # macOS's shortcuts are .webloc files
+        shortcut_extension = '.webloc'
+    elif os_name == 'Linux':
+        # Linux (Ubuntu, etc.) shortcuts are typically .desktop files
+        shortcut_extension = '.desktop'
+    else:
+        logger.error(f"Unsupported operating system: {os_name}")
+        return {}
+
+    # Get the path to the desktop folder; the controller returns None for the tree when the request fails
+    desktop_path = env.controller.get_vm_desktop_path()
+    desktop_directory_tree = env.controller.get_vm_directory_tree(desktop_path) or {}
+
+    shortcuts_paths = [file['name'] for file in desktop_directory_tree.get('children', []) if
+                       file['name'].endswith(shortcut_extension)]
+
+    short_cuts = {}
+
+    for shortcut_path in shortcuts_paths:  # !r keeps names containing quotes or backslashes a valid Python literal
+        short_cuts[shortcut_path] = env.controller.get_file(env.controller.execute_python_command(
+            f"import os; print(os.path.join(os.path.expanduser('~'), 'Desktop', {shortcut_path!r}))")['output'].strip()).decode('utf-8')
+
+    return short_cuts
diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py
index 606fead..6714b0e 100644
--- a/desktop_env/evaluators/getters/file.py
+++ b/desktop_env/evaluators/getters/file.py
@@ -40,7 +40,7 @@ def get_vm_file(env, config: Dict[str, str]) -> Optional[str]:
     file = env.controller.get_file(config["path"])
     if file is None:
         return None
-        #raise FileNotFoundError("File not found on VM: {:}".format(config["path"]))
+        # raise FileNotFoundError("File not found on VM: {:}".format(config["path"]))
     with open(_path, "wb") as f:
         f.write(file)
 
diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 289428c..b61b4ff 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -1,4 +1,4 @@
-from .chrome import is_expected_tabs, is_expected_bookmarks
+from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop
 from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
 from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
     compare_insert_equation
diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py
index dc2bcdc..78afac9 100644
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -1,5 +1,9 @@
 import logging
 from typing import Any, Dict, List
+
+import fitz  # PyMuPDF
+import rapidfuzz.fuzz as fuzz
+
 from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls
 
 logger = logging.getLogger("desktopenv.metrics.chrome")
@@ -22,18 +26,72 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f
         return 0
 
 
-def is_expected_bookmarks(bookmarks: List[Dict[str, Any]], rule: Dict[str, Any]) -> float:
+def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
     """
     Checks if the expected bookmarks are in Chrome.
     """
-
-    # todo
-    match_type = rule['type']
-
-    if match_type == "url":
-        expected_urls = rule['urls']
-        actual_urls = [bookmark['url'] for bookmark in bookmarks]
-        return 1 if are_lists_equal(expected_urls, actual_urls, compare_urls) else 0
+    if not bookmarks:
+        return 0.
+    elif rule['type'] == "bookmark_bar_folders_names":
+        bookmark_bar_folders_names = [bookmark['name'] for bookmark in bookmarks['bookmark_bar']['children'] if
+                                      bookmark['type'] == 'folder']
+        return 1. if set(bookmark_bar_folders_names) == set(rule['names']) else 0.
+    elif rule['type'] == "bookmark_bar_websites_urls":
+        bookmark_bar_websites_urls = [bookmark['url'] for bookmark in bookmarks['bookmark_bar']['children'] if
+                                      bookmark['type'] == 'url']
+        return 1. if set(bookmark_bar_websites_urls) == set(rule['urls']) else 0.
     else:
-        logger.error(f"Unknown type: {match_type}")
-        return 0
+        raise TypeError(f"{rule['type']} not support yet!")
+
+
+def compare_pdfs(pdf1_path, pdf2_path):
+    """
+    Compare two PDF files by their extracted text; returns fuzz.ratio of the two texts divided by 100.
+    """
+
+    def extract_text_from_pdf(pdf_path):
+        """Concatenate the text of every page of the PDF and strip surrounding whitespace."""
+        text = ""
+        with fitz.open(pdf_path) as pdf:
+            for page in pdf:
+                text += page.get_text()
+        return text.strip()
+
+    text1 = extract_text_from_pdf(pdf1_path)
+    text2 = extract_text_from_pdf(pdf2_path)
+
+    return fuzz.ratio(text1, text2) / 100
+
+
+def is_cookie_deleted(cookie_data, rule):
+    """
+    Return 0. if any cookie row's column 1 matches (via compare_urls) a domain in rule['domains'], else 1.
+    """
+
+    if rule['type'] == 'domains':
+        cookies_domains = [cookie[1] for cookie in cookie_data]  # presumably column 1 is the cookie's host - TODO confirm
+        for domain in rule['domains']:
+            for cookies_domain in cookies_domains:
+                if compare_urls(domain, cookies_domain):
+                    return 0.
+        return 1.
+    else:
+        raise TypeError(f"{rule['type']} not support yet!")  # only the 'domains' rule type is handled
+
+
+def is_shortcut_on_desktop(shortcuts: Dict[str, str], rule):
+    """
+    Return 1. if any shortcut's content contains "Name=" + rule['name'] followed by a newline, else 0.; other rule types raise TypeError.
+    """
+    # fixme: if the name of the website changed in the future, this will not work; can be replaced with url
+    if rule['type'] == 'name':
+        for shortcut_path, shortcut_content in shortcuts.items():
+            if "Name=" + rule['name'] + "\n" in shortcut_content:
+                return 1.
+        return 0.
+    elif rule['type'] == 'url':
+        raise TypeError(f"{rule['type']} not support yet!")
+    elif rule['type'] == 'id':
+        raise TypeError(f"{rule['type']} not support yet!")
+    else:
+        raise TypeError(f"{rule['type']} not support yet!")
diff --git a/evaluation_examples/examples/chrome/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json b/evaluation_examples/examples/chrome/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json
index 21c9654..ef3fe12 100644
--- a/evaluation_examples/examples/chrome/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json
+++ b/evaluation_examples/examples/chrome/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json
@@ -36,7 +36,8 @@
     "expected": {
       "type": "rule",
       "rules": {
-
+        "type": "bookmark_bar_folders_names",
+        "names": ["Favorites"]
       }
     }
   }
diff --git a/evaluation_examples/examples/chrome/35253b65-1c19-4304-8aa4-6884b8218fc0.json b/evaluation_examples/examples/chrome/35253b65-1c19-4304-8aa4-6884b8218fc0.json
index 4d064b5..71542bf 100644
--- a/evaluation_examples/examples/chrome/35253b65-1c19-4304-8aa4-6884b8218fc0.json
+++ b/evaluation_examples/examples/chrome/35253b65-1c19-4304-8aa4-6884b8218fc0.json
@@ -3,16 +3,50 @@
   "snapshot": "chrome",
   "instruction": "Hey, I need a quick way back to this site. Could you whip up a shortcut on my desktop for me?",
   "source": "https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome",
-  "config": [],
+  "config": [
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "google-chrome",
+          "--remote-debugging-port=1337"
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "socat",
+          "tcp-listen:9222,fork",
+          "tcp:localhost:1337"
+        ]
+      }
+    },
+    {
+      "type": "chrome_open_tabs",
+      "parameters": {
+        "urls_to_open": [
+          "https://www.mathsisfun.com/games/2048.html"
+        ]
+      }
+    }
+  ],
   "trajectory": "trajectories/",
   "related_apps": [
     "chrome"
   ],
   "evaluator": {
-    "func": "",
+    "func": "is_shortcut_on_desktop",
     "result": {
+      "type": "shortcuts_on_desktop"
     },
     "expected": {
+      "type": "rule",
+      "rules": {
+        "type": "name",
+        "name": "Play Puzzle Game 2048"
+      }
     }
   }
 }
diff --git a/evaluation_examples/examples/chrome/7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json b/evaluation_examples/examples/chrome/7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json
index 2f1d7f4..f5ef7f2 100644
--- a/evaluation_examples/examples/chrome/7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json
+++ b/evaluation_examples/examples/chrome/7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json
@@ -1,18 +1,53 @@
 {
   "id": "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
   "snapshot": "chrome",
-  "instruction": "Can you save this webpage I'm looking at to my bookmarks so I can come back to it later?",
+  "instruction": "Can you save this webpage I'm looking at to bookmarks bar so I can come back to it later?",
   "source": "https://www.youtube.com/watch?v=ZaZ8GcTxjXA",
-  "config": [],
+  "config": [
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "google-chrome",
+          "--remote-debugging-port=1337"
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "socat",
+          "tcp-listen:9222,fork",
+          "tcp:localhost:1337"
+        ]
+      }
+    },
+    {
+      "type": "chrome_open_tabs",
+      "parameters": {
+        "urls_to_open": [
+          "https://blog.eleuther.ai/rotary-embeddings/",
+          "https://jalammar.github.io/illustrated-transformer/"
+        ]
+      }
+    }
+  ],
   "trajectory": "trajectories/",
   "related_apps": [
     "chrome"
   ],
   "evaluator": {
-    "func": "",
+    "func": "is_expected_bookmarks",
     "result": {
+      "type": "bookmarks"
     },
     "expected": {
+      "type": "rule",
+      "rules": {
+        "type": "bookmark_bar_websites_urls",
+        "urls": ["https://jalammar.github.io/illustrated-transformer/"]
+      }
     }
   }
 }
diff --git a/evaluation_examples/examples/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json b/evaluation_examples/examples/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
index b914773..94ed902 100644
--- a/evaluation_examples/examples/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
+++ b/evaluation_examples/examples/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json
@@ -1,18 +1,54 @@
 {
   "id": "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
   "snapshot": "chrome",
-  "instruction": "Can you help me clean up my computer by getting rid of all the tracking things that websites like Amazon or eBay might have saved? I want to make sure my browsing is private and those sites don't remember me.",
+  "instruction": "Can you help me clean up my computer by getting rid of all the tracking things that Amazon might have saved? I want to make sure my browsing is private and those sites don't remember me.",
   "source": "https://support.google.com/chrome/answer/95647?hl=en&ref_topic=7438325&sjid=16867045591165135686-AP#zippy=%2Cdelete-cookies-from-a-site",
-  "config": [],
+  "config": [
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "google-chrome",
+          "--remote-debugging-port=1337"
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "socat",
+          "tcp-listen:9222,fork",
+          "tcp:localhost:1337"
+        ]
+      }
+    },
+    {
+      "type": "chrome_open_tabs",
+      "parameters": {
+        "urls_to_open": [
+          "https://www.amazon.com",
+          "https://www.amazon.com/s?k=huggingface+transformers+book"
+        ]
+      }
+    }
+  ],
   "trajectory": "trajectories/",
   "related_apps": [
     "chrome"
   ],
   "evaluator": {
-    "func": "",
+    "func": "is_cookie_deleted",
     "result": {
+      "type": "cookie_data",
+      "dest": "Cookies"
     },
     "expected": {
+      "type": "rule",
+      "rules": {
+        "type": "domains",
+        "domains": [".amazon.com"]
+      }
     }
   }
 }
diff --git a/evaluation_examples/examples/chrome/e1e75309-3ddb-4d09-92ec-de869c928143.json b/evaluation_examples/examples/chrome/e1e75309-3ddb-4d09-92ec-de869c928143.json
index 2484394..2b2fd37 100644
--- a/evaluation_examples/examples/chrome/e1e75309-3ddb-4d09-92ec-de869c928143.json
+++ b/evaluation_examples/examples/chrome/e1e75309-3ddb-4d09-92ec-de869c928143.json
@@ -3,16 +3,50 @@
   "snapshot": "chrome",
   "instruction": "Computer, can you turn the webpage I'm looking at into a PDF file and put it on my main screen, you know, the Desktop?",
   "source": "https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php",
-  "config": [],
+  "config": [
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "google-chrome",
+          "--remote-debugging-port=1337"
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "socat",
+          "tcp-listen:9222,fork",
+          "tcp:localhost:1337"
+        ]
+      }
+    },
+    {
+      "type": "chrome_open_tabs",
+      "parameters": {
+        "urls_to_open": [
+          "https://lilianweng.github.io/posts/2023-06-23-agent/"
+        ]
+      }
+    }
+  ],
   "trajectory": "trajectories/",
   "related_apps": [
     "chrome"
   ],
   "evaluator": {
-    "func": "",
+    "func": "compare_pdfs",
     "result": {
+      "type": "vm_file",
+      "path": "Desktop/LLM Powered Autonomous Agents _ Lil'Log.pdf",
+      "dest": "LLM Powered Autonomous Agents _ Lil'Log.pdf"
     },
     "expected": {
+      "type": "pdf_from_url",
+      "path": "https://lilianweng.github.io/posts/2023-06-23-agent/",
+      "dest": "LLM Powered Autonomous Agents _ Lil'Log_gold.pdf"
     }
   }
 }
diff --git a/requirements.txt b/requirements.txt
index 558098b..fe07dc9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -28,3 +28,4 @@ pyacoustid
 opencv-python
 ImageHash
 scikit-image
+pymupdf