From a1c3e4c294974515ab7fcdd206f19bd8b8a069b5 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Sat, 13 Jan 2024 22:56:50 +0800 Subject: [PATCH] Finish Chrome example loading v1 --- README.md | 4 +- desktop_env/controllers/python.py | 28 +++++ desktop_env/evaluators/getters/__init__.py | 2 +- desktop_env/evaluators/getters/chrome.py | 113 +++++++++++++++--- desktop_env/evaluators/getters/file.py | 2 +- desktop_env/evaluators/metrics/__init__.py | 2 +- desktop_env/evaluators/metrics/chrome.py | 80 +++++++++++-- .../2ad9387a-65d8-4e33-ad5b-7580065a27ca.json | 3 +- .../35253b65-1c19-4304-8aa4-6884b8218fc0.json | 38 +++++- .../7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json | 41 ++++++- .../7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json | 42 ++++++- .../e1e75309-3ddb-4d09-92ec-de869c928143.json | 38 +++++- requirements.txt | 1 + 13 files changed, 351 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 60d8add..b7d56df 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ todo - [x] Set up a pipeline and build agents implementation (zero-shot) for the task - [x] Start to design on which tasks inside the DesktopENv to focus on, start to wrap up the environment to be public - [x] Start to annotate the examples for ~~training~~ and testing -- [ ] Error handling during file passing and file opening, etc. -- [ ] Add accessibility tree from the OS into the observation space +- [x] Error handling during file passing and file opening, etc. +- [x] Add accessibility tree from the OS into the observation space - [ ] Add pre-process and post-process action support for benchmarking setup and evaluation - [ ] Multiprocess support, this can enable the reinforcement learning to be more efficient \ No newline at end of file diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py index 081b76d..5d1dec5 100644 --- a/desktop_env/controllers/python.py +++ b/desktop_env/controllers/python.py @@ -280,3 +280,31 @@ class PythonController: else: logger.error("Failed to get wallpaper. Status code: %d", response.status_code) return None + + def get_vm_desktop_path(self): + """ + Gets the desktop path of the vm. + """ + response = requests.post(self.http_server + "/desktop_path") + if response.status_code == 200: + logger.info("Desktop path downloaded successfully") + return response.json()["desktop_path"] + else: + logger.error("Failed to get desktop path. Status code: %d", response.status_code) + return None + + def get_vm_directory_tree(self, path): + """ + Gets the directory tree of the vm. + """ + payload = json.dumps({"path": path}) + headers = { + 'Content-Type': 'application/json' + } + response = requests.post(self.http_server + "/list_directory", headers=headers, data=payload) + if response.status_code == 200: + logger.info("Directory tree downloaded successfully") + return response.json()["directory_tree"] + else: + logger.error("Failed to get directory tree. Status code: %d", response.status_code) + return None \ No newline at end of file diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py index 40b1726..e9c2319 100644 --- a/desktop_env/evaluators/getters/__init__.py +++ b/desktop_env/evaluators/getters/__init__.py @@ -2,4 +2,4 @@ from .file import get_cloud_file, get_vm_file, get_cache_file from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper from .misc import get_rule, get_accessibility_tree from .vlc import get_vlc_playing_info, get_vlc_config -from .chrome import get_default_search_engine, get_bookmarks, get_open_tabs_info +from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, get_shortcuts_on_desktop diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index 62838d7..1b77016 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -46,6 +46,10 @@ def get_default_search_engine(env, config: Dict[str, str]): def get_cookie_data(env, config: Dict[str, str]): + """ + Get the cookies from the Chrome browser. + Assume the cookies are stored in the default location, not encrypted and not large in size. + """ os_type = env.vm_platform if os_type == 'Windows': chrome_cookie_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'), @@ -61,21 +65,23 @@ def get_cookie_data(env, config: Dict[str, str]): else: raise Exception('Unsupported operating system') - # todo: add a new controller function to connect the cookie database - ############# try: - conn = sqlite3.connect(chrome_cookie_file_path) + content = env.controller.get_file(chrome_cookie_file_path) + _path = os.path.join(env.cache_dir, config["dest"]) + + with open(_path, "wb") as f: + f.write(content) + + conn = sqlite3.connect(_path) cursor = conn.cursor() # Query to check for OpenAI cookies cursor.execute("SELECT * FROM cookies") cookies = cursor.fetchall() - return cookies except Exception as e: logger.error(f"Error: {e}") return None - ############# def get_bookmarks(env, config: Dict[str, str]): @@ -94,17 +100,12 @@ def get_bookmarks(env, config: Dict[str, str]): else: raise Exception('Unsupported operating system') - try: - content = env.controller.get_file(preference_file_path) - # make content json variable - data = json.load(content) - - bookmarks = data.get('roots', {}) - return bookmarks - - except Exception as e: - logger.error(f"Error: {e}") - return None + content = env.controller.get_file(preference_file_path) + if not content: + return [] + data = json.loads(content) + bookmarks = data.get('roots', {}) + return bookmarks # todo: move this to the main.py @@ -190,3 +191,83 @@ def get_active_tab_info(env, config: Dict[str, str]): browser.close() return active_tab_info + + +def get_pdf_from_url(env, config: Dict[str, str]) -> str: + """ + Download a PDF from a URL. + """ + _url = config["path"] + _path = os.path.join(env.cache_dir, config["dest"]) + + host = env.vm_ip + port = 9222 # fixme: this port is hard-coded, need to be changed from config file + + remote_debugging_url = f"http://{host}:{port}" + + with sync_playwright() as p: + browser = p.chromium.connect_over_cdp(remote_debugging_url) + page = browser.new_page() + page.goto(_url) + page.pdf(path=_path) + browser.close() + + return _path + + +# fixme: needs to be changed (maybe through post-processing) since it's not working +def get_chrome_saved_address(env, config: Dict[str, str]): + # host = env.vm_ip + host = "192.168.13.130" + port = 9222 # fixme: this port is hard-coded, need to be changed from config file + + remote_debugging_url = f"http://{host}:{port}" + with sync_playwright() as p: + # connect to remote Chrome instance + browser = p.chromium.connect_over_cdp(remote_debugging_url) + + page = browser.new_page() + + # Navigate to Chrome's settings page for autofill + page.goto("chrome://settings/addresses") + + # Get the HTML content of the page + content = page.content() + + browser.close() + + return content + + +def get_shortcuts_on_desktop(env, config: Dict[str, str]): + # Find out the operating system + os_name = env.vm_platform + + # Depending on the OS, define the shortcut file extension + if os_name == 'Windows': + # Windows shortcuts are typically .url or .lnk files + shortcut_extension = '.lnk' + elif os_name == 'Darwin': + # macOS's shortcuts are .webloc files + shortcut_extension = '.webloc' + elif os_name == 'Linux': + # Linux (Ubuntu, etc.) shortcuts are typically .desktop files + shortcut_extension = '.desktop' + else: + logger.error(f"Unsupported operating system: {os_name}") + return [] + + # Get the path to the desktop folder + desktop_path = env.controller.get_vm_desktop_path() + desktop_directory_tree = env.controller.get_vm_directory_tree(desktop_path) + + shortcuts_paths = [file['name'] for file in desktop_directory_tree['children'] if + file['name'].endswith(shortcut_extension)] + + short_cuts = {} + + for shortcut_path in shortcuts_paths: + short_cuts[shortcut_path] = env.controller.get_file(env.controller.execute_python_command( + f"import os; print(os.path.join(os.path.expanduser('~'), 'Desktop', '{shortcut_path}'))")['output'].strip()).decode('utf-8') + + return short_cuts diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py index 606fead..6714b0e 100644 --- a/desktop_env/evaluators/getters/file.py +++ b/desktop_env/evaluators/getters/file.py @@ -40,7 +40,7 @@ def get_vm_file(env, config: Dict[str, str]) -> Optional[str]: file = env.controller.get_file(config["path"]) if file is None: return None - #raise FileNotFoundError("File not found on VM: {:}".format(config["path"])) + # raise FileNotFoundError("File not found on VM: {:}".format(config["path"])) with open(_path, "wb") as f: f.write(file) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 289428c..b61b4ff 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -1,4 +1,4 @@ -from .chrome import is_expected_tabs, is_expected_bookmarks +from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \ compare_insert_equation diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index dc2bcdc..78afac9 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -1,5 +1,9 @@ import logging from typing import Any, Dict, List + +import fitz # PyMuPDF +import rapidfuzz.fuzz as fuzz + from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls logger = logging.getLogger("desktopenv.metrics.chrome") @@ -22,18 +26,72 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f return 0 -def is_expected_bookmarks(bookmarks: List[Dict[str, Any]], rule: Dict[str, Any]) -> float: +def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float: """ Checks if the expected bookmarks are in Chrome. """ - - # todo - match_type = rule['type'] - - if match_type == "url": - expected_urls = rule['urls'] - actual_urls = [bookmark['url'] for bookmark in bookmarks] - return 1 if are_lists_equal(expected_urls, actual_urls, compare_urls) else 0 + if not bookmarks: + return 0. + elif rule['type'] == "bookmark_bar_folders_names": + bookmark_bar_folders_names = [bookmark['name'] for bookmark in bookmarks['bookmark_bar']['children'] if + bookmark['type'] == 'folder'] + return 1. if set(bookmark_bar_folders_names) == set(rule['names']) else 0. + elif rule['type'] == "bookmark_bar_websites_urls": + bookmark_bar_websites_urls = [bookmark['url'] for bookmark in bookmarks['bookmark_bar']['children'] if + bookmark['type'] == 'url'] + return 1. if set(bookmark_bar_websites_urls) == set(rule['urls']) else 0. else: - logger.error(f"Unknown type: {match_type}") - return 0 + raise TypeError(f"{rule['type']} not support yet!") + + +def compare_pdfs(pdf1_path, pdf2_path): + """ + Compare two PDF files. + """ + + def extract_text_from_pdf(pdf_path): + """Extract text from each page of the PDF.""" + text = "" + with fitz.open(pdf_path) as pdf: + for page in pdf: + text += page.get_text() + return text.strip() + + text1 = extract_text_from_pdf(pdf1_path) + text2 = extract_text_from_pdf(pdf2_path) + + return fuzz.ratio(text1, text2) / 100 + + +def is_cookie_deleted(cookie_data, rule): + """ + Check if the cookie is deleted. + """ + + if rule['type'] == 'domains': + cookies_domains = [cookie[1] for cookie in cookie_data] + for domain in rule['domains']: + for cookies_domain in cookies_domains: + if compare_urls(domain, cookies_domain): + return 0. + return 1. + else: + raise TypeError(f"{rule['type']} not support yet!") + + +def is_shortcut_on_desktop(shortcuts: Dict[str, str], rule): + """ + Check if the shortcut is on the desktop. + """ + # fixme: if the name of the website changed in the future, this will not work; can be replaced with url + if rule['type'] == 'name': + for shortcut_path, shortcut_content in shortcuts.items(): + if "Name=" + rule['name'] + "\n" in shortcut_content: + return 1. + return 0. + elif rule['type'] == 'url': + raise TypeError(f"{rule['type']} not support yet!") + elif rule['type'] == 'id': + raise TypeError(f"{rule['type']} not support yet!") + else: + raise TypeError(f"{rule['type']} not support yet!") diff --git a/evaluation_examples/examples/chrome/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json b/evaluation_examples/examples/chrome/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json index 21c9654..ef3fe12 100644 --- a/evaluation_examples/examples/chrome/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json +++ b/evaluation_examples/examples/chrome/2ad9387a-65d8-4e33-ad5b-7580065a27ca.json @@ -36,7 +36,8 @@ "expected": { "type": "rule", "rules": { - + "type": "bookmark_bar_folders_names", + "names": ["Favorites"] } } } diff --git a/evaluation_examples/examples/chrome/35253b65-1c19-4304-8aa4-6884b8218fc0.json b/evaluation_examples/examples/chrome/35253b65-1c19-4304-8aa4-6884b8218fc0.json index 4d064b5..71542bf 100644 --- a/evaluation_examples/examples/chrome/35253b65-1c19-4304-8aa4-6884b8218fc0.json +++ b/evaluation_examples/examples/chrome/35253b65-1c19-4304-8aa4-6884b8218fc0.json @@ -3,16 +3,50 @@ "snapshot": "chrome", "instruction": "Hey, I need a quick way back to this site. Could you whip up a shortcut on my desktop for me?", "source": "https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.mathsisfun.com/games/2048.html" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "", + "func": "is_shortcut_on_desktop", "result": { + "type": "shortcuts_on_desktop" }, "expected": { + "type": "rule", + "rules": { + "type": "name", + "name": "Play Puzzle Game 2048" + } } } } diff --git a/evaluation_examples/examples/chrome/7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json b/evaluation_examples/examples/chrome/7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json index 2f1d7f4..f5ef7f2 100644 --- a/evaluation_examples/examples/chrome/7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json +++ b/evaluation_examples/examples/chrome/7a5a7856-f1b6-42a4-ade9-1ca81ca0f263.json @@ -1,18 +1,53 @@ { "id": "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263", "snapshot": "chrome", - "instruction": "Can you save this webpage I'm looking at to my bookmarks so I can come back to it later?", + "instruction": "Can you save this webpage I'm looking at to bookmarks bar so I can come back to it later?", "source": "https://www.youtube.com/watch?v=ZaZ8GcTxjXA", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://blog.eleuther.ai/rotary-embeddings/", + "https://jalammar.github.io/illustrated-transformer/" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "", + "func": "is_expected_bookmarks", "result": { + "type": "bookmarks" }, "expected": { + "type": "rule", + "rules": { + "type": "bookmark_bar_websites_urls", + "urls": ["https://jalammar.github.io/illustrated-transformer/"] + } } } } diff --git a/evaluation_examples/examples/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json b/evaluation_examples/examples/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json index b914773..94ed902 100644 --- a/evaluation_examples/examples/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json +++ b/evaluation_examples/examples/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3.json @@ -1,18 +1,54 @@ { "id": "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3", "snapshot": "chrome", - "instruction": "Can you help me clean up my computer by getting rid of all the tracking things that websites like Amazon or eBay might have saved? I want to make sure my browsing is private and those sites don't remember me.", + "instruction": "Can you help me clean up my computer by getting rid of all the tracking things that Amazon might have saved? I want to make sure my browsing is private and those sites don't remember me.", "source": "https://support.google.com/chrome/answer/95647?hl=en&ref_topic=7438325&sjid=16867045591165135686-AP#zippy=%2Cdelete-cookies-from-a-site", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.amazon.com", + "https://www.amazon.com/s?k=huggingface+transformers+book" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "", + "func": "is_cookie_deleted", "result": { + "type": "cookie_data", + "dest": "Cookies" }, "expected": { + "type": "rule", + "rules": { + "type": "domains", + "domains": [".amazon.com"] + } } } } diff --git a/evaluation_examples/examples/chrome/e1e75309-3ddb-4d09-92ec-de869c928143.json b/evaluation_examples/examples/chrome/e1e75309-3ddb-4d09-92ec-de869c928143.json index 2484394..2b2fd37 100644 --- a/evaluation_examples/examples/chrome/e1e75309-3ddb-4d09-92ec-de869c928143.json +++ b/evaluation_examples/examples/chrome/e1e75309-3ddb-4d09-92ec-de869c928143.json @@ -3,16 +3,50 @@ "snapshot": "chrome", "instruction": "Computer, can you turn the webpage I'm looking at into a PDF file and put it on my main screen, you know, the Desktop?", "source": "https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://lilianweng.github.io/posts/2023-06-23-agent/" + ] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "", + "func": "compare_pdfs", "result": { + "type": "vm_file", + "path": "Desktop/LLM Powered Autonomous Agents _ Lil'Log.pdf", + "dest": "LLM Powered Autonomous Agents _ Lil'Log.pdf" }, "expected": { + "type": "pdf_from_url", + "path": "https://lilianweng.github.io/posts/2023-06-23-agent/", + "dest": "LLM Powered Autonomous Agents _ Lil'Log_gold.pdf" } } } diff --git a/requirements.txt b/requirements.txt index 558098b..fe07dc9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,3 +28,4 @@ pyacoustid opencv-python ImageHash scikit-image +pymupdf