Finish Chrome example loading v1

This commit is contained in:
Timothyxxx
2024-01-13 22:56:50 +08:00
parent bc88ee0c41
commit a1c3e4c294
13 changed files with 351 additions and 43 deletions

View File

@@ -2,4 +2,4 @@ from .file import get_cloud_file, get_vm_file, get_cache_file
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper
from .misc import get_rule, get_accessibility_tree
from .vlc import get_vlc_playing_info, get_vlc_config
from .chrome import get_default_search_engine, get_bookmarks, get_open_tabs_info
from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, get_shortcuts_on_desktop

View File

@@ -46,6 +46,10 @@ def get_default_search_engine(env, config: Dict[str, str]):
def get_cookie_data(env, config: Dict[str, str]):
"""
Get the cookies from the Chrome browser.
Assume the cookies are stored in the default location, not encrypted and not large in size.
"""
os_type = env.vm_platform
if os_type == 'Windows':
chrome_cookie_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
@@ -61,21 +65,23 @@ def get_cookie_data(env, config: Dict[str, str]):
else:
raise Exception('Unsupported operating system')
# todo: add a new controller function to connect the cookie database
#############
try:
conn = sqlite3.connect(chrome_cookie_file_path)
content = env.controller.get_file(chrome_cookie_file_path)
_path = os.path.join(env.cache_dir, config["dest"])
with open(_path, "wb") as f:
f.write(content)
conn = sqlite3.connect(_path)
cursor = conn.cursor()
# Query to check for OpenAI cookies
cursor.execute("SELECT * FROM cookies")
cookies = cursor.fetchall()
return cookies
except Exception as e:
logger.error(f"Error: {e}")
return None
#############
def get_bookmarks(env, config: Dict[str, str]):
@@ -94,17 +100,12 @@ def get_bookmarks(env, config: Dict[str, str]):
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
# make content json variable
data = json.load(content)
bookmarks = data.get('roots', {})
return bookmarks
except Exception as e:
logger.error(f"Error: {e}")
return None
content = env.controller.get_file(preference_file_path)
if not content:
return []
data = json.loads(content)
bookmarks = data.get('roots', {})
return bookmarks
# todo: move this to the main.py
@@ -190,3 +191,83 @@ def get_active_tab_info(env, config: Dict[str, str]):
browser.close()
return active_tab_info
def get_pdf_from_url(env, config: Dict[str, str]) -> str:
    """
    Render a web page to a PDF file through the VM's Chrome instance.

    The page is opened in the Chrome running on the VM (reached over the
    Chrome DevTools Protocol) and printed to a PDF stored in the local cache.

    Args:
        env: environment object; `env.vm_ip` and `env.cache_dir` are read.
        config: getter configuration with the keys
            "path": URL of the page to render,
            "dest": file name of the PDF inside `env.cache_dir`,
            "port": (optional) remote-debugging port, defaults to 9222.

    Returns:
        The local path of the written PDF file.
    """
    _url = config["path"]
    _path = os.path.join(env.cache_dir, config["dest"])

    host = env.vm_ip
    # The debugging port may be overridden per task; 9222 stays the default.
    port = int(config.get("port", 9222))
    remote_debugging_url = f"http://{host}:{port}"

    with sync_playwright() as p:
        browser = p.chromium.connect_over_cdp(remote_debugging_url)
        try:
            page = browser.new_page()
            page.goto(_url)
            page.pdf(path=_path)
        finally:
            # Always release the CDP connection, even if navigation or
            # PDF generation fails.
            browser.close()

    return _path
# fixme: needs to be changed (maybe through post-processing) since it's not working
def get_chrome_saved_address(env, config: Dict[str, str]):
    """
    Fetch the HTML of Chrome's saved-addresses settings page from the VM.

    Args:
        env: environment object; `env.vm_ip` is read.
        config: getter configuration; the optional key "port" overrides the
            remote-debugging port (defaults to 9222).

    Returns:
        The HTML content of `chrome://settings/addresses` as returned by the
        page. NOTE(review): the fixme attached to this function says the
        result is not usable yet and needs post-processing.
    """
    # Connect to the VM under test instead of a developer's fixed address.
    host = env.vm_ip
    port = int(config.get("port", 9222))
    remote_debugging_url = f"http://{host}:{port}"

    with sync_playwright() as p:
        # connect to remote Chrome instance
        browser = p.chromium.connect_over_cdp(remote_debugging_url)
        try:
            page = browser.new_page()

            # Navigate to Chrome's settings page for autofill
            page.goto("chrome://settings/addresses")

            # Get the HTML content of the page
            content = page.content()
        finally:
            # Release the CDP connection even when navigation fails.
            browser.close()

    return content
def get_shortcuts_on_desktop(env, config: Dict[str, str]):
    """
    Collect the shortcut files found on the VM's desktop.

    Args:
        env: environment object; `env.vm_platform` and `env.controller` are used.
        config: getter configuration (not read by this getter).

    Returns:
        A dict mapping each shortcut file name to its content decoded as
        UTF-8 text (undecodable bytes are replaced). The dict is empty when
        the platform is unsupported or no shortcut is found.
    """
    # Shortcut file extension used by each supported operating system.
    extension_by_os = {
        'Windows': '.lnk',      # Windows shortcuts are typically .url or .lnk files
        'Darwin': '.webloc',    # macOS's shortcuts are .webloc files
        'Linux': '.desktop',    # Linux (Ubuntu, etc.) shortcuts are typically .desktop files
    }

    os_name = env.vm_platform
    shortcut_extension = extension_by_os.get(os_name)
    if shortcut_extension is None:
        logger.error(f"Unsupported operating system: {os_name}")
        # Same (empty) shape as the normal return value so callers can
        # iterate it as a mapping.
        return {}

    # Get the path to the desktop folder and list its entries
    desktop_path = env.controller.get_vm_desktop_path()
    desktop_directory_tree = env.controller.get_vm_directory_tree(desktop_path) or {}
    shortcut_names = [entry['name'] for entry in desktop_directory_tree.get('children') or []
                      if entry['name'].endswith(shortcut_extension)]

    short_cuts = {}
    for shortcut_name in shortcut_names:
        # The path is joined on the VM so that its own separator and home
        # directory are used. `!r` embeds the name as a valid Python literal,
        # so quotes or backslashes in a file name cannot break the command.
        command = ("import os; "
                   f"print(os.path.join(os.path.expanduser('~'), 'Desktop', {shortcut_name!r}))")
        remote_path = env.controller.execute_python_command(command)['output'].strip()

        content = env.controller.get_file(remote_path)
        if content is None:
            logger.error(f"Failed to fetch shortcut file: {remote_path}")
            continue

        # Shortcut files are not guaranteed to be valid UTF-8; never fail on
        # stray bytes.
        short_cuts[shortcut_name] = content.decode('utf-8', errors='replace')

    return short_cuts

View File

@@ -40,7 +40,7 @@ def get_vm_file(env, config: Dict[str, str]) -> Optional[str]:
file = env.controller.get_file(config["path"])
if file is None:
return None
#raise FileNotFoundError("File not found on VM: {:}".format(config["path"]))
# raise FileNotFoundError("File not found on VM: {:}".format(config["path"]))
with open(_path, "wb") as f:
f.write(file)

View File

@@ -1,4 +1,4 @@
from .chrome import is_expected_tabs, is_expected_bookmarks
from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop
from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
compare_insert_equation

View File

@@ -1,5 +1,9 @@
import logging
from typing import Any, Dict, List
import fitz # PyMuPDF
import rapidfuzz.fuzz as fuzz
from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls
logger = logging.getLogger("desktopenv.metrics.chrome")
@@ -22,18 +26,72 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f
return 0
def is_expected_bookmarks(bookmarks: Dict[str, Any], rule: Dict[str, Any]) -> float:
    """
    Checks if the expected bookmarks are in Chrome.

    Args:
        bookmarks: the bookmark roots; only the children of the
            'bookmark_bar' entry are inspected. An empty value scores 0.
        rule: the expectation. rule['type'] selects the check:
            "bookmark_bar_folders_names": the names of the folders on the
                bookmark bar must equal rule['names'] (as a set);
            "bookmark_bar_websites_urls": the URLs of the links on the
                bookmark bar must equal rule['urls'] (as a set).

    Returns:
        1. when the expectation is met, 0. otherwise.

    Raises:
        TypeError: if rule['type'] is not one of the supported checks.
    """
    if not bookmarks:
        return 0.

    # A missing bookmark bar (or one without children) counts as empty
    # instead of raising KeyError.
    bar_children = (bookmarks.get('bookmark_bar') or {}).get('children') or []

    if rule['type'] == "bookmark_bar_folders_names":
        bookmark_bar_folders_names = {bookmark['name'] for bookmark in bar_children
                                      if bookmark.get('type') == 'folder'}
        return 1. if bookmark_bar_folders_names == set(rule['names']) else 0.

    if rule['type'] == "bookmark_bar_websites_urls":
        bookmark_bar_websites_urls = {bookmark['url'] for bookmark in bar_children
                                      if bookmark.get('type') == 'url'}
        return 1. if bookmark_bar_websites_urls == set(rule['urls']) else 0.

    raise TypeError(f"{rule['type']} not support yet!")
def compare_pdfs(pdf1_path, pdf2_path):
    """
    Compare two PDF files by the similarity of their extracted text.

    Args:
        pdf1_path: path of the first PDF file.
        pdf2_path: path of the second PDF file.

    Returns:
        The fuzzy-match ratio of the two texts divided by 100, or 0. when
        either path is missing (None or empty).
    """

    def extract_text_from_pdf(pdf_path):
        """Extract text from each page of the PDF."""
        with fitz.open(pdf_path) as pdf:
            return "".join(page.get_text() for page in pdf).strip()

    # A file that could not be produced (e.g. a getter that returned None)
    # can never match; do not hand a missing path to the PDF library.
    if not pdf1_path or not pdf2_path:
        return 0.

    text1 = extract_text_from_pdf(pdf1_path)
    text2 = extract_text_from_pdf(pdf2_path)

    return fuzz.ratio(text1, text2) / 100
def is_cookie_deleted(cookie_data, rule):
    """
    Check if the cookie is deleted.

    Args:
        cookie_data: iterable of cookie rows; element 1 of each row is
            compared against the domains (presumably the host column of
            Chrome's cookies table - TODO confirm against the getter).
            None means the cookies could not be read.
        rule: the expectation; only rule['type'] == 'domains' is supported,
            with rule['domains'] listing the domains that must be gone.

    Returns:
        1. when no row matches any of the domains, 0. when at least one row
        still matches or when the cookie data is unavailable.

    Raises:
        TypeError: if rule['type'] is not supported.
    """
    if rule['type'] != 'domains':
        raise TypeError(f"{rule['type']} not support yet!")

    # Without cookie data the deletion cannot be verified, so do not
    # report success.
    if cookie_data is None:
        return 0.

    cookies_domains = [cookie[1] for cookie in cookie_data]
    still_present = any(
        compare_urls(domain, cookies_domain)
        for domain in rule['domains']
        for cookies_domain in cookies_domains
    )
    return 0. if still_present else 1.
def is_shortcut_on_desktop(shortcuts: Dict[str, str], rule):
    """
    Check if the shortcut is on the desktop.

    Args:
        shortcuts: mapping of shortcut file name to the text content of the
            file. An empty value scores 0.
        rule: the expectation; only rule['type'] == 'name' is supported, with
            rule['name'] being the expected value of the shortcut's `Name=`
            entry.

    Returns:
        1. when some shortcut contains the line `Name=<rule['name']>`,
        0. otherwise.

    Raises:
        TypeError: if rule['type'] is not supported ('url' and 'id' are
            recognised but not implemented yet).
    """
    # fixme: if the name of the website changed in the future, this will not work; can be replaced with url
    if rule['type'] != 'name':
        raise TypeError(f"{rule['type']} not support yet!")

    if not shortcuts:
        return 0.

    # Match the whole line: a plain substring test would also accept keys
    # such as `GenericName=`, and would miss files using CRLF line endings
    # or ending without a trailing newline.
    expected_line = "Name=" + rule['name']
    for shortcut_content in shortcuts.values():
        if expected_line in shortcut_content.splitlines():
            return 1.
    return 0.