Finish Chrome example loading v1
This commit is contained in:
@@ -20,7 +20,7 @@ todo
|
||||
- [x] Set up a pipeline and build agents implementation (zero-shot) for the task
|
||||
- [x] Decide which tasks inside DesktopEnv to focus on, and start wrapping up the environment for public release
|
||||
- [x] Start to annotate the examples for ~~training~~ and testing
|
||||
- [ ] Error handling during file passing and file opening, etc.
|
||||
- [ ] Add accessibility tree from the OS into the observation space
|
||||
- [x] Error handling during file passing and file opening, etc.
|
||||
- [x] Add accessibility tree from the OS into the observation space
|
||||
- [ ] Add pre-process and post-process action support for benchmarking setup and evaluation
|
||||
- [ ] Multiprocess support, which would make reinforcement learning more efficient
|
||||
@@ -280,3 +280,31 @@ class PythonController:
|
||||
else:
|
||||
logger.error("Failed to get wallpaper. Status code: %d", response.status_code)
|
||||
return None
|
||||
|
||||
def get_vm_desktop_path(self):
    """
    Ask the VM-side server for the desktop path.

    Issues a POST to the ``/desktop_path`` endpoint of ``self.http_server``.
    On HTTP 200 the ``desktop_path`` field of the JSON reply is returned;
    for any other status the failure is logged and ``None`` is returned.
    """
    reply = requests.post(self.http_server + "/desktop_path")

    # Bail out early on anything but a plain success.
    if reply.status_code != 200:
        logger.error("Failed to get desktop path. Status code: %d", reply.status_code)
        return None

    logger.info("Desktop path downloaded successfully")
    return reply.json()["desktop_path"]
|
||||
|
||||
def get_vm_directory_tree(self, path):
    """
    Ask the VM-side server for the directory tree rooted at ``path``.

    Sends ``{"path": path}`` as a JSON body to the ``/list_directory``
    endpoint of ``self.http_server``. On HTTP 200 the ``directory_tree``
    field of the JSON reply is returned; for any other status the failure
    is logged and ``None`` is returned.
    """
    body = json.dumps({"path": path})
    reply = requests.post(
        self.http_server + "/list_directory",
        headers={'Content-Type': 'application/json'},
        data=body,
    )

    # Bail out early on anything but a plain success.
    if reply.status_code != 200:
        logger.error("Failed to get directory tree. Status code: %d", reply.status_code)
        return None

    logger.info("Directory tree downloaded successfully")
    return reply.json()["directory_tree"]
|
||||
@@ -2,4 +2,4 @@ from .file import get_cloud_file, get_vm_file, get_cache_file
|
||||
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper
|
||||
from .misc import get_rule, get_accessibility_tree
|
||||
from .vlc import get_vlc_playing_info, get_vlc_config
|
||||
from .chrome import get_default_search_engine, get_bookmarks, get_open_tabs_info
|
||||
from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, get_shortcuts_on_desktop
|
||||
|
||||
@@ -46,6 +46,10 @@ def get_default_search_engine(env, config: Dict[str, str]):
|
||||
|
||||
|
||||
def get_cookie_data(env, config: Dict[str, str]):
|
||||
"""
|
||||
Get the cookies from the Chrome browser.
|
||||
Assume the cookies are stored in the default location, not encrypted and not large in size.
|
||||
"""
|
||||
os_type = env.vm_platform
|
||||
if os_type == 'Windows':
|
||||
chrome_cookie_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
|
||||
@@ -61,21 +65,23 @@ def get_cookie_data(env, config: Dict[str, str]):
|
||||
else:
|
||||
raise Exception('Unsupported operating system')
|
||||
|
||||
# todo: add a new controller function to connect the cookie database
|
||||
#############
|
||||
try:
|
||||
conn = sqlite3.connect(chrome_cookie_file_path)
|
||||
content = env.controller.get_file(chrome_cookie_file_path)
|
||||
_path = os.path.join(env.cache_dir, config["dest"])
|
||||
|
||||
with open(_path, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
conn = sqlite3.connect(_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Query to check for OpenAI cookies
|
||||
cursor.execute("SELECT * FROM cookies")
|
||||
cookies = cursor.fetchall()
|
||||
|
||||
return cookies
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return None
|
||||
#############
|
||||
|
||||
|
||||
def get_bookmarks(env, config: Dict[str, str]):
|
||||
@@ -94,17 +100,12 @@ def get_bookmarks(env, config: Dict[str, str]):
|
||||
else:
|
||||
raise Exception('Unsupported operating system')
|
||||
|
||||
try:
|
||||
content = env.controller.get_file(preference_file_path)
|
||||
# make content json variable
|
||||
data = json.load(content)
|
||||
|
||||
bookmarks = data.get('roots', {})
|
||||
return bookmarks
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return None
|
||||
content = env.controller.get_file(preference_file_path)
|
||||
if not content:
|
||||
return []
|
||||
data = json.loads(content)
|
||||
bookmarks = data.get('roots', {})
|
||||
return bookmarks
|
||||
|
||||
|
||||
# todo: move this to the main.py
|
||||
@@ -190,3 +191,83 @@ def get_active_tab_info(env, config: Dict[str, str]):
|
||||
|
||||
browser.close()
|
||||
return active_tab_info
|
||||
|
||||
|
||||
def get_pdf_from_url(env, config: Dict[str, str]) -> str:
    """
    Render a web page to a PDF file in the local cache directory.

    Reads the page URL from ``config["path"]`` and the output file name from
    ``config["dest"]`` (joined onto ``env.cache_dir``). The page is opened in
    the Chrome instance reachable over CDP at ``env.vm_ip`` and printed to
    PDF at the destination path, which is then returned.
    """
    source_url = config["path"]
    target_file = os.path.join(env.cache_dir, config["dest"])

    # fixme: this port is hard-coded, need to be changed from config file
    cdp_endpoint = "http://{}:{}".format(env.vm_ip, 9222)

    with sync_playwright() as playwright:
        chrome = playwright.chromium.connect_over_cdp(cdp_endpoint)
        tab = chrome.new_page()
        tab.goto(source_url)
        tab.pdf(path=target_file)
        chrome.close()

    return target_file
|
||||
|
||||
|
||||
# fixme: needs to be changed (maybe through post-processing) since it's not working
|
||||
def get_chrome_saved_address(env, config: Dict[str, str]):
    """
    Fetch the HTML of Chrome's saved-addresses settings page.

    Connects over CDP to the Chrome instance running on the VM, opens
    ``chrome://settings/addresses`` in a new page and returns that page's
    HTML content.

    Args:
        env: environment object; ``env.vm_ip`` is used as the CDP host.
        config: getter configuration; not read by this function.

    Returns:
        The HTML content of the settings page as returned by Playwright.
    """
    # Use the VM address from the environment rather than a developer-local
    # IP, matching how the other CDP-based getters locate the browser.
    host = env.vm_ip
    port = 9222  # fixme: this port is hard-coded, need to be changed from config file

    remote_debugging_url = f"http://{host}:{port}"
    with sync_playwright() as p:
        # connect to remote Chrome instance
        browser = p.chromium.connect_over_cdp(remote_debugging_url)
        try:
            page = browser.new_page()

            # Navigate to Chrome's settings page for autofill
            page.goto("chrome://settings/addresses")

            # Get the HTML content of the page
            content = page.content()
        finally:
            # Release the CDP connection even when navigation fails.
            browser.close()

    return content
|
||||
|
||||
|
||||
def get_shortcuts_on_desktop(env, config: Dict[str, str]):
    """
    Collect the shortcut files found on the VM's desktop.

    The shortcut file extension is chosen from ``env.vm_platform``
    (``.lnk`` on Windows, ``.webloc`` on Darwin, ``.desktop`` on Linux).
    The desktop directory is listed through the controller, and every entry
    whose name ends with that extension is downloaded and decoded.

    Args:
        env: environment object exposing ``vm_platform`` and ``controller``.
        config: getter configuration; not read by this function.

    Returns:
        A dict mapping each shortcut file name to its content decoded as
        UTF-8. An empty dict is returned when the operating system is not
        supported or the desktop could not be listed, so callers can always
        treat the result as a mapping.
    """
    extension_by_os = {
        'Windows': '.lnk',      # Windows shortcuts are typically .url or .lnk files
        'Darwin': '.webloc',    # macOS's shortcuts are .webloc files
        'Linux': '.desktop',    # Linux (Ubuntu, etc.) shortcuts are typically .desktop files
    }

    os_name = env.vm_platform
    shortcut_extension = extension_by_os.get(os_name)
    if shortcut_extension is None:
        logger.error(f"Unsupported operating system: {os_name}")
        return {}

    controller = env.controller

    # Both controller calls report failure by returning None.
    desktop_path = controller.get_vm_desktop_path()
    if desktop_path is None:
        logger.error("Failed to get the desktop path of the VM")
        return {}

    desktop_directory_tree = controller.get_vm_directory_tree(desktop_path)
    if not desktop_directory_tree:
        logger.error("Failed to list the desktop directory: %s", desktop_path)
        return {}

    shortcut_names = [entry['name'] for entry in desktop_directory_tree.get('children') or []
                      if entry['name'].endswith(shortcut_extension)]

    short_cuts = {}
    for shortcut_name in shortcut_names:
        # ``!r`` embeds the name as a valid Python string literal, so quotes
        # or backslashes in a file name cannot break the remote command.
        command = ("import os; "
                   f"print(os.path.join(os.path.expanduser('~'), 'Desktop', {shortcut_name!r}))")
        remote_path = controller.execute_python_command(command)['output'].strip()

        content = controller.get_file(remote_path)
        if content is None:
            logger.warning("Could not fetch shortcut file: %s", remote_path)
            continue

        # Binary shortcut formats may not be valid UTF-8; replace undecodable
        # bytes instead of aborting the whole collection.
        short_cuts[shortcut_name] = content.decode('utf-8', errors='replace')

    return short_cuts
|
||||
|
||||
@@ -40,7 +40,7 @@ def get_vm_file(env, config: Dict[str, str]) -> Optional[str]:
|
||||
file = env.controller.get_file(config["path"])
|
||||
if file is None:
|
||||
return None
|
||||
#raise FileNotFoundError("File not found on VM: {:}".format(config["path"]))
|
||||
# raise FileNotFoundError("File not found on VM: {:}".format(config["path"]))
|
||||
with open(_path, "wb") as f:
|
||||
f.write(file)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .chrome import is_expected_tabs, is_expected_bookmarks
|
||||
from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop
|
||||
from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
|
||||
from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
|
||||
compare_insert_equation
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import fitz # PyMuPDF
|
||||
import rapidfuzz.fuzz as fuzz
|
||||
|
||||
from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls
|
||||
|
||||
logger = logging.getLogger("desktopenv.metrics.chrome")
|
||||
@@ -22,18 +26,72 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f
|
||||
return 0
|
||||
|
||||
|
||||
def is_expected_bookmarks(bookmarks: List[Dict[str, Any]], rule: Dict[str, Any]) -> float:
|
||||
def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
|
||||
"""
|
||||
Checks if the expected bookmarks are in Chrome.
|
||||
"""
|
||||
|
||||
# todo
|
||||
match_type = rule['type']
|
||||
|
||||
if match_type == "url":
|
||||
expected_urls = rule['urls']
|
||||
actual_urls = [bookmark['url'] for bookmark in bookmarks]
|
||||
return 1 if are_lists_equal(expected_urls, actual_urls, compare_urls) else 0
|
||||
if not bookmarks:
|
||||
return 0.
|
||||
elif rule['type'] == "bookmark_bar_folders_names":
|
||||
bookmark_bar_folders_names = [bookmark['name'] for bookmark in bookmarks['bookmark_bar']['children'] if
|
||||
bookmark['type'] == 'folder']
|
||||
return 1. if set(bookmark_bar_folders_names) == set(rule['names']) else 0.
|
||||
elif rule['type'] == "bookmark_bar_websites_urls":
|
||||
bookmark_bar_websites_urls = [bookmark['url'] for bookmark in bookmarks['bookmark_bar']['children'] if
|
||||
bookmark['type'] == 'url']
|
||||
return 1. if set(bookmark_bar_websites_urls) == set(rule['urls']) else 0.
|
||||
else:
|
||||
logger.error(f"Unknown type: {match_type}")
|
||||
return 0
|
||||
raise TypeError(f"{rule['type']} not support yet!")
|
||||
|
||||
|
||||
def compare_pdfs(pdf1_path, pdf2_path):
    """
    Score how similar the text of two PDF files is.

    The text of every page of each file is extracted and concatenated,
    surrounding whitespace is stripped, and the two texts are compared with
    ``fuzz.ratio``.

    Args:
        pdf1_path: path of the first PDF file.
        pdf2_path: path of the second PDF file.

    Returns:
        ``fuzz.ratio`` of the two texts divided by 100. ``0.`` is returned
        when either path is missing (``None`` or empty), since there is then
        nothing to compare.
    """
    # A getter may hand over no path when the file could not be fetched;
    # score that as a failed comparison instead of opening a bogus document.
    if not pdf1_path or not pdf2_path:
        return 0.

    def extract_text_from_pdf(pdf_path):
        """Extract text from each page of the PDF."""
        with fitz.open(pdf_path) as pdf:
            return "".join(page.get_text() for page in pdf).strip()

    text1 = extract_text_from_pdf(pdf1_path)
    text2 = extract_text_from_pdf(pdf2_path)

    return fuzz.ratio(text1, text2) / 100
|
||||
|
||||
|
||||
def is_cookie_deleted(cookie_data, rule):
    """
    Check that no stored cookie belongs to any of the given domains.

    Only ``rule['type'] == 'domains'`` is handled: every entry of
    ``rule['domains']`` is compared with ``compare_urls`` against the second
    field of each cookie row (presumably the cookie's host column; verify
    against the cookies table schema). Returns ``0.`` as soon as one pair
    matches and ``1.`` when none does. Any other rule type raises
    ``TypeError``.
    """
    if rule['type'] != 'domains':
        raise TypeError(f"{rule['type']} not support yet!")

    stored_hosts = [row[1] for row in cookie_data]
    still_present = any(
        compare_urls(wanted, stored)
        for wanted in rule['domains']
        for stored in stored_hosts
    )
    return 0. if still_present else 1.
|
||||
|
||||
|
||||
def is_shortcut_on_desktop(shortcuts: Dict[str, str], rule):
    """
    Check whether one of the desktop shortcuts matches the rule.

    Only ``rule['type'] == 'name'`` is handled: the result is ``1.`` when the
    content of any shortcut contains the line ``Name=<rule['name']>``
    followed by a newline, and ``0.`` otherwise. Every other rule type
    (including ``'url'`` and ``'id'``) raises ``TypeError``.
    """
    # fixme: if the name of the website changed in the future, this will not work; can be replaced with url
    if rule['type'] != 'name':
        raise TypeError(f"{rule['type']} not support yet!")

    found = any(
        "Name=" + rule['name'] + "\n" in body
        for _, body in shortcuts.items()
    )
    return 1. if found else 0.
|
||||
|
||||
@@ -36,7 +36,8 @@
|
||||
"expected": {
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
|
||||
"type": "bookmark_bar_folders_names",
|
||||
"names": ["Favorites"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,16 +3,50 @@
|
||||
"snapshot": "chrome",
|
||||
"instruction": "Hey, I need a quick way back to this site. Could you whip up a shortcut on my desktop for me?",
|
||||
"source": "https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome",
|
||||
"config": [],
|
||||
"config": [
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"google-chrome",
|
||||
"--remote-debugging-port=1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"socat",
|
||||
"tcp-listen:9222,fork",
|
||||
"tcp:localhost:1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://www.mathsisfun.com/games/2048.html"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"chrome"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": "",
|
||||
"func": "is_shortcut_on_desktop",
|
||||
"result": {
|
||||
"type": "shortcuts_on_desktop"
|
||||
},
|
||||
"expected": {
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"type": "name",
|
||||
"name": "Play Puzzle Game 2048"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,18 +1,53 @@
|
||||
{
|
||||
"id": "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
|
||||
"snapshot": "chrome",
|
||||
"instruction": "Can you save this webpage I'm looking at to my bookmarks so I can come back to it later?",
|
||||
"instruction": "Can you save this webpage I'm looking at to bookmarks bar so I can come back to it later?",
|
||||
"source": "https://www.youtube.com/watch?v=ZaZ8GcTxjXA",
|
||||
"config": [],
|
||||
"config": [
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"google-chrome",
|
||||
"--remote-debugging-port=1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"socat",
|
||||
"tcp-listen:9222,fork",
|
||||
"tcp:localhost:1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://blog.eleuther.ai/rotary-embeddings/",
|
||||
"https://jalammar.github.io/illustrated-transformer/"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"chrome"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": "",
|
||||
"func": "is_expected_bookmarks",
|
||||
"result": {
|
||||
"type": "bookmarks"
|
||||
},
|
||||
"expected": {
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"type": "bookmark_bar_websites_urls",
|
||||
"urls": ["https://jalammar.github.io/illustrated-transformer/"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,18 +1,54 @@
|
||||
{
|
||||
"id": "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
|
||||
"snapshot": "chrome",
|
||||
"instruction": "Can you help me clean up my computer by getting rid of all the tracking things that websites like Amazon or eBay might have saved? I want to make sure my browsing is private and those sites don't remember me.",
|
||||
"instruction": "Can you help me clean up my computer by getting rid of all the tracking things that Amazon might have saved? I want to make sure my browsing is private and those sites don't remember me.",
|
||||
"source": "https://support.google.com/chrome/answer/95647?hl=en&ref_topic=7438325&sjid=16867045591165135686-AP#zippy=%2Cdelete-cookies-from-a-site",
|
||||
"config": [],
|
||||
"config": [
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"google-chrome",
|
||||
"--remote-debugging-port=1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"socat",
|
||||
"tcp-listen:9222,fork",
|
||||
"tcp:localhost:1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://www.amazon.com",
|
||||
"https://www.amazon.com/s?k=huggingface+transformers+book"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"chrome"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": "",
|
||||
"func": "is_cookie_deleted",
|
||||
"result": {
|
||||
"type": "cookie_data",
|
||||
"dest": "Cookies"
|
||||
},
|
||||
"expected": {
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"type": "domains",
|
||||
"domains": [".amazon.com"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,16 +3,50 @@
|
||||
"snapshot": "chrome",
|
||||
"instruction": "Computer, can you turn the webpage I'm looking at into a PDF file and put it on my main screen, you know, the Desktop?",
|
||||
"source": "https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php",
|
||||
"config": [],
|
||||
"config": [
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"google-chrome",
|
||||
"--remote-debugging-port=1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"socat",
|
||||
"tcp-listen:9222,fork",
|
||||
"tcp:localhost:1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://lilianweng.github.io/posts/2023-06-23-agent/"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"chrome"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": "",
|
||||
"func": "compare_pdfs",
|
||||
"result": {
|
||||
"type": "vm_file",
|
||||
"path": "Desktop/LLM Powered Autonomous Agents _ Lil'Log.pdf",
|
||||
"dest": "LLM Powered Autonomous Agents _ Lil'Log.pdf"
|
||||
},
|
||||
"expected": {
|
||||
"type": "pdf_from_url",
|
||||
"path": "https://lilianweng.github.io/posts/2023-06-23-agent/",
|
||||
"dest": "LLM Powered Autonomous Agents _ Lil'Log_gold.pdf"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,3 +28,4 @@ pyacoustid
|
||||
opencv-python
|
||||
ImageHash
|
||||
scikit-image
|
||||
pymupdf
|
||||
|
||||
Reference in New Issue
Block a user