Make up missing getters and metrics; Update VLC scripts; Start to work on Chrome, update examples instructions

This commit is contained in:
Timothyxxx
2024-01-11 21:27:40 +08:00
parent b20027884a
commit 820579a5a2
15 changed files with 249 additions and 160 deletions

View File

@@ -228,6 +228,13 @@ class PythonController:
else:
raise Exception(f"Unknown action type: {action_type}")
# Additional info
def get_vm_platform(self) -> str:
    """
    Gets the operating system name of the vm.

    Runs ``platform.system()`` inside the vm through
    ``execute_python_command`` and returns the printed value with the
    surrounding whitespace (including the trailing newline of ``print``)
    removed.
    """
    return self.execute_python_command("import platform; print(platform.system())")['output'].strip()
def get_vm_screen_size(self):
"""
Gets the size of the vm screen.
@@ -262,6 +269,7 @@ class PythonController:
logger.error("Failed to get wallpaper. Status code: %d", response.status_code)
return None
# VLC
def get_vlc_status(self, host='localhost', port=8080, password='password'):
url = f'http://{host}:{port}/requests/status.xml'

View File

@@ -35,7 +35,10 @@ def _execute_command(command: List[str]) -> None:
class DesktopEnv(gym.Env):
"""DesktopEnv with OpenAI Gym interface."""
"""
DesktopEnv with OpenAI Gym interface.
Fixme: refactor the logic when implementing the multi-process version
"""
def __init__(
self,
@@ -81,6 +84,10 @@ class DesktopEnv(gym.Env):
self.controller = PythonController(http_server=self.host)
self.setup_controller = SetupController(http_server=self.host, cache_dir=self.cache_dir)
# Meta info of the VM
self.vm_platform = self.controller.get_vm_platform()
self.vm_screen_size = self.controller.get_vm_screen_size()
# mode: human or machine
assert action_space in ["computer_13", "pyautogui"]
self.action_space = action_space

View File

@@ -3,3 +3,4 @@ from .misc import get_rule
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper
from .misc import get_rule, get_accessibility_tree
from .vlc import get_vlc_playing_info, get_vlc_config
from .chrome import get_default_search_engine, get_bookmarks

View File

@@ -0,0 +1,126 @@
import json
import logging
import os
import sqlite3
from typing import Dict
logger = logging.getLogger("desktopenv.getters.chrome")
"""
WARNING:
1. Functions from this script assume that no account is registered on Chrome, otherwise the default file path needs to be changed.
2. The functions are not tested on Windows and Mac, but they should work.
"""
def get_default_search_engine(env, config: Dict[str, str]):
    """
    Gets the short name of Chrome's default search engine from the VM.

    The location of the profile's ``Preferences`` file is resolved by running
    a small Python snippet inside the VM, the file is then fetched with
    ``env.controller.get_file`` and parsed as JSON.

    Args:
        env: environment object providing ``vm_platform`` and ``controller``.
        config (Dict[str, str]): getter configuration; not used by this getter.

    Returns:
        str: ``default_search_provider_data.template_url_data.short_name``
            from the preferences; "Google" if that key is absent or if the
            file cannot be fetched or parsed.

    Raises:
        Exception: if ``env.vm_platform`` is not Windows, Darwin or Linux.
    """
    path_commands = {
        # Raw string on purpose: the snippet is parsed again as Python source
        # inside the VM, so the backslashes must arrive there doubled.
        # With single backslashes "\User" is read as a truncated \U escape and
        # the snippet fails with a SyntaxError.
        'Windows': r"import os; print(os.path.join(os.getenv('LOCALAPPDATA'), 'Google\\Chrome\\User Data\\Default\\Preferences'))",
        'Darwin': "import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))",
        'Linux': "import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))",
    }

    os_type = env.vm_platform
    if os_type not in path_commands:
        raise Exception('Unsupported operating system')

    preference_file_path = env.controller.execute_python_command(path_commands[os_type])['output'].strip()

    try:
        content = env.controller.get_file(preference_file_path)
        data = json.loads(content)

        # The path within the JSON data to the default search engine might vary
        search_engine = data.get('default_search_provider_data', {}).get('template_url_data', {}).get('short_name',
                                                                                                      'Google')
        return search_engine
    except Exception as e:
        # Best effort: fall back to Chrome's stock default when the file is unreadable
        logger.error(f"Error: {e}")
        return "Google"
def get_cookie_data(env, config: Dict[str, str]):
    """
    Reads every row of the ``cookies`` table of Chrome's cookie database.

    NOTE: the database path is built from the environment variables of the
    current process (``LOCALAPPDATA`` / ``HOME``) and opened directly with
    ``sqlite3``; it is not fetched through ``env.controller`` (see the todo
    below).

    Args:
        env: environment object providing ``vm_platform``.
        config (Dict[str, str]): getter configuration; not used by this getter.

    Returns:
        list of row tuples from ``SELECT * FROM cookies``, or None if the
        database cannot be opened or queried.

    Raises:
        Exception: if ``env.vm_platform`` is not Windows, Darwin or Linux.
    """
    os_type = env.vm_platform
    if os_type == 'Windows':
        chrome_cookie_file_path = os.path.join(os.getenv('LOCALAPPDATA'), 'Google\\Chrome\\User Data\\Default\\Cookies')
    elif os_type == 'Darwin':
        chrome_cookie_file_path = os.path.join(os.getenv('HOME'),
                                               'Library/Application Support/Google/Chrome/Default/Cookies')
    elif os_type == 'Linux':
        chrome_cookie_file_path = os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Cookies')
    else:
        raise Exception('Unsupported operating system')

    # todo: add a new controller function to connect the cookie database
    #############
    conn = None
    try:
        conn = sqlite3.connect(chrome_cookie_file_path)
        cursor = conn.cursor()

        # Fetch all cookies; any per-site filtering is left to the caller
        cursor.execute("SELECT * FROM cookies")
        cookies = cursor.fetchall()

        return cookies
    except Exception as e:
        logger.error(f"Error: {e}")
        return None
    finally:
        # Always release the database handle, on success and on failure
        if conn is not None:
            conn.close()
    #############
def get_bookmarks(env, config: Dict[str, str]):
    """
    Gets the bookmark roots of Chrome's default profile from the VM.

    The location of the ``Bookmarks`` file is resolved by running a small
    Python snippet inside the VM (so the VM's own environment variables are
    used), the file is then fetched with ``env.controller.get_file`` and
    parsed as JSON.

    Args:
        env: environment object providing ``vm_platform`` and ``controller``.
        config (Dict[str, str]): getter configuration; not used by this getter.

    Returns:
        The value stored under ``roots`` in the bookmarks JSON (``{}`` if the
        key is absent), or None if the file cannot be located, fetched or
        parsed.

    Raises:
        Exception: if ``env.vm_platform`` is not Windows, Darwin or Linux.
    """
    path_commands = {
        # Raw string on purpose: the snippet is parsed again as Python source
        # inside the VM, so the backslashes must arrive there doubled.
        'Windows': r"import os; print(os.path.join(os.getenv('LOCALAPPDATA'), 'Google\\Chrome\\User Data\\Default\\Bookmarks'))",
        'Darwin': "import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Bookmarks'))",
        'Linux': "import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Bookmarks'))",
    }

    os_type = env.vm_platform
    if os_type not in path_commands:
        raise Exception('Unsupported operating system')

    try:
        bookmarks_file_path = env.controller.execute_python_command(path_commands[os_type])['output'].strip()
        content = env.controller.get_file(bookmarks_file_path)
        # get_file hands back the file content itself, not a file object,
        # hence json.loads rather than json.load
        data = json.loads(content)

        bookmarks = data.get('roots', {})
        return bookmarks
    except Exception as e:
        logger.error(f"Error: {e}")
        return None
# todo: move this to the main.py
def get_extensions_installed_from_shop(env, config: Dict[str, str]):
    """
    Collects the parsed ``manifest.json`` of every installed Chrome extension.

    The extensions directory of the default profile is expected to contain
    one sub-directory per extension id, each holding version-named
    sub-directories with a ``manifest.json``.

    NOTE: the directory is resolved with ``os.path.expanduser`` and walked
    with ``os.listdir`` in the current process; it is not read through
    ``env.controller``.

    Args:
        env: environment object providing ``vm_platform``.
        config (Dict[str, str]): getter configuration; not used by this getter.

    Returns:
        list: one parsed manifest per readable ``manifest.json``; manifests
            that are not valid UTF-8 JSON are logged and skipped.

    Raises:
        Exception: if ``env.vm_platform`` is not Windows, Darwin or Linux.
        OSError: if the extensions directory cannot be listed.
    """
    os_type = env.vm_platform
    home_dir = os.path.expanduser('~')
    if os_type == 'Windows':
        chrome_extension_dir = home_dir + '\\AppData\\Local\\Google\\Chrome\\User Data\\Default\\Extensions\\'
    elif os_type == 'Darwin':  # macOS
        chrome_extension_dir = home_dir + '/Library/Application Support/Google/Chrome/Default/Extensions/'
    elif os_type == 'Linux':
        chrome_extension_dir = home_dir + '/.config/google-chrome/Default/Extensions/'
    else:
        raise Exception('Unsupported operating system')

    manifests = []
    for extension_id in os.listdir(chrome_extension_dir):
        extension_path = os.path.join(chrome_extension_dir, extension_id)
        if not os.path.isdir(extension_path):
            continue
        # Iterate through version-named subdirectories
        for version_dir in os.listdir(extension_path):
            manifest_path = os.path.join(extension_path, version_dir, 'manifest.json')
            if not os.path.isfile(manifest_path):
                continue
            try:
                # Read explicitly as UTF-8 instead of relying on the locale default
                with open(manifest_path, 'r', encoding='utf-8') as file:
                    manifests.append(json.load(file))
            except (json.JSONDecodeError, UnicodeDecodeError):
                logger.error(f"Error reading {manifest_path}")
    return manifests

View File

@@ -0,0 +1,20 @@
import os
from typing import Union
def get_vm_screen_size(env, config: dict) -> dict:
    """
    Getter wrapper around ``env.controller.get_vm_screen_size``.

    Args:
        env: environment object providing ``controller``.
        config (dict): getter configuration; not used by this getter.

    Returns:
        dict: whatever the controller reports for the vm screen size.
    """
    screen_size = env.controller.get_vm_screen_size()
    return screen_size
def get_vm_window_size(env, config: dict) -> dict:
    """
    Getter wrapper around ``env.controller.get_vm_window_size``.

    Args:
        env: environment object providing ``controller``.
        config (dict): must contain
            app_class_name (str): forwarded to the controller as the
                ``app_class_name`` keyword argument.

    Returns:
        dict: whatever the controller reports for that application's window.
    """
    class_name = config["app_class_name"]
    window_size = env.controller.get_vm_window_size(app_class_name=class_name)
    return window_size
def get_vm_wallpaper(env, config: dict) -> Union[str, bytes]:
    """
    Saves the vm wallpaper into the cache directory and returns its path.

    Args:
        env: environment object providing ``cache_dir`` and ``controller``.
        config (dict): must contain
            dest (str): file name, relative to ``env.cache_dir``, to write
                the wallpaper to.

    Returns:
        str: path of the written file.
    """
    target_path = os.path.join(env.cache_dir, config["dest"])

    wallpaper = env.controller.get_vm_wallpaper()
    # Binary mode: the controller's payload is written through unchanged
    with open(target_path, "wb") as out_file:
        out_file.write(wallpaper)

    return target_path

View File

@@ -9,12 +9,12 @@ def get_vlc_playing_info(env, config: Dict[str, str]):
"""
Gets the current playing information from VLC's HTTP interface.
"""
_path = os.path.join(env.cache_dir, config["dest"])
host = env.vm_ip
port = 8080
password = 'password'
_path = os.path.join(env.cache_dir, config["dest"])
content = env.controller.get_vlc_status(host, port, password)
with open(_path, "wb") as f:
f.write(content)
@@ -26,22 +26,24 @@ def get_vlc_config(env, config: Dict[str, str]):
"""
Reads the VLC configuration file to check setting.
"""
_path = os.path.join(env.cache_dir, config["dest"])
os_type = env.controller.execute_python_command("import platform; print(platform.system())")['output'].strip()
os_type = env.vm_platform
# fixme: depends on how we config and install the vlc in virtual machine, need to be aligned and double-checked
if os_type == "Linux":
config_path = \
env.controller.execute_python_command("import os; print(os.path.expanduser('~/.config/vlc/vlcrc'))")[
'output'].strip()
env.controller.execute_python_command("import os; print(os.path.expanduser('~/.config/vlc/vlcrc'))")[
'output'].strip()
elif os_type == "Darwin":
config_path = env.controller.execute_python_command(
"import os; print(os.path.expanduser('~/Library/Preferences/org.videolan.vlc/vlcrc'))")['output'].strip()
elif os_type == "Windows":
config_path = env.controller.execute_python_command(
"import os; print(os.path.expanduser('~\\AppData\\Roaming\\vlc\\vlcrc'))")['output'].strip()
else:
raise Exception("Unsupported operating system", os_type)
_path = os.path.join(env.cache_dir, config["dest"])
content = env.controller.get_file(config_path)
with open(_path, "wb") as f:
f.write(content)

View File

@@ -1,10 +1,11 @@
from .table import compare_table
from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom
from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, compare_insert_equation
from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
compare_insert_equation
from .docs import is_first_line_centered, check_file_exists, compare_contains_image
from .pdf import check_pdf_pages
from .general import exact_match, fuzzy_match, check_csv, check_accessibility_tree, check_list
from .libreoffice import check_libre_locale
from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, compare_videos
from .general import check_csv, check_accessibility_tree, check_list
from .pdf import check_pdf_pages
from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom
from .table import compare_table
from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \
compare_videos

View File

@@ -1,127 +1,15 @@
import json
import os
import platform
import sqlite3
import logging
from playwright.sync_api import sync_playwright
import logging
logger = logging.getLogger("desktopenv.metrics.chrome")
"""
WARNING:
1. Functions from this script assume that no account is registered on Chrome, otherwise the default file path needs to be changed.
2. The functions are not tested on Windows and Mac, but they should work.
"""
# todo: move to getter module
# The following ones just need to load info from the files of software, no need to connect to the software
def get_default_search_engine():
    """
    Reads the short name of Chrome's default search engine from the local
    profile's ``Preferences`` file.

    Returns:
        str: ``default_search_provider_data.template_url_data.short_name``;
            "Google" if the key is absent or the file cannot be read/parsed.

    Raises:
        Exception: if ``platform.system()`` is not Windows, Darwin or Linux.
    """
    system_name = platform.system()
    if system_name == 'Windows':
        base_dir, relative = os.getenv('LOCALAPPDATA'), 'Google\\Chrome\\User Data\\Default\\Preferences'
    elif system_name == 'Darwin':
        base_dir, relative = os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'
    elif system_name == 'Linux':
        base_dir, relative = os.getenv('HOME'), '.config/google-chrome/Default/Preferences'
    else:
        raise Exception('Unsupported operating system')
    preference_file_path = os.path.join(base_dir, relative)

    try:
        with open(preference_file_path, 'r', encoding='utf-8') as file:
            preferences = json.load(file)

        # The path within the JSON data to the default search engine might vary
        provider = preferences.get('default_search_provider_data', {})
        template = provider.get('template_url_data', {})
        return template.get('short_name', 'Google')
    except Exception as e:
        logger.error(f"Error: {e}")
        return "Google"
def get_cookie_data():
    """
    Reads every row of the ``cookies`` table of the local Chrome cookie
    database.

    Returns:
        list of row tuples from ``SELECT * FROM cookies``, or None if the
        database cannot be opened or queried.

    Raises:
        Exception: if ``platform.system()`` is not Windows, Darwin or Linux.
    """
    system_name = platform.system()
    if system_name == 'Windows':
        chrome_cookie_file_path = os.path.join(os.getenv('LOCALAPPDATA'), 'Google\\Chrome\\User Data\\Default\\Cookies')
    elif system_name == 'Darwin':
        chrome_cookie_file_path = os.path.join(os.getenv('HOME'),
                                               'Library/Application Support/Google/Chrome/Default/Cookies')
    elif system_name == 'Linux':
        chrome_cookie_file_path = os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Cookies')
    else:
        raise Exception('Unsupported operating system')

    conn = None
    try:
        conn = sqlite3.connect(chrome_cookie_file_path)
        cursor = conn.cursor()

        # Fetch all cookies; any per-site filtering is left to the caller
        cursor.execute("SELECT * FROM cookies")
        cookies = cursor.fetchall()

        return cookies
    except Exception as e:
        logger.error(f"Error: {e}")
        return None
    finally:
        # Always release the database handle, on success and on failure
        if conn is not None:
            conn.close()
def get_bookmarks():
    """
    Reads the bookmark roots from the local Chrome profile's ``Bookmarks``
    file.

    Returns:
        The value stored under ``roots`` in the bookmarks JSON (``{}`` if the
        key is absent), or None if the file cannot be read or parsed.

    Raises:
        Exception: if ``platform.system()`` is not Windows, Darwin or Linux.
    """
    system_name = platform.system()
    if system_name == 'Windows':
        base_dir, relative = os.getenv('LOCALAPPDATA'), 'Google\\Chrome\\User Data\\Default\\Bookmarks'
    elif system_name == 'Darwin':
        base_dir, relative = os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Bookmarks'
    elif system_name == 'Linux':
        base_dir, relative = os.getenv('HOME'), '.config/google-chrome/Default/Bookmarks'
    else:
        raise Exception('Unsupported operating system')
    bookmarks_file_path = os.path.join(base_dir, relative)

    try:
        with open(bookmarks_file_path, 'r', encoding='utf-8') as file:
            parsed = json.load(file)
        return parsed.get('roots', {})
    except Exception as e:
        logger.error(f"Error: {e}")
        return None
def get_extensions_installed_from_shop():
    """
    Collects the parsed ``manifest.json`` of every locally installed Chrome
    extension.

    The extensions directory of the default profile is expected to contain
    one sub-directory per extension id, each holding version-named
    sub-directories with a ``manifest.json``.

    Returns:
        list: one parsed manifest per readable ``manifest.json``; manifests
            that are not valid UTF-8 JSON are logged and skipped.

    Raises:
        Exception: if ``platform.system()`` is not Windows, Darwin or Linux.
        OSError: if the extensions directory cannot be listed.
    """
    os_name = platform.system()
    home_dir = os.path.expanduser('~')
    if os_name == 'Windows':
        chrome_extension_dir = home_dir + '\\AppData\\Local\\Google\\Chrome\\User Data\\Default\\Extensions\\'
    elif os_name == 'Darwin':  # macOS
        chrome_extension_dir = home_dir + '/Library/Application Support/Google/Chrome/Default/Extensions/'
    elif os_name == 'Linux':
        chrome_extension_dir = home_dir + '/.config/google-chrome/Default/Extensions/'
    else:
        raise Exception('Unsupported operating system')

    manifests = []
    for extension_id in os.listdir(chrome_extension_dir):
        extension_path = os.path.join(chrome_extension_dir, extension_id)
        if not os.path.isdir(extension_path):
            continue
        # Iterate through version-named subdirectories
        for version_dir in os.listdir(extension_path):
            manifest_path = os.path.join(extension_path, version_dir, 'manifest.json')
            if not os.path.isfile(manifest_path):
                continue
            try:
                # Read explicitly as UTF-8 instead of relying on the locale default
                with open(manifest_path, 'r', encoding='utf-8') as file:
                    manifests.append(json.load(file))
            except (json.JSONDecodeError, UnicodeDecodeError):
                logger.error(f"Error reading {manifest_path}")
    return manifests
# The following ones require Playwright to be installed on the target machine, and the chrome needs to be pre-config on port info to allow remote debugging, see README.md for details

View File

@@ -1,20 +1,36 @@
import csv
import functools
import operator
import re
from numbers import Number
from typing import Callable, Any
from typing import Dict, List, Pattern
import lxml.etree
from lxml.etree import _Element
from lxml.cssselect import CSSSelector
from typing import Dict, List, Pattern
from typing import Callable, Any
from numbers import Number
import operator
from lxml.etree import _Element
from rapidfuzz import fuzz
import functools
import re
def exact_match(result, rules) -> float:
    """
    Scores 1.0 when the result equals the expected value, 0.0 otherwise.

    Args:
        result: value produced by the getter.
        rules: mapping that must contain
            expected: value to compare ``result`` against with ``==``.

    Returns:
        float: 1.0 on equality, else 0.0.
    """
    expected_value = rules["expected"]
    print(result, expected_value)
    return 1. if result == expected_value else 0.
def fuzzy_match(result, rules) -> float:
    """
    Scores the similarity between the result and the expected value.

    Args:
        result: value produced by the getter.
        rules: mapping that must contain
            expected: value to compare ``result`` against.

    Returns:
        float: ``fuzz.ratio(result, expected)`` divided by 100.
    """
    similarity = fuzz.ratio(result, rules["expected"])
    return similarity / 100.
def _match_record(pattern: Dict[str, str], item: Dict[str, str]) -> float:
return all(k in item and item[k]==val for k, val in pattern.items())
return all(k in item and item[k] == val for k, val in pattern.items())
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
"""
@@ -41,6 +57,7 @@ def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
unexpect_metric = unexpect_metric and not any(_match_record(r, rcd) for r in rules.get("unexpect", []))
return float(all(expect_metrics) and unexpect_metric)
def check_list(result: str, rules: Dict[str, List[str]]) -> float:
"""
Args:
@@ -67,15 +84,18 @@ def check_list(result: str, rules: Dict[str, List[str]]) -> float:
unexpect_metric = unexpect_metric and all(r.search(l) is None for r in unexpect_patterns)
return float(all(expect_metrics) and unexpect_metric)
_accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org"
, "attr": "uri:deskat:attributes.at-spi.gnome.org"
, "cp": "uri:deskat:component.at-spi.gnome.org"
, "doc": "uri:deskat:document.at-spi.gnome.org"
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
, "txt": "uri:deskat:text.at-spi.gnome.org"
, "val": "uri:deskat:value.at-spi.gnome.org"
, "act": "uri:deskat:action.at-spi.gnome.org"
}
_accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org"
, "attr": "uri:deskat:attributes.at-spi.gnome.org"
, "cp": "uri:deskat:component.at-spi.gnome.org"
, "doc": "uri:deskat:document.at-spi.gnome.org"
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
, "txt": "uri:deskat:text.at-spi.gnome.org"
, "val": "uri:deskat:value.at-spi.gnome.org"
, "act": "uri:deskat:action.at-spi.gnome.org"
}
def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
"""
Args:
@@ -106,13 +126,13 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
else:
raise ValueError("At least one of xpath and selectors is required")
if len(elements)==0:
if len(elements) == 0:
return 0.
if "text" in rules:
match_func: Callable[[str], Number] = functools.partial( operator.eq if rules["exact"] else fuzz.ratio
, rules["text"]
)
match_func: Callable[[str], Number] = functools.partial(operator.eq if rules["exact"] else fuzz.ratio
, rules["text"]
)
match_score: Number = 0
for elm in elements:
match_score = max(match_score, match_func(elm.text or None))
@@ -121,5 +141,5 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
return float(match_score)
#def check_existence(result: str, *args) -> float:
#return 1. - (result is None)
# def check_existence(result: str, *args) -> float:
# return 1. - (result is None)

View File

@@ -7,10 +7,10 @@ from xml.etree import ElementTree
import acoustid
import cv2
import imagehash
from skimage.metrics import structural_similarity as ssim
import librosa
from PIL import Image
import numpy as np
from PIL import Image
from skimage.metrics import structural_similarity as ssim
logger = logging.getLogger("desktopenv.metrics.vlc")
@@ -40,6 +40,7 @@ def is_vlc_playing(actual_status_path: str, rule: Dict[str, str]) -> float:
return 0
# fixme: part of this function can be moved to getters
def is_vlc_recordings_folder(actual_config_path: str, rule: Dict[str, str]) -> float:
"""
Checks if VLC's recording folder is set to the expected value.

View File

@@ -1,7 +1,7 @@
{
"id": "06fe7178-4491-4589-810f-2e2bc9502122",
"snapshot": "chrome",
"instruction": "Could you help me open the previously closed tab?",
"instruction": "Can you make my computer bring back the last tab I shut down?",
"source": "https://www.wikihow.com/Switch-Tabs-in-Chrome",
"config": [],
"trajectory": "trajectories/",

View File

@@ -1,7 +1,7 @@
{
"id": "12086550-11c0-466b-b367-1d9e75b3910e",
"snapshot": "chrome",
"instruction": "Help me find the hidden password of xx website.",
"instruction": "Computer, please navigate to the area in my browser settings where my passwords are stored. I want to check my login information for Etsy without revealing it just yet.",
"source": "https://www.quora.com/What-are-the-cool-tricks-to-use-Google-Chrome",
"config": [],
"trajectory": "trajectories/",

View File

@@ -1,7 +1,7 @@
{
"id": "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
"snapshot": "chrome",
"instruction": "Could you help me delete cookies from openai website?",
"instruction": "Can you help me clean up my computer by getting rid of all the tracking things that websites like Amazon or eBay might have saved? I want to make sure my browsing is private and those sites don't remember me.",
"source": "https://support.google.com/chrome/answer/95647?hl=en&ref_topic=7438325&sjid=16867045591165135686-AP#zippy=%2Cdelete-cookies-from-a-site",
"config": [],
"trajectory": "trajectories/",

View File

@@ -1,18 +1,33 @@
{
"id": "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"snapshot": "chrome",
"instruction": "Could you help me set my default search engine to be Google?",
"instruction": "Can you make Bing the main search thingy when I look stuff up on the internet?",
"source": "https://support.google.com/chrome/answer/95426?sjid=16867045591165135686-AP",
"config": [],
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=9222"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "",
"func": "exact_match",
"result": {
"type": "default_search_engine"
},
"expected": {
"type": "rule",
"rules": {
"expected": "Bing"
}
}
}
}