load libreoffice writer eval -batch 2

This commit is contained in:
tsuky_chen
2024-01-26 02:15:42 +08:00
99 changed files with 4318 additions and 109 deletions

View File

@@ -1,10 +1,13 @@
from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, \
get_shortcuts_on_desktop
get_shortcuts_on_desktop, get_history, get_enabled_experiments, get_chrome_language, get_chrome_font_size, \
get_profile_name, get_number_of_search_results, get_googledrive_file, get_active_tab_info
from .file import get_cloud_file, get_vm_file, get_cache_file
from .general import get_vm_command_line
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper
from .impress import get_audio_in_slide
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
from .misc import get_rule, get_accessibility_tree
from .replay import get_replay
from .vlc import get_vlc_playing_info, get_vlc_config
from .vscode import get_vscode_config
# from .impress import get_audio_in_slide

View File

@@ -2,8 +2,9 @@ import json
import logging
import os
import sqlite3
from typing import Dict
from typing import Dict, Any
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive, GoogleDriveFileList, GoogleDriveFile
from playwright.sync_api import sync_playwright
logger = logging.getLogger("desktopenv.getters.chrome")
@@ -84,6 +85,168 @@ def get_cookie_data(env, config: Dict[str, str]):
return None
def get_history(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
chrome_history_path = env.controller.execute_python_command(
"""import os; print(os.path.join(os.getenv('USERPROFILE'), "AppData", "Local", "Google", "Chrome", "User Data", "Default", "History"))""")[
'output'].strip()
elif os_type == 'Darwin':
chrome_history_path = env.controller.execute_python_command(
"""import os; print(os.path.join(os.getenv('HOME'), "Library", "Application Support", "Google", "Chrome", "Default", "History"))""")[
'output'].strip()
elif os_type == 'Linux':
chrome_history_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config', 'google-chrome', 'Default', 'History'))")[
'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(chrome_history_path)
_path = os.path.join(env.cache_dir, config["dest"])
with open(_path, "wb") as f:
f.write(content)
conn = sqlite3.connect(_path)
cursor = conn.cursor()
# Query to check for OpenAI cookies
cursor.execute("SELECT url, title, last_visit_time FROM urls")
history_items = cursor.fetchall()
return history_items
except Exception as e:
logger.error(f"Error: {e}")
return None
def get_enabled_experiments(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Local State'))""")[
'output'].strip()
elif os_type == 'Darwin':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Local State'))")[
'output'].strip()
elif os_type == 'Linux':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Local State'))")[
'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
# The path within the JSON data to the default search engine might vary
enabled_labs_experiments = data.get('browser', {}).get('enabled_labs_experiments', [])
return enabled_labs_experiments
except Exception as e:
logger.error(f"Error: {e}")
return []
def get_profile_name(env, config: Dict[str, str]):
"""
Get the username from the Chrome browser.
Assume the cookies are stored in the default location, not encrypted and not large in size.
"""
os_type = env.vm_platform
if os_type == 'Windows':
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Default\\Preferences'))""")['output'].strip()
elif os_type == 'Darwin':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
'output'].strip()
elif os_type == 'Linux':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
# The path within the JSON data to the default search engine might vary
profile_name = data.get('profile', {}).get('name', None)
return profile_name
except Exception as e:
logger.error(f"Error: {e}")
return None
def get_chrome_language(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Local State'))""")[
'output'].strip()
elif os_type == 'Darwin':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Local State'))")[
'output'].strip()
elif os_type == 'Linux':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Local State'))")[
'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
# The path within the JSON data to the default search engine might vary
enabled_labs_experiments = data.get('intl', {}).get('app_locale', "en-US")
return enabled_labs_experiments
except Exception as e:
logger.error(f"Error: {e}")
return "en-US"
def get_chrome_font_size(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Default\\Preferences'))""")[
'output'].strip()
elif os_type == 'Darwin':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
'output'].strip()
elif os_type == 'Linux':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
# The path within the JSON data to the default search engine might vary
search_engine = data.get('webkit', {}).get('webprefs', {
"default_fixed_font_size": 13,
"default_font_size": 16,
"minimum_font_size": 13
})
return search_engine
except Exception as e:
logger.error(f"Error: {e}")
return {
"default_fixed_font_size": 13,
"default_font_size": 16
}
def get_bookmarks(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
@@ -227,8 +390,7 @@ def get_pdf_from_url(env, config: Dict[str, str]) -> str:
# fixme: needs to be changed (maybe through post-processing) since it's not working
def get_chrome_saved_address(env, config: Dict[str, str]):
# host = env.vm_ip
host = "192.168.13.130"
host = env.vm_ip
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
remote_debugging_url = f"http://{host}:{port}"
@@ -278,6 +440,80 @@ def get_shortcuts_on_desktop(env, config: Dict[str, str]):
for shortcut_path in shortcuts_paths:
short_cuts[shortcut_path] = env.controller.get_file(env.controller.execute_python_command(
f"import os; print(os.path.join(os.path.expanduser('~'), 'Desktop', '{shortcut_path}'))")['output'].strip()).decode('utf-8')
f"import os; print(os.path.join(os.path.expanduser('~'), 'Desktop', '{shortcut_path}'))")[
'output'].strip()).decode('utf-8')
return short_cuts
def get_number_of_search_results(env, config: Dict[str, str]):
# todo: move into the config file
url, result_selector = "https://google.com/search?q=query", '.search-result'
host = env.vm_ip
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
page = browser.new_page()
page.goto(url)
search_results = page.query_selector_all(result_selector)
actual_count = len(search_results)
browser.close()
return actual_count
def get_googledrive_file(env, config: Dict[str, Any]) -> str:
""" Get the desired file from Google Drive based on config, return the downloaded local filepath.
To retrieve the file, we provide two options in config dict:
1. query: a list of queries to search the file, each query is a string that follows the format of Google Drive search query
2. path: a list of path to the file, 'folder/subfolder/filename' -> ['folder', 'subfolder', 'filename']
3. query_list: query extends to list to download multiple files
4. path_list: path extends to list to download multiple files
dest: target file name or list. If *_list is used, dest should also be a list of the same length.
Return the downloaded filepath locally.
"""
settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json')
auth = GoogleAuth(settings_file=settings_file)
drive = GoogleDrive(auth)
def get_single_file(_query, _path):
parent_id = 'root'
try:
for q in _query:
search = f'( {q} ) and "{parent_id}" in parents'
filelist: GoogleDriveFileList = drive.ListFile({'q': search}).GetList()
if len(filelist) == 0: # target file not found
return None
file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just use the first one
parent_id = file['id']
file.GetContentFile(_path, mimetype=file['mimeType'])
except:
logger.info('[ERROR]: Failed to download the file from Google Drive')
return None
return _path
if 'query' in config:
return get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
elif 'path' in config:
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1
else f'title = {fp} and trashed = false' for idx, fp in enumerate(config['path'])]
return get_single_file(query, os.path.join(env.cache_dir, config['dest']))
elif 'query_list' in config:
_path_list = []
assert len(config['query_list']) == len(config['dest'])
for idx, query in enumerate(config['query_list']):
dest = config['dest'][idx]
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
return _path_list
else: # path_list in config
_path_list = []
assert len(config['path_list']) == len(config['dest'])
for idx, path in enumerate(config['path_list']):
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1
else f'title = {fp} and trashed = false' for jdx, fp in enumerate(path)]
dest = config['dest'][idx]
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
return _path_list

View File

@@ -9,8 +9,9 @@ def get_vm_command_line(env, config: Dict[str, str]):
vm_ip = env.vm_ip
port = 5000
command = config["command"]
shell = config.get("shell", False)
response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command})
response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command, "shell": shell})
if response.status_code == 200:
return response.json()["output"]

View File

@@ -0,0 +1,62 @@
import os
import tempfile
import xml.etree.ElementTree as ET
import zipfile
from typing import Dict
from desktop_env.evaluators.getters.file import get_vm_file
def get_audio_in_slide(env, config: Dict[str, str]):
ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"]
# Open the .pptx file as a zip file, fixme: now we assume there is only one audio file in the slides
audio_file_path = None
ppt_file_localhost_path = get_vm_file(env, {"path": ppt_file_path, "dest": os.path.split(ppt_file_path)[-1]})
with zipfile.ZipFile(ppt_file_localhost_path, 'r') as myzip:
# Find the relationships XML file for the first slide
slide1_rels_file = 'ppt/slides/_rels/slide{}.xml.rels'.format(slide_index + 1)
if slide1_rels_file in myzip.namelist():
with myzip.open(slide1_rels_file) as f:
# Parse the XML tree from the relationships file
tree = ET.parse(f)
root = tree.getroot()
# Define the namespace used in the relationships file
namespaces = {'r': 'http://schemas.openxmlformats.org/package/2006/relationships'}
# Look for all relationship elements that have a type attribute for audio
for rel in root.findall('r:Relationship', namespaces):
# Check if the relationship is for an audio file
if 'audio' in rel.attrib['Type']:
# The audio can be embedded inside the file or linked to an external file
# Get the target attribute which contains the audio file path
target = rel.attrib['Target']
if target.startswith('..'):
# Resolve the relative path to get the correct path within the zip file
audio_file_path = os.path.normpath(os.path.join('ppt/slides', target))
# Replace backslashes with forward slashes for ZIP compatibility
audio_file_path = audio_file_path.replace('\\', '/')
# Create a temporary directory to extract the audio file
with tempfile.TemporaryDirectory() as tmpdirname:
# Extract the audio file
myzip.extract(audio_file_path, tmpdirname)
# Get the full path of the extracted audio file
extracted_audio_path = os.path.join(tmpdirname, audio_file_path)
# Return the extracted audio file path
audio_file_path = extracted_audio_path
else:
# the audio file is external to the .pptx file
# Return the audio file path
assert target.startswith("file://"), target
audio_file_path = target[7:]
if audio_file_path is None:
return None
else:
# Get the audio file from vm and return the file path in the host
return get_vm_file(env, {"path": audio_file_path, "dest": dest})

View File

@@ -18,3 +18,7 @@ def get_vm_wallpaper(env, config: dict) -> Union[str, bytes]:
f.write(content)
return _path
def get_list_directory(env, config: dict) -> dict:
return env.controller.get_vm_directory_tree(config["path"])