Merge branch 'main' into xiaochuanli/addChromeExtensions

This commit is contained in:
Tianbao Xie
2024-03-08 20:45:49 +08:00
committed by GitHub
109 changed files with 7196 additions and 172 deletions

View File

@@ -24,12 +24,13 @@ from .chrome import (
get_gotoRecreationPage_and_get_html_content,
get_url_dashPart,
get_active_url_from_accessTree,
get_find_installed_extension_name
get_find_installed_extension_name,
get_info_from_website
)
from .file import get_cloud_file, get_vm_file, get_cache_file
from .general import get_vm_command_line, get_vm_terminal_output
from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file
from .general import get_vm_command_line, get_vm_terminal_output, get_vm_command_error
from .gimp import get_gimp_config_file
from .impress import get_audio_in_slide
from .impress import get_audio_in_slide, get_background_image_in_slide
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime, get_time_diff_range
from .replay import get_replay

View File

@@ -12,7 +12,7 @@ import lxml.etree
import requests
from lxml.cssselect import CSSSelector
from lxml.etree import _Element
from playwright.sync_api import sync_playwright
from playwright.sync_api import sync_playwright, expect
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive, GoogleDriveFileList, GoogleDriveFile
@@ -36,6 +36,89 @@ WARNING:
"""
def get_info_from_website(env, config: Dict[Any, Any]) -> Any:
""" Get information from a website. Especially useful when the information may be updated through time.
Args:
env (Any): The environment object.
config (Dict[Any, Any]): The configuration dictionary.
- url (str): The URL of the website to visit
- infos (List[Dict[str, str]]): The list of information to be extracted from the website. Each dictionary contains:
- action (str): chosen from 'inner_text', 'attribute', 'click_and_inner_text', 'click_and_attribute', etc., concretely,
- inner_text: extract the inner text of the element specified by the selector
- attribute: extract the attribute of the element specified by the selector
- click_and_inner_text: click elements following the selector and then extract the inner text of the last element
- click_and_attribute: click elements following the selector and then extract the attribute of the last element
- selector (Union[str, List[str]]): The CSS selector(s) of the element(s) to be extracted.
- attribute (str): optional for 'attribute' and 'click_and_attribute', the attribute to be extracted.
- backups (Any): The backup information to be returned if the extraction fails.
"""
try:
host = env.vm_ip
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
# connect to remote Chrome instance
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
except Exception as e:
# If the connection fails (e.g., the agent close the browser instance), start a new browser instance
app = 'chromium' if 'arm' in platform.machine() else 'google-chrome'
payload = json.dumps({"command": [
app,
"--remote-debugging-port=1337"
], "shell": False})
headers = {"Content-Type": "application/json"}
requests.post("http://" + host + ":5000/setup" + "/launch", headers=headers, data=payload)
time.sleep(5)
browser = p.chromium.connect_over_cdp(remote_debugging_url)
page = browser.contexts[0].new_page()
page.goto(config["url"])
page.wait_for_load_state('load')
infos = []
for info_dict in config.get('infos', []):
if page.url != config["url"]:
page.goto(config["url"])
page.wait_for_load_state('load')
action = info_dict.get('action', 'inner_text')
if action == "inner_text":
ele = page.locator(info_dict['selector'])
expect(ele).to_be_visible()
infos.append(ele.inner_text())
elif action == "attribute":
ele = page.locator(info_dict['selector'])
expect(ele).to_be_visible()
infos.append(ele.get_attribute(info_dict['attribute']))
elif action == 'click_and_inner_text':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
link = page.locator(sel)
expect(link).to_be_visible()
link.click()
page.wait_for_load_state('load')
else:
ele = page.locator(sel)
expect(ele).to_be_visible()
infos.append(ele.inner_text())
elif action == 'click_and_attribute':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
link = page.locator(sel)
expect(link).to_be_visible()
link.click()
page.wait_for_load_state('load')
else:
ele = page.locator(sel)
expect(ele).to_be_visible()
infos.append(ele.get_attribute(info_dict['attribute']))
else:
raise NotImplementedError(f'The action {action} is not supported yet.')
return infos
except Exception as e:
logger.error(f'[ERROR]: failed to obtain information from the website: {config["url"]}. Use backup results instead.')
return config.get('backups', None)
# The following ones just need to load info from the files of software, no need to connect to the software
def get_default_search_engine(env, config: Dict[str, str]):
os_type = env.vm_platform
@@ -507,6 +590,10 @@ def get_active_url_from_accessTree(env, config):
if len(elements) == 0:
print("no elements found")
return None
elif elements[-1].text is None:
print("no text found")
return None
active_tab_url = config["goto_prefix"] + elements[0].text if "goto_prefix" in config.keys() else "https://" + \
elements[0].text
print("active tab url now: {}".format(active_tab_url))
@@ -722,15 +809,20 @@ def get_number_of_search_results(env, config: Dict[str, str]):
def get_googledrive_file(env, config: Dict[str, Any]) -> str:
""" Get the desired file from Google Drive based on config, return the downloaded local filepath.
To retrieve the file, we provide two options in config dict:
1. query: a list of queries to search the file, each query is a string that follows the format of Google Drive search query
2. path: a list of path to the file, 'folder/subfolder/filename' -> ['folder', 'subfolder', 'filename']
3. query_list: query extends to list to download multiple files
4. path_list: path extends to list to download multiple files
dest: target file name or list. If *_list is used, dest should also be a list of the same length.
Return the downloaded filepath locally.
@args: keys in config dict
settings_file(str): target filepath to the settings file for Google Drive authentication, default is 'evaluation_examples/settings/googledrive/settings.yml'
query/path[_list](Union[str, List[str]]): the query or path [list] to the file(s) on Google Drive. To retrieve the file, we provide multiple key options to specify the filepath on drive in config dict:
1) query: a list of queries to search the file, each query is a string that follows the format of Google Drive search query. The documentation is available here: (support more complex search but too complicated to use)
https://developers.google.com/drive/api/guides/search-files?hl=en
2) path: a str list poingting to file path on googledrive, e.g., 'folder/subfolder/filename.txt' ->
config contain one key-value pair "path": ['folder', 'subfolder', 'filename.txt']
3) query_list: query extends to list to download multiple files
4) path_list: path extends to list to download multiple files, e.g.,
"path_list": [['folder', 'subfolder', 'filename1.txt'], ['folder', 'subfolder', 'filename2.txt']]
@return:
dest(Union[List[str], str]): target file name or list. If *_list is used in input config, dest should also be a list of the same length. Return the downloaded local filepath.
"""
settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json')
settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.yml')
auth = GoogleAuth(settings_file=settings_file)
drive = GoogleDrive(auth)

View File

@@ -3,6 +3,26 @@ from typing import Dict, List, Set
from typing import Optional, Any, Union
from datetime import datetime
import requests
import pandas as pd
def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any:
    """
    Fetch a file from the VM and return a selected piece of its content.

    Config:
        path (str): absolute path on the VM to fetch
        file_type (str): type of the fetched file; only 'xlsx' is supported
        file_content (str): which part of the file to return; only 'last_row' is supported

    Returns:
        For 'xlsx' / 'last_row': the cells of the last row read by pandas, each converted to str.

    Raises:
        NotImplementedError: if ``file_type`` or ``file_content`` is not supported.
    """
    path = config["path"]
    file_path = get_vm_file(env, {"path": path, "dest": os.path.basename(path)})
    file_type, file_content = config['file_type'], config['file_content']

    if file_type != 'xlsx':
        raise NotImplementedError(f"File type {file_type} not supported")
    if file_content != 'last_row':
        # Previously this case fell through and silently returned None; fail loudly,
        # consistent with the handling of unsupported file types.
        raise NotImplementedError(f"File content {file_content} not supported for file type {file_type}")

    df = pd.read_excel(file_path)
    return df.iloc[-1].astype(str).tolist()
def get_cloud_file(env, config: Dict[str, Any]) -> Union[str, List[str]]:

View File

@@ -21,6 +21,22 @@ def get_vm_command_line(env, config: Dict[str, str]):
logger.error("Failed to get vm command line. Status code: %d", response.status_code)
return None
def get_vm_command_error(env, config: Dict[str, str]):
    """Run a command on the VM and return the "error" field of the reply.

    The command is POSTed as JSON to ``http://<env.vm_ip>:5000/execute``.

    Config:
        command: the command to execute (required)
        shell: forwarded as the "shell" flag of the request, defaults to False

    Returns:
        The value under "error" in the JSON reply when the status code is 200,
        otherwise None (the failure is logged).
    """
    vm_ip = env.vm_ip
    port = 5000
    command = config["command"]
    shell = config.get("shell", False)

    response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command, "shell": shell})

    # Check the status before touching the body: an error reply is not guaranteed
    # to be JSON, and decoding it first would raise instead of reaching this branch.
    if response.status_code != 200:
        logger.error("Failed to get vm command line error. Status code: %d", response.status_code)
        return None

    result = response.json()  # decode once and reuse
    print(result)
    return result["error"]
def get_vm_terminal_output(env, config: Dict[str, str]):
    """Return whatever ``env.controller.get_terminal_output()`` yields.

    ``config`` is accepted for signature parity with the other getters and is not read.
    """
    controller = env.controller
    terminal_output = controller.get_terminal_output()
    return terminal_output

View File

@@ -7,6 +7,67 @@ from typing import Dict
from desktop_env.evaluators.getters.file import get_vm_file
def get_background_image_in_slide(env, config: Dict[str, str]):
    """Return a local path to the background image of one slide of a .pptx file on the VM.

    Config:
        ppt_file_path: path of the presentation on the VM
        slide_index: zero-based slide index (slide N lives in ``ppt/slides/slide{N+1}.xml``)
        dest: destination name used when the image is linked externally and has to be
            fetched from the VM

    Returns:
        - the path of the image extracted next to the downloaded presentation, when the
          image is embedded in the archive;
        - the result of ``get_vm_file`` for the linked file, when the relationship target
          is a ``file://`` URL;
        - None when the slide does not exist, has no background image, or no matching
          image relationship is found.
    """
    ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"]
    image_id, image_file_path = None, None

    ppt_file_localhost_path = get_vm_file(env, {"path": ppt_file_path, "dest": os.path.split(ppt_file_path)[-1]})

    with zipfile.ZipFile(ppt_file_localhost_path, 'r') as myzip:
        archive_members = myzip.namelist()

        # firstly, check whether a background image is used in the slide
        slide_xml_file = 'ppt/slides/slide{}.xml'.format(slide_index + 1)
        if slide_xml_file not in archive_members:
            return None
        with myzip.open(slide_xml_file) as f:
            # Parse the XML tree of the slide itself
            slide_root = ET.parse(f).getroot()

        bg_tag = "{http://schemas.openxmlformats.org/presentationml/2006/main}bgPr"
        image_tag = "{http://schemas.openxmlformats.org/drawingml/2006/main}blip"
        attr_tag = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
        for background in slide_root.iter(bg_tag):
            # Only the first <blip> of each background is considered; one without an
            # embed attribute is skipped explicitly instead of via a bare `except: pass`.
            first_blip = next(iter(background.iter(image_tag)), None)
            if first_blip is not None:
                image_id = first_blip.attrib.get(attr_tag)
            if image_id is not None:
                break
        if image_id is None:
            return None

        # next, resolve the relationship id to the actual image of the slide
        slide_rels_file = 'ppt/slides/_rels/slide{}.xml.rels'.format(slide_index + 1)
        if slide_rels_file in archive_members:
            with myzip.open(slide_rels_file) as f:
                # Parse the XML tree from the relationships file
                rels_root = ET.parse(f).getroot()
            # Define the namespace used in the relationships file
            namespaces = {'r': 'http://schemas.openxmlformats.org/package/2006/relationships'}
            # Look for the image relationship whose id matches the background image
            for rel in rels_root.findall('r:Relationship', namespaces):
                if 'image' in rel.attrib['Type'] and rel.attrib['Id'] == image_id:
                    target = rel.attrib['Target']
                    if target.startswith('..'):
                        # Resolve the relative path to get the correct path within the zip file,
                        # using forward slashes for ZIP compatibility
                        member = os.path.normpath(os.path.join('ppt/slides', target)).replace('\\', '/')
                        extract_dir = os.path.dirname(ppt_file_localhost_path)
                        myzip.extract(member, extract_dir)
                        return os.path.join(extract_dir, member)
                    # otherwise the image is linked by absolute path
                    assert target.startswith("file://"), target
                    image_file_path = target[7:]  # strip the "file://" prefix
                    break

    if image_file_path is None:
        return None
    # The image is external to the .pptx: get the image file from the vm and return what get_vm_file gives back
    return get_vm_file(env, {"path": image_file_path, "dest": dest})
def get_audio_in_slide(env, config: Dict[str, str]):
ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"]
@@ -40,20 +101,23 @@ def get_audio_in_slide(env, config: Dict[str, str]):
audio_file_path = audio_file_path.replace('\\', '/')
# Create a temporary directory to extract the audio file
with tempfile.TemporaryDirectory() as tmpdirname:
# Extract the audio file
myzip.extract(audio_file_path, tmpdirname)
# Get the full path of the extracted audio file
extracted_audio_path = os.path.join(tmpdirname, audio_file_path)
# Return the extracted audio file path
audio_file_path = extracted_audio_path
tmpdirname = os.path.dirname(ppt_file_localhost_path)
myzip.extract(audio_file_path, tmpdirname)
audio_file_path = os.path.join(tmpdirname, audio_file_path)
return audio_file_path
# with tempfile.TemporaryDirectory() as tmpdirname:
# # Extract the audio file
# myzip.extract(audio_file_path, tmpdirname)
# # Get the full path of the extracted audio file
# extracted_audio_path = os.path.join(tmpdirname, audio_file_path)
# # Return the extracted audio file path
# audio_file_path = extracted_audio_path
else:
# the audio file is external to the .pptx file
# Return the audio file path
assert target.startswith("file://"), target
audio_file_path = target[7:]
break
if audio_file_path is None:
return None

View File

@@ -1,5 +1,5 @@
import logging
from typing import TypeVar
from typing import TypeVar, Dict
from datetime import datetime, timedelta
logger = logging.getLogger("desktopenv.getters.misc")
@@ -74,13 +74,13 @@ relativeTime_to_IntDay = {
"first monday four months later": "special"
}
def get_rule(env, config: R) -> R:
def get_rule(env, config: Dict[str, R]) -> R:
    """
    Hand back the value stored under the "rules" key of ``config``, untouched.
    ``env`` is not used.
    """
    rules = config["rules"]
    return rules
def get_rule_relativeTime(env, config: R) -> R:
def get_rule_relativeTime(env, config: Dict[str, R]) -> R:
"""
According to the rule definded in funciton "apply_rules_to_timeFormat", convert the relative time to absolute time.
config: