Finish the remaining Chrome examples and verify them on macOS arm64
This commit is contained in:
@@ -1,11 +1,27 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import sqlite3
|
||||
from typing import Dict, Any
|
||||
from typing import Dict, Any, List
|
||||
from pydrive.auth import GoogleAuth
|
||||
from pydrive.drive import GoogleDrive, GoogleDriveFileList, GoogleDriveFile
|
||||
from playwright.sync_api import sync_playwright
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
import lxml.etree
|
||||
from lxml.cssselect import CSSSelector
|
||||
from lxml.etree import _Element
|
||||
|
||||
# XML namespace prefixes (prefix -> namespace URI) passed as ``namespaces=``
# when the accessibility tree is queried with XPath or CSS selectors.
_accessibility_ns_map = {
    "st": "uri:deskat:state.at-spi.gnome.org",
    "attr": "uri:deskat:attributes.at-spi.gnome.org",
    "cp": "uri:deskat:component.at-spi.gnome.org",
    "doc": "uri:deskat:document.at-spi.gnome.org",
    "docattr": "uri:deskat:attributes.document.at-spi.gnome.org",
    "txt": "uri:deskat:text.at-spi.gnome.org",
    "val": "uri:deskat:value.at-spi.gnome.org",
    "act": "uri:deskat:action.at-spi.gnome.org",
}

# Module-level logger shared by the Chrome getters in this file.
logger = logging.getLogger("desktopenv.getters.chrome")
|
||||
|
||||
@@ -340,7 +356,58 @@ def get_open_tabs_info(env, config: Dict[str, str]):
|
||||
return tabs_info
|
||||
|
||||
|
||||
def get_active_url_from_accessTree(env, config):
    """
    Return the URL of the active tab as read from the accessibility tree.

    Playwright cannot get the url of the active tab directly, so the URL text
    is looked up in the accessibility tree obtained from
    ``env.controller.get_accessibility_tree()``.

    config:
        Dict[str, Any]{
            'xpath':
                XPath expression selecting the element whose text is the URL,
                the same as in metrics.general.accessibility_tree.
            'selectors':
                list of CSS selectors (joined with ", "); only consulted when
                'xpath' is absent. The same as in
                metrics.general.accessibility_tree.
            'goto_prefix':
                the prefix to put in front of the text found in the tree,
                default is "https://"
                (the url we get from accTree does not have prefix)
            ...(other keys, not used in this function)
        }

    Return
        url: str
            prefix + text of the first matched element.
        0. (float)
            legacy "not found" sentinel, returned when nothing matches or the
            first matched element carries no text. It is falsy, so callers can
            test the result with ``if not url``.

    Raises
        ValueError: neither 'xpath' nor 'selectors' is present in config.
    """
    accessibility_tree: str = env.controller.get_accessibility_tree()
    logger.debug("AT@eval: %s", accessibility_tree)

    # Use the accessibility API output to locate the element holding the URL.
    at: _Element = lxml.etree.fromstring(accessibility_tree)
    if "xpath" in config:
        elements: List[_Element] = at.xpath(config["xpath"], namespaces=_accessibility_ns_map)
    elif "selectors" in config:
        selector = CSSSelector(", ".join(config["selectors"]), namespaces=_accessibility_ns_map)
        elements = selector(at)
    else:
        raise ValueError("At least one of xpath and selectors is required")

    if not elements:
        logger.error("no elements found")
        return 0.

    url_text = elements[0].text
    if url_text is None:
        # An element without text would make the concatenation below raise
        # TypeError; report it the same way as "nothing matched".
        logger.error("matched element has no text")
        return 0.

    active_tab_url = config.get("goto_prefix", "https://") + url_text
    logger.info("active tab url now: %s", active_tab_url)
    return active_tab_url
|
||||
|
||||
|
||||
def get_active_tab_info(env, config: Dict[str, str]):
|
||||
"""
|
||||
This function is used to get all info about active tab.
|
||||
Warning! This function will reload the target-url page
|
||||
If the target url has a cache or cookies, this function may be redirected to another page.
|
||||
If you have verified that the url does not redirect to another page (check in incognito mode yourself first),
|
||||
you can use this function.
|
||||
config: Dict[str, str]{
|
||||
# Keys used in get_active_url_from_accessTree: "xpath", "selectors"
|
||||
}
|
||||
"""
|
||||
active_tab_url = get_active_url_from_accessTree(env, config)
|
||||
host = env.vm_ip
|
||||
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
|
||||
|
||||
@@ -348,24 +415,21 @@ def get_active_tab_info(env, config: Dict[str, str]):
|
||||
with sync_playwright() as p:
|
||||
# connect to remote Chrome instance
|
||||
browser = p.chromium.connect_over_cdp(remote_debugging_url)
|
||||
|
||||
active_tab_info = {}
|
||||
for context in browser.contexts:
|
||||
for page in context.pages:
|
||||
if page.is_visible("body"): # check the visibility of the page body to determine the active status
|
||||
active_tab_info = {
|
||||
'title': page.title(),
|
||||
'url': page.url,
|
||||
'content': page.content() # get the HTML content of the page
|
||||
}
|
||||
break
|
||||
if active_tab_info:
|
||||
break
|
||||
# go to the target URL page
|
||||
page = browser.new_page()
|
||||
page.goto(active_tab_url)
|
||||
page.wait_for_load_state('load') # Wait for the 'load' event to complete
|
||||
active_tab_info = {
|
||||
'title': page.title(),
|
||||
'url': page.url,
|
||||
'content': page.content() # get the HTML content of the page
|
||||
}
|
||||
|
||||
browser.close()
|
||||
print("active_tab_title: {}".format(active_tab_info.get('title', 'None')))
|
||||
print("active_tab_url: {}".format(active_tab_info.get('url', 'None')))
|
||||
print("active_tab_content: {}".format(active_tab_info.get('content', 'None')))
|
||||
# print("active_tab_title: {}".format(active_tab_info.get('title', 'None')))
|
||||
# print("active_tab_url: {}".format(active_tab_info.get('url', 'None')))
|
||||
# print("active_tab_content: {}".format(active_tab_info.get('content', 'None')))
|
||||
return active_tab_info
|
||||
|
||||
|
||||
@@ -648,4 +712,209 @@ def get_find_unpacked_extension_path(env, config: Dict[str, str]):
|
||||
return all_extensions_path
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return "Google"
|
||||
return "Google"
|
||||
|
||||
|
||||
def get_data_delete_automacally(env, config: Dict[str, str]):
    """
    Read ``profile.exit_type`` from the browser's ``Preferences`` file on the VM.

    Used by the "auto-delete" examples of chromium. The path of the
    Preferences file is resolved on the VM itself (via
    ``env.controller.execute_python_command``) according to
    ``env.vm_platform``, then the file is fetched with
    ``env.controller.get_file`` and parsed as JSON.

    config: not used by this function.

    Return
        the value stored under ``data["profile"]["exit_type"]``, or the
        fallback string "Google" when the file cannot be fetched or parsed
        or the key is missing (the error is logged).

    Raises
        Exception: ``env.vm_platform`` is not 'Windows', 'Darwin' or 'Linux'.
    """
    # One remote snippet per platform; each prints the Preferences path.
    path_commands = {
        # Path components are passed separately: a literal such as
        # 'Google\Chrome\User Data\...' would reach the remote interpreter
        # with a bare "\U", which is an invalid unicode escape there.
        'Windows': (
            "import os; print(os.path.join(os.getenv('LOCALAPPDATA'), "
            "'Google', 'Chrome', 'User Data', 'Default', 'Preferences'))"
        ),
        'Darwin': "import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))",
        # Snap-packaged chromium. A google-chrome install would keep the file
        # under '.config/google-chrome/Default/Preferences' instead.
        'Linux': "import os; print(os.path.join(os.getenv('HOME'), 'snap/chromium/common/chromium/Default/Preferences'))",
    }

    os_type = env.vm_platform
    if os_type not in path_commands:
        raise Exception('Unsupported operating system')
    preference_file_path = env.controller.execute_python_command(path_commands[os_type])['output'].strip()

    try:
        content = env.controller.get_file(preference_file_path)
        data = json.loads(content)
        return data["profile"]["exit_type"]
    except Exception as e:
        # Deliberate best-effort: evaluation continues with a fallback value.
        logger.error(f"Error: {e}")
        return "Google"
|
||||
|
||||
|
||||
def get_active_tab_html_parse(env, config: Dict[str, Any]):
    """
    Get the text content of specific elements from the active tab's html.

    The active tab's URL is read from the accessibility tree; the already
    open page with exactly that URL is then looked up over the remote
    debugging connection and queried.

    config:
        Dict[str, Any]{
            # Keys used in get_active_url_from_accessTree: "xpath", "selectors"
            'category':
                choose from ["class", "label", "xpath", "input"], used to
                indicate how to find the element
            'labelObject':
                only exists when category is "label",
                a dict like { "labelSelector": "the key you want to store the text content of this label's element" }
            'class_singleObject':
                only exists when category is "class", a dict with keys as the class name,
                like { "class name" : "the key you want to store the text content of this element" }
            'class_multiObject':
                only exists when category is "class", used for elements with same class name.
                Two layer of dict, like
                    ( {
                        "class name": {
                            "rank in this class" : "the key you want to store the text content of this element"
                            ...
                        }
                    } )
            'xpathObject':
                only exists when category is "xpath", a dict with keys as the xpath,
                like { "full xpath" : "the key you want to store the text content of this element" }
            'inputObject':
                only exists when category is "input",
                a dict with keys as the input element's xpath,
                like { "full xpath" : "the key you want to store the value of this input" }
        }

    Return
        dict mapping the configured storage keys to the stripped text (or
        input value) of the matched elements. An empty dict is returned when
        no open page has the active tab's URL or when 'category' is none of
        the values above.
    """
    active_tab_url = get_active_url_from_accessTree(env, config)
    host = env.vm_ip
    port = 9222  # fixme: this port is hard-coded, need to be changed from config file

    remote_debugging_url = f"http://{host}:{port}"
    with sync_playwright() as p:
        # connect to remote Chrome instance
        browser = p.chromium.connect_over_cdp(remote_debugging_url)
        try:
            target_page = _find_page_by_url(browser, active_tab_url)
            if target_page is None:
                logger.error("no open page matches the active tab url: %s", active_tab_url)
                return {}
            return _collect_page_fields(target_page, config)
        finally:
            # Release the connection even when a lookup above raises.
            browser.close()


def _find_page_by_url(browser, url):
    """Return the first loaded page whose url equals *url*, or None."""
    for context in browser.contexts:
        for page in context.pages:
            page.wait_for_load_state("load")
            if page.url == url:
                return page
    return None


def _collect_page_fields(page, config):
    """Extract the texts requested by *config* from *page* into a dict."""
    category = config["category"]
    return_json = {}
    if category == "class":
        # Elements sharing a class name are addressed by their rank.
        for class_name, keys_by_rank in config.get("class_multiObject", {}).items():
            matches = page.query_selector_all("." + class_name)
            for rank, store_key in keys_by_rank.items():
                return_json[store_key] = matches[int(rank)].text_content().strip()
        for class_name, store_key in config.get("class_singleObject", {}).items():
            return_json[store_key] = page.query_selector("." + class_name).text_content().strip()
    elif category == "label":
        for label, store_key in config["labelObject"].items():
            return_json[store_key] = page.get_by_label(label).text_content().strip()
    elif category == "xpath":
        for xpath, store_key in config["xpathObject"].items():
            return_json[store_key] = page.locator("xpath=" + xpath).text_content().strip()
    elif category == "input":
        # Inputs expose their content as a value, not as text content.
        for xpath, store_key in config["inputObject"].items():
            return_json[store_key] = page.locator("xpath=" + xpath).input_value().strip()
    return return_json
|
||||
|
||||
|
||||
def get_gotoRecreationPage_and_get_html_content(env, config: Dict[str, Any]):
    """
    Especially used for www.recreation.gov examples.

    Opens https://www.recreation.gov/ in a new page of the remote browser,
    searches for "Albion Basin", follows the highlighted search result into
    the popup page it opens, clicks the "next available" button there and
    reads one element's text from that popup page.

    config:
        'selector': only the value "class" is handled; any other value
            yields an empty "expected" dict.
        'class': the class name of the element to read.
        'order': optional, index of the element among all elements with
            that class; without it the first match is used.

    Return
        { "expected": { <class name>: <stripped text content> } }
    """
    host = env.vm_ip
    port = 9222  # fixme: this port is hard-coded, need to be changed from config file

    remote_debugging_url = f"http://{host}:{port}"
    with sync_playwright() as p:
        browser = p.chromium.connect_over_cdp(remote_debugging_url)
        try:
            page = browser.new_page()
            page.goto("https://www.recreation.gov/")
            page.fill("input#hero-search-input", "Albion Basin")
            page.click("button.nav-search-button")
            logger.debug("after first click")
            time.sleep(2)  # fixed pause before the search results are clicked

            # Clicking the highlighted result opens the detail page as a popup.
            with page.expect_popup() as popup_info:
                page.click(".search-result-highlight--success")
            logger.debug("after second click")
            newpage = popup_info.value
            newpage.wait_for_load_state()
            logger.debug("go to newpage: %s", newpage.title())
            time.sleep(2)
            newpage.click("button.next-available")
            logger.debug("after third click")

            return_json = {"expected": {}}
            # find the text of elements in html with specific class name
            if config["selector"] == "class":
                className = config["class"]
                if "order" in config:
                    element = newpage.query_selector_all("." + className)[int(config["order"])]
                else:
                    element = newpage.query_selector("." + className)
                return_json["expected"][className] = element.text_content().strip()
            return return_json
        finally:
            # Release the connection even when a step above raises.
            browser.close()
|
||||
|
||||
|
||||
def get_active_tab_url_parse(env, config: Dict[str, Any]):
    """
    Extract selected query parameters from the active tab's url.

    config:
        # Keys used in get_active_url_from_accessTree: "xpath", "selectors"
        'parse_keys': must exist,
            a list of keys to extract from the query parameters of the url.
        'replace': optional,
            a dict, used to replace the original key with the new key.
            ( { "original key": "new key" } )
            Original keys that are not among 'parse_keys' are ignored.

    Return
        dict mapping each requested (possibly renamed) key to the first value
        of that query parameter, or '' when the url does not carry it.
    """
    active_tab_url = get_active_url_from_accessTree(env, config)

    # A falsy result means the url could not be read; parsing an empty
    # string then yields '' for every requested key.
    query_params = parse_qs(urlparse(active_tab_url or "").query)

    # Only the first value of a repeated parameter is kept.
    extracted_params = {key: query_params.get(key, [''])[0] for key in config["parse_keys"]}

    for original_key, new_key in (config.get("replace") or {}).items():
        # change original key to new key, keep value unchanged
        if original_key in extracted_params:
            extracted_params[new_key] = extracted_params.pop(original_key)
    return extracted_params
|
||||
|
||||
|
||||
def get_url_dashPart(env, config: Dict[str, str]):
    """
    Extract one of the "/"-separated parts of the active tab's url.

    config:
        # Keys used in get_active_url_from_accessTree: "xpath", "selectors"
        'partIndex': must exist,
            the index into ``url.split("/")`` of the part to extract,
            starting from 0 (negative indices count from the end).
        'needDeleteId': optional, default False,
            a boolean, used to indicate whether to drop everything from "?"
            on ( an example: "/part-you-want?id=xxx" )
        'returnType': must exist,
            a string, used to indicate the return type, "string" or "json".
        'key': must exist when returnType is "json",
            the key under which the part is stored in the returned dict.

    Return
        the extracted part (returnType "string"), { config["key"]: part }
        (returnType "json"), or None when the url could not be read or
        returnType is neither of the two.
    """
    active_tab_url = get_active_url_from_accessTree(env, config)
    if not active_tab_url:
        # The url getter signals "not found" with a falsy value.
        logger.error("active tab url is not available")
        return None

    dash_part = active_tab_url.split("/")[config["partIndex"]]
    if config.get("needDeleteId", False):
        # delete all the characters from the query string on
        dash_part = dash_part.split("?")[0]

    if config["returnType"] == "string":
        return dash_part
    elif config["returnType"] == "json":
        return {config["key"]: dash_part}
    return None
|
||||
|
||||
Reference in New Issue
Block a user