Fix one bug in Chrome getter

This commit is contained in:
Timothyxxx
2024-04-01 15:05:48 +08:00
parent 6c8a7af79d
commit fad621093f
2 changed files with 62 additions and 32 deletions

View File

@@ -1079,18 +1079,18 @@ def get_data_delete_automacally(env, config: Dict[str, str]):
def get_active_tab_html_parse(env, config: Dict[str, Any]):
"""
This function is used to get the specific element's text content from the active tab's html.
config:
config:
Dict[str, str]{
# Keys used in get_active_url_from_accessTree: "xpath", "selectors"
'category':
'category':
choose from ["class", "label", "xpath", "input"], used to indicate how to find the element
'labelObject':
'labelObject':
only exists when category is "label",
a dict like { "labelSelector": "the key you want to store the text content of this label's element"}
'class_singleObject':
only exists when category is "class", a dict with keys as the class name,
'class_singleObject':
only exists when category is "class", a dict with keys as the class name,
like { "class name" : "the key you want to store the text content of this element" }
'class_multiObject':
'class_multiObject':
only exists when category is "class", used for elements with same class name.
Two layers of dict, like
( {
@@ -1099,8 +1099,8 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
...
}
} )
'xpathObject':
only exists when category is "xpath", a dict with keys as the xpath,
'xpathObject':
only exists when category is "xpath", a dict with keys as the xpath,
like { "full xpath" : "the key you want to store the text content of this element" }
'inputObject':
only exists when category is "input",
@@ -1151,32 +1151,50 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
if target_page is None:
logger.error("Your tab is not the target tab.")
return {}
return_json = {}
def safely_get_text_content(selector):
elements = target_page.query_selector_all(selector)
return [element.text_content().strip() for element in elements if element]
if config["category"] == "class":
# find the text of elements in html with specific class name
class_multiObject = config["class_multiObject"]
for key in class_multiObject.keys():
object_dict = class_multiObject[key]
for order_key in object_dict.keys():
return_json[object_dict[order_key]] = target_page.query_selector_all("." + key)[
int(order_key)].text_content().strip()
class_singleObject = config["class_singleObject"]
for key in class_singleObject.keys():
return_json[class_singleObject[key]] = target_page.query_selector("." + key).text_content().strip()
class_multiObject = config.get("class_multiObject", {})
for class_name, object_dict in class_multiObject.items():
elements_texts = safely_get_text_content("." + class_name)
for order_key, key in object_dict.items():
index = int(order_key)
if len(elements_texts) > index:
return_json[key] = elements_texts[index]
class_singleObject = config.get("class_singleObject", {})
for class_name, key in class_singleObject.items():
element_text = safely_get_text_content("." + class_name)
if element_text:
return_json[key] = element_text[0]
elif config['category'] == "label":
# find the text of elements in html with specific label name
labelObject = config["labelObject"]
for key in labelObject.keys():
return_json[labelObject[key]] = target_page.get_by_label(key).text_content().strip()
# Look the element up by its text via a "text=" locator and take the first match
labelObject = config.get("labelObject", {})
for labelSelector, key in labelObject.items():
text = target_page.locator(f"text={labelSelector}").first.text_content().strip()
if text:
return_json[key] = text
elif config["category"] == "xpath":
# find the text of elements in html with specific xpath
xpathObject = config["xpathObject"]
for key in xpathObject.keys():
return_json[xpathObject[key]] = target_page.locator("xpath=" + key).text_content().strip()
xpathObject = config.get("xpathObject", {})
for xpath, key in xpathObject.items():
elements = target_page.locator(f"xpath={xpath}")
if elements.count() > 0:
return_json[key] = elements.first.text_content().strip()
elif config["category"] == "input":
inputObject = config["inputObject"]
for key in inputObject.keys():
return_json[inputObject[key]] = target_page.locator("xpath=" + key).input_value().strip()
inputObjects = config.get("inputObject", {})
for xpath, key in inputObjects.items():
inputs = target_page.locator(f"xpath={xpath}")
if inputs.count() > 0:
return_json[key] = inputs.first.input_value().strip()
browser.close()
return return_json

View File

@@ -20,7 +20,7 @@ from lxml.cssselect import CSSSelector
from lxml.etree import _Element
from rapidfuzz import fuzz
from .utils import _match_record, _match_value_to_rule
from desktop_env.evaluators.metrics.utils import _match_record, _match_value_to_rule
logger = logging.getLogger("desktopenv.metric.general")
@@ -321,11 +321,11 @@ def check_direct_json_object(result, rules) -> float:
expected_json = rules["expected"]
for key in expected_json.keys():
if isinstance(expected_json.get(key), List):
if isinstance(expected_json.get(key), list):
flag = 0
expected_value_list = expected_json.get(key)
for each_expected_value in expected_value_list:
if each_expected_value in result.get(key):
if isinstance(result.get(key), list) and each_expected_value in result.get(key):
flag = 1
break
if flag == 0:
@@ -484,3 +484,15 @@ def compare_python_pure_text(py_file_path, gold_file_path):
return 1
else:
return 0
# Ad-hoc manual check: when this module is executed as a script, call
# check_direct_json_object with a sample rule set and print its return value.
# NOTE(review): `result` is passed as an empty list here, while the visible
# part of check_direct_json_object calls result.get(key) — confirm that a
# list is an acceptable argument before relying on this check.
if __name__ == '__main__':
print(check_direct_json_object([], rules={
"relativeTime": {
"from": "5th next month"
},
"expected": {
"start": "SEA",
"end": "NYC",
"time": "{DoW}, {Month} {DayD}, {Year}",
"category": "Miles"
}}))