Files
sci-gui-agent-benchmark/desktop_env/evaluators/metrics/general.py
David Chang 1515b05666 ver Jan10thv2
a new example config for Thunderbird
fixed several bugs
2024-01-10 21:58:29 +08:00

126 lines
4.6 KiB
Python

import csv
import lxml.etree
from lxml.etree import _Element
from lxml.cssselect import CSSSelector
from typing import Dict, List, Pattern
from typing import Callable, Any
from numbers import Number
import operator
from rapidfuzz import fuzz
import functools
import re
def _match_record(pattern: Dict[str, str], item: Dict[str, str]) -> float:
return all(k in item and item[k]==val for k, val in pattern.items())
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
"""
Args:
result (str): path to csv file
rules (Dict[str, List[Dict[str, str]]]): dict like
{
"expect": [{key: value}]
"unexpect": [{key: value}]
}
Returns:
float
"""
expect_metrics = [False] * len(rules.get("expect", []))
unexpect_metric = True
with open(result) as f:
reader = csv.DictReader(f)
for rcd in reader:
for i, r in enumerate(rules.get("expect", [])):
expect_metrics[i] = expect_metrics[i] or _match_record(r, rcd)
unexpect_metric = unexpect_metric and not any(_match_record(r, rcd) for r in rules.get("unexpect", []))
return float(all(expect_metrics) and unexpect_metric)
def check_list(result: str, rules: Dict[str, List[str]]) -> float:
"""
Args:
result (str): path to list file
rules (Dict[str, List[str]]): dict like
{
"expect": list of str as regexes
"unexpect": list of str as regexes
}
Returns:
float
"""
expect_patterns: List[Pattern[str]] = [re.compile(ptt) for ptt in rules.get("expect", [])]
unexpect_patterns: List[Pattern[str]] = [re.compile(ptt) for ptt in rules.get("unexpect", [])]
expect_metrics = [False] * len(expect_patterns)
unexpect_metric = True
with open(result) as f:
for l in f:
for i, r in enumerate(expect_patterns):
expect_metrics[i] = expect_metrics[i] or (r.search(l) is not None)
unexpect_metric = unexpect_metric and all(r.search(l) is None for r in unexpect_patterns)
return float(all(expect_metrics) and unexpect_metric)
_accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org"
, "attr": "uri:deskat:attributes.at-spi.gnome.org"
, "cp": "uri:deskat:component.at-spi.gnome.org"
, "doc": "uri:deskat:document.at-spi.gnome.org"
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
, "txt": "uri:deskat:text.at-spi.gnome.org"
, "val": "uri:deskat:value.at-spi.gnome.org"
, "act": "uri:deskat:action.at-spi.gnome.org"
}
def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
"""
Args:
result (str): XML of GNOME Accessibility Tree
rules (Dict[str, Any]): dict like
{
"selectors": list of str as CSS selectors, will be connected by ", "
to form a composite selector. Only one from `selectors` and
`xpath` is needed. If both are present, `xpath` takes the
priority.
"xpath": str as xpath. Only one from `selectors` and `xpath` is
needed. If both are present, `xpath` takes the priority.
"text": str as the expected text content of the selected element.
"exact": bool specifying whether exact match or fuzzy match should
be performed. defaults to True
}
Returns:
float
"""
at: _Element = lxml.etree.fromstring(result)
if "xpath" in rules:
elements: List[_Element] = at.xpath(rules["xpath"], namespaces=_accessibility_ns_map)
elif "selectors" in rules:
selector = CSSSelector(", ".join(rules["selectors"]), namespaces=_accessibility_ns_map)
elements: List[_Element] = selector(at)
else:
raise ValueError("At least one of xpath and selectors is required")
if len(elements)==0:
return 0.
if "text" in rules:
match_func: Callable[[str], Number] = functools.partial( operator.eq if rules["exact"] else fuzz.ratio
, rules["text"]
)
match_score: Number = 0
for elm in elements:
match_score = max(match_score, match_func(elm.text or None))
else:
match_score = 1.
return float(match_score)
#def check_existence(result: str, *args) -> float:
#return 1. - (result is None)