sci-gui-agent-benchmark/desktop_env/evaluators/metrics/utils.py

import logging
import zipfile
from typing import Any, TypeVar, Union, Iterable, Optional
from typing import Dict, List, Set, Match
from urllib.parse import urlparse, urlunparse
import re
import functools
import operator

import lxml.cssselect
import lxml.etree
import openpyxl
import xmltodict
from lxml.etree import _Element
from openpyxl import Workbook
from openpyxl.chart._chart import ChartBase
from openpyxl.worksheet.worksheet import Worksheet

# Type variable for the value checked by _match_value_to_rule. The name given
# to TypeVar has to equal the variable it is assigned to, otherwise static
# type checkers reject the declaration.
V = TypeVar("V")

# Logger shared by the helpers in this module.
logger = logging.getLogger("desktopenv.metrics.utils")

# XML namespace prefixes (with their URIs) of the spreadsheet extension
# elements that describe sparklines.
_xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI, as handed to the CSS selector below
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# URI -> prefix, as handed to xmltodict when parsing a sparkline element
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
# Selector matching every "sparkline" element of the x14 namespace
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)


# print(_sparklines_selector.css)
def load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """
    Collect the sparkline definitions found in an xlsx file.

    Only the archive member "xl/worksheets/sheet1.xml" is inspected.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, str]: for every sparkline element, its "xm:sqref" value
          mapped to its "xm:f" value, e.g.
          {
            "F3": "Sheet1!C3:E3"
          }
    """

    # An xlsx file is a zip archive; pull the worksheet XML straight out of it.
    with zipfile.ZipFile(xlsx_file, "r") as archive:
        with archive.open("xl/worksheets/sheet1.xml") as sheet_stream:
            sheet_root: _Element = lxml.etree.fromstring(sheet_stream.read())
            sparkline_nodes: List[_Element] = _sparklines_selector(sheet_root)

    definitions: Dict[str, str] = {}
    for node in sparkline_nodes:
        # Round-trip the element through text so xmltodict can turn it into
        # a dict whose keys carry the short namespace prefixes.
        node_xml: str = lxml.etree.tostring(node, encoding="unicode")
        parsed: Dict[str, Dict[str, str]] = xmltodict.parse(
            node_xml,
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        entry = parsed["x14:sparkline"]
        definitions[entry["xm:sqref"]] = entry["xm:f"]
    return definitions


# Available Chart Properties:
# title: str
# anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
# width: number
# height: number
# type: "scatterChart" | "lineChart" | "barChart"
# direction: "bar" (hori) | "col" (vert)
# xtitle, ytitle, ztitle: str
def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
    """
    Summarise the charts of the active worksheet of a workbook.

    Every chart is identified by a key built from its series: each series
    contributes "<val numRef>,<val strRef>,<cat numRef>,<cat strRef>" (a
    reference that is not available is left empty) and the series strings
    are joined with ";". Charts producing the same key overwrite each other.

    Args:
        xlsx_file (Workbook): concerned excel book
        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
          giving the concerned chart properties. Recognised names are
          "title", "anchor", "width", "height", "type", "direction",
          "xtitle", "ytitle" and "ztitle"; other names are ignored. No
          fallback is applied when a requested property cannot be read from
          a chart: the resulting error propagates to the caller.

    Returns:
        Dict[str, Any]: series key -> dict holding the requested properties.
          "anchor" is reported as [editAs, col0, row0, col1, row1].
    """

    def _ref_formula(data_source: Any, ref_name: str) -> Any:
        # Formula of data_source.<ref_name>, or "" when either the data
        # source or the reference does not provide it.
        return getattr(getattr(data_source, ref_name, None), "f", "")

    def _first_run_text(title: Any) -> Any:
        # Text of the first run of the first paragraph of a rich-text title.
        return title.tx.rich.p[0].r[0].t

    # workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
    worksheet: Worksheet = xlsx_file.active
    charts: List[ChartBase] = worksheet._charts

    chart_props: Set[str] = set(options.get("chart_props", ()))
    chart_set: Dict[str, Any] = {}
    for ch in charts:
        series_key: str = ";".join(
            "{:},{:},{:},{:}".format(
                _ref_formula(ser.val, "numRef"),
                _ref_formula(ser.val, "strRef"),
                _ref_formula(ser.cat, "numRef"),
                _ref_formula(ser.cat, "strRef"),
            )
            for ser in ch.series
        )

        # TODO: maybe more aspects, like chart type
        info: Dict[str, Any] = {}

        if "title" in chart_props:
            info["title"] = _first_run_text(ch.title)
        if "anchor" in chart_props:
            # Top-left corner comes from `_from`, bottom-right from `to`.
            # (row0 used to be read from `to`, duplicating row1.)
            info["anchor"] = [
                ch.anchor.editAs,
                ch.anchor._from.col, ch.anchor._from.row,
                ch.anchor.to.col, ch.anchor.to.row,
            ]
        if "width" in chart_props:
            info["width"] = ch.width
        if "height" in chart_props:
            info["height"] = ch.height
        if "type" in chart_props:
            info["type"] = ch.tagname
        if "direction" in chart_props:
            info["direction"] = ch.barDir

        if "xtitle" in chart_props:
            info["xtitle"] = _first_run_text(ch.x_axis.title)
        if "ytitle" in chart_props:
            info["ytitle"] = _first_run_text(ch.y_axis.title)
        if "ztitle" in chart_props:
            info["ztitle"] = _first_run_text(ch.z_axis.title)
        chart_set[series_key] = info
    return chart_set


def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
    """
    Check that `item` contains every key of `pattern` with an equal value.

    Keys of `item` that are absent from `pattern` are not looked at, so an
    empty pattern matches any item.
    """
    for key, expected in pattern.items():
        if key not in item:
            return False
        if not (item[key] == expected):
            return False
    return True

def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
    """
    Test a value against a single rule.

    Args:
        value (V): value to match
        rule (Dict[str, Union[str, V]]): rule dict like
          {
            "method": str
            "ref": V as ref value
          }
          A method starting with "re" searches `value` for the regular
          expression given as "ref"; dot-separated suffixes name attributes
          of the `re` module that are OR-ed together as flags (for instance
          "re.I.S"). The methods "eq", "ne", "le", "lt", "ge" and "gt" call
          the function of that name in the `operator` module with
          (value, ref).

    Returns:
        bool

    Raises:
        NotImplementedError: for any other method
    """

    method = rule["method"]

    if method.startswith("re"):
        # Everything after the leading "re" component is a flag name.
        combined_flags = re.RegexFlag(0)
        for flag_name in method.split(".")[1:]:
            combined_flags = combined_flags | getattr(re, flag_name)
        logger.debug("REFLAG: %s", repr(combined_flags))

        return re.search(rule["ref"], value, combined_flags) is not None

    if method in {"eq", "ne", "le", "lt", "ge", "gt"}:
        compare = getattr(operator, method)
        return compare(value, rule["ref"])

    raise NotImplementedError()

def are_lists_equal(list1, list2, comparison_func):
    """
    Compare two lists irrespective of order, using a custom equality test.

    The lists are reported equal when they have the same length and every
    item of `list1` has at least one counterpart in `list2` for which
    `comparison_func(item1, item2)` is truthy. The check runs in that
    direction only, and one item of `list2` may serve as the counterpart of
    several items of `list1`.
    """
    same_length = len(list1) == len(list2)
    return same_length and all(
        any(comparison_func(left, right) for right in list2)
        for left in list1
    )


def compare_urls(url1, url2):
    """
    Tell whether two URLs are the same after normalization.

    Normalization applied to each URL before the string comparison:
      * a missing scheme defaults to "http" (so "example.com/a" is read as
        "http://example.com/a");
      * scheme and network location are lower-cased;
      * a leading "www." is dropped from the host;
      * a path consisting of a single "/" is treated as empty.
    Everything else (other paths, query, fragment, port) must match exactly.

    Args:
        url1 (str): first URL
        url2 (str): second URL

    Returns:
        bool: True if both URLs normalize to the same string
    """

    def normalize_url(url):
        # Parse the URL
        parsed_url = urlparse(url)

        # A bare "example.com/path" is parsed as a path without any host.
        # Re-parse it with the default scheme so that the host is recognised
        # and normalized like the host of a fully qualified URL.
        if url and not parsed_url.scheme and not parsed_url.netloc and not url.startswith("/"):
            parsed_url = urlparse("http://" + url)

        # If no scheme is present, assume 'http'
        scheme = (parsed_url.scheme or "http").lower()

        # Lowercase the netloc and drop "www." only where it leads the host
        # part (after optional "user:password@"), never in the middle of it.
        userinfo, at_sign, host_port = parsed_url.netloc.lower().rpartition("@")
        if host_port.startswith("www."):
            host_port = host_port[len("www."):]
        normalized_netloc = userinfo + at_sign + host_port

        # A lone trailing slash is equivalent to no path at all
        normalized_path = parsed_url.path if parsed_url.path != '/' else ''

        # Reassemble the URL with normalized components
        return urlunparse(parsed_url._replace(scheme=scheme,
                                              netloc=normalized_netloc,
                                              path=normalized_path))

    # Compare the normalized URLs
    return normalize_url(url1) == normalize_url(url2)


if __name__ == "__main__":
    # Manual smoke test: print selected chart properties of a sample workbook.
    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx"
    workbook1: Workbook = openpyxl.load_workbook(filename=path1)

    # load_charts works on the loaded Workbook, not on the file path.
    print(load_charts(workbook1, chart_props=["title", "xtitle", "ytitle", "type"]))