# sci-gui-agent-benchmark/desktop_env/evaluators/metrics/table.py

import pandas as pd
import zipfile
import lxml.etree
import lxml.cssselect
from lxml.etree import _Element
import xmltodict
#import pylightxl

from typing import Dict, List
#from typing import Any

def compare_table(actual, expected):
    """Report whether two Excel workbooks load into identical DataFrames.

    Both paths are read with ``pd.read_excel`` using its default arguments
    and the resulting frames are compared with ``DataFrame.equals``.

    Args:
        actual: path (or anything ``pd.read_excel`` accepts) of the file
          under test
        expected: path of the reference file

    Returns:
        int: 1 when the two frames are equal, 0 otherwise
    """
    # The reference file is read first, then the file under test.
    expected_frame = pd.read_excel(expected)
    actual_frame = pd.read_excel(actual)

    if expected_frame.equals(actual_frame):
        return 1
    return 0

# (prefix, URI) pairs for the XML namespaces the sparkline lookup relies on.
_xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI, handed to the CSS selector below.
_xlsx_ns_mapping = {prefix: uri for prefix, uri in _xlsx_namespaces}
# URI -> prefix, handed to xmltodict so parsed keys come out as "prefix:tag".
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
# Compiled once at import time; matches every <x14:sparkline> element.
_sparklines_selector = lxml.cssselect.CSSSelector(
    "x14|sparkline",
    namespaces=_xlsx_ns_mapping,
)
#print(_sparklines_selector.css)
def _load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """
    Collect the sparkline definitions stored in the first worksheet part
    ("xl/worksheets/sheet1.xml") of an xlsx archive.

    Only that single worksheet part is inspected. Nothing is modified; the
    archive is opened read-only.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, str]: sparkline definitions keyed by the content of
          "xm:sqref", with the content of "xm:f" as value, e.g.
          {
            "F3": "Sheet1!C3:E3"
          }
          If several sparklines share the same "xm:sqref", the last one
          encountered wins.
    """

    # Pull the raw worksheet XML out of the zip container.
    with zipfile.ZipFile(xlsx_file, "r") as archive:
        sheet_bytes = archive.read("xl/worksheets/sheet1.xml")

    sheet_root: _Element = lxml.etree.fromstring(sheet_bytes)
    sparkline_nodes: List[_Element] = _sparklines_selector(sheet_root)

    definitions: Dict[str, str] = {}
    for node in sparkline_nodes:
        # Serialise the element and re-parse it with xmltodict so the
        # children can be addressed by their "prefix:tag" names.
        parsed = xmltodict.parse(
            lxml.etree.tostring(node, encoding="unicode"),
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        entry = parsed["x14:sparkline"]
        definitions[entry["xm:sqref"]] = entry["xm:f"]
    return definitions

def compare_with_sparklines(actual: str, expected: str) -> float:
    """Compare two xlsx files on cell content and on sparkline definitions.

    The cell content is compared by loading both files with
    ``pd.read_excel`` and calling ``DataFrame.equals``; the sparklines are
    compared as the dictionaries returned by ``_load_sparklines``.

    Args:
        actual (str): path to the xlsx under test
        expected (str): path to the reference xlsx

    Returns:
        float: 1.0 when both the content and the sparklines match,
          0.0 otherwise
    """
    actual_frame = pd.read_excel(actual)
    expected_frame = pd.read_excel(expected)
    content_matches: bool = actual_frame.equals(expected_frame)

    # Sparklines are always loaded, even when the content already differs.
    actual_sparklines = _load_sparklines(actual)
    expected_sparklines = _load_sparklines(expected)
    sparklines_match: bool = actual_sparklines == expected_sparklines

    if content_matches and sparklines_match:
        return 1.0
    return 0.0

if __name__ == '__main__':
    # Ad-hoc manual check against two local workbooks; compare_table can be
    # exercised the same way by swapping the function called below.
    first_path = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
    second_path = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"

    score = compare_with_sparklines(first_path, second_path)
    print(score)