# sci-gui-agent-benchmark/desktop_env/evaluators/metrics/table.py

import pandas as pd
import openpyxl
from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet

from .utils import load_charts, load_sparklines

from typing import Dict, List
from typing import Any


def compare_table(actual, expected):
    """Tell whether two spreadsheet files load into equal DataFrames.

    Both inputs are read with ``pd.read_excel`` (default arguments) and the
    resulting frames are compared with ``DataFrame.equals``.

    Args:
        actual: result spreadsheet, in any form ``pd.read_excel`` accepts.
        expected: gold spreadsheet, in any form ``pd.read_excel`` accepts.

    Returns:
        int: ``1`` if the two frames are equal, ``0`` otherwise.
    """
    # The gold file is read first, then the result file.
    gold_frame = pd.read_excel(expected)
    result_frame = pd.read_excel(actual)

    if gold_frame.equals(result_frame):
        return 1
    return 0


def compare_with_sparklines(actual: str, expected: str) -> float:
    """Compare two xlsx files on cell contents and on sparklines.

    The cell contents are compared through ``pd.read_excel`` +
    ``DataFrame.equals``; the sparklines are whatever ``load_sparklines``
    returns for each file, compared with ``==``. Both intermediate results
    are printed.

    Args:
        actual (str): path to result xlsx
        expected (str): path to gold xlsx

    Returns:
        float: ``1.0`` when both checks succeed, ``0.0`` otherwise.
    """
    result_frame = pd.read_excel(actual)
    gold_frame = pd.read_excel(expected)
    contents_match: bool = result_frame.equals(gold_frame)
    print("Normal Contents Metric: {:}".format(contents_match))

    result_sparklines = load_sparklines(actual)
    gold_sparklines = load_sparklines(expected)
    sparklines_match: bool = result_sparklines == gold_sparklines
    print("Sparkline Metric: {:}".format(sparklines_match))

    combined = contents_match and sparklines_match
    return float(combined)


def compare_with_charts(actual: str, expected: str, **options) -> float:
    """Compare two xlsx files on cell contents and on charts.

    The cell contents are compared through ``pd.read_excel`` +
    ``DataFrame.equals``; the charts are whatever ``load_charts`` returns for
    each file (called with the same ``options``), compared with ``==``. Both
    intermediate results are printed.

    Args:
        actual (str): path to result xlsx
        expected (str): path to gold xlsx
        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
          giving the concerned chart properties; forwarded unchanged to
          ``load_charts``

    Returns:
        float: ``1.0`` when both checks succeed, ``0.0`` otherwise.
    """
    result_frame = pd.read_excel(actual)
    gold_frame = pd.read_excel(expected)
    contents_match: bool = result_frame.equals(gold_frame)
    print("Normal Contents Metric: {:}".format(contents_match))

    result_charts = load_charts(actual, **options)
    gold_charts = load_charts(expected, **options)
    charts_match: bool = result_charts == gold_charts
    print("Chart Metric: {:}".format(charts_match))

    combined = contents_match and charts_match
    return float(combined)

def _numeric_cell_formats(xlsx_path: str) -> List[str]:
    """Return the lower-cased number formats of the active sheet's "n" cells.

    Cells are visited in the order produced by ``iter_cols()`` on the
    workbook's active worksheet; only cells whose ``data_type`` is ``"n"``
    contribute an entry.
    """
    book: Workbook = openpyxl.load_workbook(xlsx_path)
    formats: List[str] = []
    for column in book.active.iter_cols():
        for cell in column:
            if cell.data_type == "n":
                formats.append(cell.number_format.lower())
    return formats


def compare_with_formats(actual: str, expected: str) -> float:
    """Compare two xlsx files on cell contents and on number formats.

    The cell contents are compared through ``pd.read_excel`` +
    ``DataFrame.equals``. The number formats are gathered with openpyxl from
    the active worksheet of each file (see ``_numeric_cell_formats``) and the
    two lists are compared with ``==``. Both intermediate results are printed.

    Args:
        actual (str): path to result xlsx
        expected (str): path to gold xlsx

    Returns:
        float: ``1.0`` when both checks succeed, ``0.0`` otherwise.
    """
    result_frame = pd.read_excel(actual)
    gold_frame = pd.read_excel(expected)
    contents_match: bool = result_frame.equals(gold_frame)
    print("Normal Contents Metric: {:}".format(contents_match))

    result_formats = _numeric_cell_formats(actual)
    gold_formats = _numeric_cell_formats(expected)
    formats_match: bool = result_formats == gold_formats
    print("Number Format Metric: {:}".format(formats_match))

    return float(contents_match and formats_match)

def _read_rule_sheet(workbook, sheet_ref) -> pd.DataFrame:
    """Load the sheet that one side of a ``sheet_data`` rule refers to.

    Args:
        workbook: the already opened ``pd.ExcelFile`` of the result file.
        sheet_ref: either an ``int`` (sheet index inside *workbook*) or a
            string of the form ``"<file path>@<sheet index>"`` pointing at a
            sheet of another file. The string is split on its LAST ``"@"``.

    Returns:
        pd.DataFrame: the sheet contents as read by ``pd.read_excel``.
    """
    if isinstance(sheet_ref, int):
        return pd.read_excel(workbook, sheet_ref)
    file_name, sheet_idx = sheet_ref.rsplit("@", maxsplit=1)
    return pd.read_excel(file_name, int(sheet_idx))


def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
    """Check the sheets of a result workbook against a list of rules.

    Supported rule types (``r["type"]``):

    * ``"sheet_name"``: the sheet at index ``r["sheet_idx"]`` of the result
      workbook must be named ``r["sheet_name"]``.
    * ``"sheet_data"``: the sheets referenced by ``r["sheet_idx0"]`` and
      ``r["sheet_idx1"]`` must load into equal DataFrames. Each reference is
      either an ``int`` index into the result workbook or a
      ``"<file path>@<sheet index>"`` string.

    Every rule is evaluated and its outcome printed, even after an earlier
    rule has failed.

    Args:
        result (str): path to result xlsx
        rules (List[Dict[str, Any]]): the rules described above

    Returns:
        float: ``1.0`` when every rule holds, ``0.0`` otherwise.

    Raises:
        NotImplementedError: for a rule whose ``"type"`` is not listed above.
    """
    passes = True
    # Open the workbook as a context manager so the underlying file handle is
    # released even when a rule raises.
    with pd.ExcelFile(result) as workbook:
        worksheet_names: List[str] = workbook.sheet_names

        for r in rules:
            if r["type"] == "sheet_name":
                # Name found in the result file vs. name demanded by the rule.
                actual_name: str = worksheet_names[r["sheet_idx"]]
                expected_name: str = r["sheet_name"]
                metric: bool = actual_name == expected_name
                print("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], expected_name, actual_name, metric))
            elif r["type"] == "sheet_data":
                df1: pd.DataFrame = _read_rule_sheet(workbook, r["sheet_idx0"])
                df2: pd.DataFrame = _read_rule_sheet(workbook, r["sheet_idx1"])
                metric = df1.equals(df2)
                print("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
            else:
                raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))
            passes = passes and metric

    return float(passes)


def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    """Check the freeze-pane position of the result workbook's active sheet.

    Args:
        result (str): path to result xlsx
        rules (Dict[str, str]): dict like {"position": "C6"}; the value is
          compared with ``==`` against the active worksheet's ``freeze_panes``

    Returns:
        float: ``1.0`` when the two are equal, ``0.0`` otherwise.
    """
    workbook = openpyxl.load_workbook(filename=result)
    active_sheet: Worksheet = workbook.active
    expected_position = rules["position"]
    return float(active_sheet.freeze_panes == expected_position)


# Ad-hoc manual driver: when the file is executed as a script, it runs
# compare_with_formats on two hard-coded spreadsheets and prints the score.
# The commented-out snippets below are earlier manual experiments for the
# other metrics in this file, kept as usage examples.
# (The directory name "任务数据" in the paths means "task data".)
if __name__ == '__main__':
    # --- compare_table ---
    # path1 = ""
    # path2 = ""
    # print(compare_table(path1, path2))

    # --- compare_with_sparklines ---
    # path1 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
    # path2 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"
    # print(compare_with_sparklines(path1, path2))

    # --- check_xlsx_freeze ---
    # path1 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column_gold.xlsx"
    # path2 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column.xlsx"
    # workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    # worksheet1: Worksheet = workbook1.active
    # print(worksheet1.freeze_panes)
    # workbook2: Workbook = openpyxl.load_workbook(filename=path2)
    # worksheet2: Worksheet = workbook2.active
    # print(worksheet2.freeze_panes)
    # rule = {"position": "C6"}
    # print(check_xlsx_freeze(path1, rule))

    # --- check_sheet_list ---
    # path1 = "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert_gold.xlsx"
    # rule = [ { "type": "sheet_name"
    # , "sheet_idx": 0
    # , "sheet_name": "Sheet1"
    # }
    # , { "type": "sheet_data"
    # , "sheet_idx0": "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert.xlsx@0"
    # , "sheet_idx1": 1
    # }
    # , { "type": "sheet_name"
    # , "sheet_idx": 2
    # , "sheet_name": "Sheet2"
    # }
    # ]
    # print(check_sheet_list(path1, rule))

    # --- compare_with_charts ---
    #path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
    #path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
    #print(compare_with_charts(path1, path2, chart_props=["type", "direction"]))

    # --- compare_with_formats (the only check currently enabled) ---
    path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx"
    path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx"
    # Disabled debugging aid: dump value and number format of cells A1..C8.
    #workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    #worksheet1: Worksheet = workbook1.active
    #
    #import itertools
    #for col, r in itertools.product( ['A', 'B', 'C']
                                   #, range(1, 9)
                                   #):
        #position: str = "{:}{:d}".format(col, r)
        #print(worksheet1[position])
        #print(worksheet1[position].value)
        #print(worksheet1[position].number_format)
    print(compare_with_formats(path1, path2))