Files
sci-gui-agent-benchmark/desktop_env/evaluators/metrics/table.py

225 lines
8.9 KiB
Python

import logging
import operator
from numbers import Number
from typing import Any, Union
from typing import Dict, List
import openpyxl
import pandas as pd
from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet
from .utils import load_charts, load_sparklines
logger = logging.getLogger("desktopenv.metric.table")
def compare_table(actual: str, expected: str, **options) -> float:
"""
Args:
actual (str): path to result xlsx
expected (str): path to gold xlsx
options (Dict[str, List[str]]): dict like
{
"features": list of str for other features, supports:
* sparkline
* chart
* number_format
"chart_props": list of str, giving the converned chart properties
}
Return:
float: the score
"""
if actual is None:
return 0.
df1 = pd.read_excel(expected)
df2 = pd.read_excel(actual)
metric: bool = df1.equals(df2)
logger.debug("Normal Contents Metric: {:}".format(metric))
features: List[str] = options.get("features", [])
for ftr in features:
workbook1: Workbook = openpyxl.load_workbook(actual)
workbook2: Workbook = openpyxl.load_workbook(expected)
if ftr == "sparkline":
sp1 = load_sparklines(actual)
sp2 = load_sparklines(expected)
new_metric: bool = sp1 == sp2
logger.debug("Sparkline Metric: {:}".format(new_metric))
elif ftr == "chart":
charts1 = load_charts(workbook1, **options)
charts2 = load_charts(workbook2, **options)
new_metric: bool = charts1 == charts2
logger.debug("Chart Metric: {:}".format(new_metric))
elif ftr == "number_format":
number_formats1: List[str] = [c.number_format.lower() \
for col in workbook1.active.iter_cols() \
for c in col \
if c.data_type == "n"
]
number_formats2: List[str] = [c.number_format.lower() \
for col in workbook2.active.iter_cols() \
for c in col \
if c.data_type == "n"
]
new_metric: bool = number_formats1 == number_formats2
logger.debug("Number Format Metric: {:}".format(new_metric))
else:
raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
metric = metric and new_metric
return float(metric)
def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
if result is None:
return 0.
# workbook: Workbook = openpyxl.load_workbook(filename=result)
workbook = pd.ExcelFile(result)
worksheet_names: List[str] = workbook.sheet_names
passes = True
for r in rules:
if r["type"] == "sheet_name":
expected_name: str = worksheet_names[r["sheet_idx"]]
actual_name: str = r["sheet_name"]
metric: bool = expected_name == actual_name
logger.debug("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], actual_name, expected_name, metric))
passes = passes and metric
elif r["type"] == "sheet_data":
if isinstance(r["sheet_idx0"], int):
df1: pd.DataFrame = pd.read_excel(workbook, r["sheet_idx0"])
else:
file_name: str
sheet_idx: str
file_name, sheet_idx = r["sheet_idx0"].rsplit("@", maxsplit=1)
sheet_idx: int = int(sheet_idx)
df1: pd.DataFrame = pd.read_excel(file_name, sheet_idx)
if isinstance(r["sheet_idx1"], int):
df2: pd.DataFrame = pd.read_excel(workbook, r["sheet_idx1"])
else:
file_name: str
sheet_idx: str
file_name, sheet_idx = r["sheet_idx1"].rsplit("@", maxsplit=1)
sheet_idx: int = int(sheet_idx)
df2: pd.DataFrame = pd.read_excel(file_name, sheet_idx)
metric: bool = df1.equals(df2)
logger.debug("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
passes = passes and metric
else:
raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))
return float(passes)
def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
if result is None:
return 0.
worksheet: Worksheet = openpyxl.load_workbook(filename=result).active
return float(worksheet.freeze_panes == rules["position"])
def check_xlsx_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float:
if result is None:
return 0.
worksheet = openpyxl.load_workbook(filename=result).active
zoom_scale: Number = worksheet.sheet_view.zoomScale or 100.
return float(getattr(operator, rules["relation"])(zoom_scale
, rules["ref_value"]
)
)
if __name__ == '__main__':
# path1 = ""
# path2 = ""
# print(compare_table(path1, path2))
# path1 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
# path2 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"
# print(compare_with_sparklines(path1, path2))
# path1 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column_gold.xlsx"
# path2 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column.xlsx"
# workbook1: Workbook = openpyxl.load_workbook(filename=path1)
# worksheet1: Worksheet = workbook1.active
# print(worksheet1.freeze_panes)
# workbook2: Workbook = openpyxl.load_workbook(filename=path2)
# worksheet2: Worksheet = workbook2.active
# print(worksheet2.freeze_panes)
# rule = {"position": "C6"}
# print(check_xlsx_freeze(path1, rule))
# path1 = "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert_gold.xlsx"
# rule = [ { "type": "sheet_name"
# , "sheet_idx": 0
# , "sheet_name": "Sheet1"
# }
# , { "type": "sheet_data"
# , "sheet_idx0": "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert.xlsx@0"
# , "sheet_idx1": 1
# }
# , { "type": "sheet_name"
# , "sheet_idx": 2
# , "sheet_name": "Sheet2"
# }
# ]
# print(check_sheet_list(path1, rule))
# path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
# path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
# print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"]))
# path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx"
# path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx"
# path1 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot.xlsx"
# path2 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot_gold.xlsx"
# workbook1: Workbook = openpyxl.load_workbook(filename=path1)
# worksheet1: Worksheet = workbook1.active
# import itertools
# for col, r in itertools.product( ['A', 'B']
# , range(1, 20)
# ):
# position: str = "{:}{:d}".format(col, r)
# print(worksheet1[position])
# print(worksheet1[position].value)
# print(worksheet1[position].number_format)
# workbook2: Workbook = openpyxl.load_workbook(filename=path2)
# worksheet2: Worksheet = workbook2.active
# for col, r in itertools.product( ['A', 'B']
# , range(1, 20)
# ):
# position: str = "{:}{:d}".format(col, r)
# print(worksheet2[position])
# print(worksheet2[position].value)
# print(worksheet2[position].number_format)
# print(compare_table(path1, path2, features=["number_format"]))
# path1 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells_gold.xlsx"
# path2 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells.xlsx"
# workbook1: Workbook = openpyxl.load_workbook(filename=path1)
# worksheet1: Worksheet = workbook1.active
# print(worksheet1.sheet_view.zoomScale)
# print(type(worksheet1.sheet_view.zoomScale))
#
# import os
# import os.path
# for wb in filter( lambda f: f.endswith(".xlsx")
# , os.listdir("../../任务数据/LibreOffice Calc/")
# ):
# path = os.path.join("../../任务数据/LibreOffice Calc/", wb)
# print(wb, openpyxl.load_workbook(filename=path).active.sheet_view.zoomScale)
# print(check_zoom(path1, {"relation": "lt", "ref_value": 100}))
# print(check_zoom(path2, {"relation": "lt", "ref_value": 100}))
path1 = "../../任务数据/LibreOffice Calc/Padding_Decimals_In_Formular_gold.xlsx"
data_frame: pd.DataFrame = pd.read_excel(path1)
print(data_frame)