Files
sci-gui-agent-benchmark/desktop_env/evaluators/metrics/table.py
David Chang 6e6ef03bc9 ver Jan2nd
calc metrics are prepared by and large
2024-01-02 21:03:57 +08:00

206 lines
8.8 KiB
Python

import pandas as pd
import openpyxl
from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet
from .utils import load_charts, load_sparklines
import operator
from typing import Dict, List
from typing import Any, Union
from numbers import Number
def compare_table(actual: str, expected: str, **options) -> float:
    """
    Compare a result workbook against a gold workbook.

    The cell contents of the sheet that `pd.read_excel` reads by default are
    always compared. Every entry of `options["features"]` adds one more
    check; the score is 1.0 only if all of the checks pass.

    Args:
        actual (str): path to result xlsx
        expected (str): path to gold xlsx
        options (Dict[str, List[str]]): dict like
          {
            "features": list of str for other features, supports:
                * sparkline
                * chart
                * number_format
            "chart_props": list of str, giving the concerned chart properties
          }
          The complete dict is forwarded to `load_charts` as keyword
          arguments when the "chart" feature is requested.

    Return:
        float: the score, 1.0 if every check passes and 0.0 otherwise

    Raises:
        NotImplementedError: if "features" contains an unsupported name
    """

    expected_df = pd.read_excel(expected)
    actual_df = pd.read_excel(actual)
    metric: bool = expected_df.equals(actual_df)
    print("Normal Contents Metric: {:}".format(metric))

    features: List[str] = options.get("features", [])

    # The workbooks do not change between features, so they are loaded at
    # most once, and only if some requested feature actually reads them.
    actual_wb = expected_wb = None
    if any(ftr in ("chart", "number_format") for ftr in features):
        actual_wb = openpyxl.load_workbook(actual)
        expected_wb = openpyxl.load_workbook(expected)

    # Every feature is evaluated (and reported) even after an earlier one
    # has already failed.
    for ftr in features:
        if ftr == "sparkline":
            new_metric: bool = load_sparklines(actual) == load_sparklines(expected)
            print("Sparkline Metric: {:}".format(new_metric))
        elif ftr == "chart":
            actual_charts = load_charts(actual_wb, **options)
            expected_charts = load_charts(expected_wb, **options)
            new_metric = actual_charts == expected_charts
            print("Chart Metric: {:}".format(new_metric))
        elif ftr == "number_format":
            new_metric = _numeric_cell_formats(actual_wb) == _numeric_cell_formats(expected_wb)
            print("Number Format Metric: {:}".format(new_metric))
        else:
            raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
        metric = metric and new_metric

    return float(metric)


def _numeric_cell_formats(workbook: "Workbook") -> List[str]:
    """Collect the lower-cased number formats of the active sheet's cells
    whose data type is "n", walking the sheet column by column."""
    return [
        cell.number_format.lower()
        for column in workbook.active.iter_cols()
        for cell in column
        if cell.data_type == "n"
    ]
def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
    """
    Check the sheets of a result workbook against a list of rules.

    Args:
        result (str): path to result xlsx
        rules (List[Dict[str, Any]]): list of dicts, each either
          {
            "type": "sheet_name",
            "sheet_idx": int, index of a sheet in the result workbook
            "sheet_name": str, the name that sheet is required to have
          }
          or
          {
            "type": "sheet_data",
            "sheet_idx0": int or str, first sheet to compare
            "sheet_idx1": int or str, second sheet to compare
          }
          where an int is a sheet index in the result workbook and a str
          has the form "<path to xlsx>@<sheet index>".

    Return:
        float: 1.0 if every rule holds, 0.0 otherwise

    Raises:
        NotImplementedError: if a rule has an unknown "type"
        IndexError: if a "sheet_name" rule refers to a sheet index that the
          result workbook does not have
    """

    passes = True
    # The context manager closes the underlying file handle once all rules
    # have been evaluated, also when a rule raises.
    with pd.ExcelFile(result) as workbook:
        worksheet_names: List[str] = workbook.sheet_names

        for rule in rules:
            if rule["type"] == "sheet_name":
                found_name: str = worksheet_names[rule["sheet_idx"]]
                wanted_name: str = rule["sheet_name"]
                metric: bool = found_name == wanted_name
                print("Assertion: {:d}.{:} is {:} - {:}".format(rule["sheet_idx"], wanted_name, found_name, metric))
            elif rule["type"] == "sheet_data":
                df1 = _read_sheet(workbook, rule["sheet_idx0"])
                df2 = _read_sheet(workbook, rule["sheet_idx1"])
                metric = df1.equals(df2)
                print("Assertion: {:} == {:} - {:}".format(rule["sheet_idx0"], rule["sheet_idx1"], metric))
            else:
                raise NotImplementedError("Unimplemented sheet check: {:}".format(rule["type"]))
            passes = passes and metric

    return float(passes)


def _read_sheet(workbook: pd.ExcelFile, sheet_ref: Union[int, str]) -> pd.DataFrame:
    """Read one sheet, given either an index into `workbook` or a
    "<path>@<index>" reference to a sheet of another file."""
    if isinstance(sheet_ref, int):
        return pd.read_excel(workbook, sheet_ref)
    # Split on the last "@" only, so the path part may itself contain "@".
    file_name, sheet_idx = sheet_ref.rsplit("@", maxsplit=1)
    return pd.read_excel(file_name, int(sheet_idx))
def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    """
    Check where the panes of the active sheet are frozen.

    Args:
        result (str): path to result xlsx
        rules (Dict[str, str]): dict like
          {
            "position": str, the value `freeze_panes` of the active sheet
              is required to equal, e.g. "C6"
          }

    Return:
        float: 1.0 if the freeze position matches, 0.0 otherwise
    """

    workbook = openpyxl.load_workbook(filename=result)
    frozen_at = workbook.active.freeze_panes
    is_match = frozen_at == rules["position"]
    return float(is_match)
def check_xlsx_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float:
    """
    Check the zoom scale of the active sheet against a reference value.

    Args:
        result (str): path to result xlsx
        rules (Dict[str, Union[str, Number]]): dict like
          {
            "relation": str, name of a function of the `operator` module,
              e.g. "lt"
            "ref_value": Number, the value the zoom scale is compared to
          }

    Return:
        float: float(operator.<relation>(zoom_scale, ref_value))
    """

    sheet = openpyxl.load_workbook(filename=result).active

    zoom_scale: Number = sheet.sheet_view.zoomScale
    if not zoom_scale:
        # No (or a falsy) zoom scale stored in the file: fall back to 100.
        zoom_scale = 100.

    compare = getattr(operator, rules["relation"])
    return float(compare(zoom_scale, rules["ref_value"]))
if __name__ == '__main__':
    # Manual smoke checks kept from development. Every snippet except the
    # last one is disabled. The paths are relative to a local data directory
    # and have to be adjusted before a snippet is re-enabled.

    # --- plain content comparison ---
    # path1 = ""
    # path2 = ""
    # print(compare_table(path1, path2))

    # --- sparklines ---
    # NOTE(review): compare_with_sparklines is not defined in this file;
    # presumably superseded by compare_table(..., features=["sparkline"]) -
    # confirm before re-enabling.
    # path1 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
    # path2 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"
    # print(compare_with_sparklines(path1, path2))

    # --- freeze panes ---
    # path1 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column_gold.xlsx"
    # path2 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column.xlsx"
    # workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    # worksheet1: Worksheet = workbook1.active
    # print(worksheet1.freeze_panes)
    # workbook2: Workbook = openpyxl.load_workbook(filename=path2)
    # worksheet2: Worksheet = workbook2.active
    # print(worksheet2.freeze_panes)
    # rule = {"position": "C6"}
    # print(check_xlsx_freeze(path1, rule))

    # --- sheet list ---
    # path1 = "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert_gold.xlsx"
    # rule = [ { "type": "sheet_name"
    #          , "sheet_idx": 0
    #          , "sheet_name": "Sheet1"
    #          }
    #        , { "type": "sheet_data"
    #          , "sheet_idx0": "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert.xlsx@0"
    #          , "sheet_idx1": 1
    #          }
    #        , { "type": "sheet_name"
    #          , "sheet_idx": 2
    #          , "sheet_name": "Sheet2"
    #          }
    #        ]
    # print(check_sheet_list(path1, rule))

    # --- charts ---
    # path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
    # path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
    # print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"]))

    # --- number formats ---
    # path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx"
    # path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx"
    # path1 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot.xlsx"
    # path2 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot_gold.xlsx"
    # workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    # worksheet1: Worksheet = workbook1.active
    # import itertools
    # for col, r in itertools.product( ['A', 'B']
    #                                , range(1, 20)
    #                                ):
    #     position: str = "{:}{:d}".format(col, r)
    #     print(worksheet1[position])
    #     print(worksheet1[position].value)
    #     print(worksheet1[position].number_format)
    # workbook2: Workbook = openpyxl.load_workbook(filename=path2)
    # worksheet2: Worksheet = workbook2.active
    # for col, r in itertools.product( ['A', 'B']
    #                                , range(1, 20)
    #                                ):
    #     position: str = "{:}{:d}".format(col, r)
    #     print(worksheet2[position])
    #     print(worksheet2[position].value)
    #     print(worksheet2[position].number_format)
    # print(compare_table(path1, path2, features=["number_format"]))

    # --- zoom scale ---
    # (the original snippet called check_zoom; the function defined in this
    # file is check_xlsx_zoom)
    # path1 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells_gold.xlsx"
    # path2 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells.xlsx"
    # workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    # worksheet1: Worksheet = workbook1.active
    # print(worksheet1.sheet_view.zoomScale)
    # print(type(worksheet1.sheet_view.zoomScale))
    #
    # import os
    # import os.path
    # for wb in filter( lambda f: f.endswith(".xlsx")
    #                 , os.listdir("../../任务数据/LibreOffice Calc/")
    #                 ):
    #     path = os.path.join("../../任务数据/LibreOffice Calc/", wb)
    #     print(wb, openpyxl.load_workbook(filename=path).active.sheet_view.zoomScale)
    # print(check_xlsx_zoom(path1, {"relation": "lt", "ref_value": 100}))
    # print(check_xlsx_zoom(path2, {"relation": "lt", "ref_value": 100}))

    # --- active snippet: dump a gold workbook as pandas reads it ---
    path1 = "../../任务数据/LibreOffice Calc/Padding_Decimals_In_Formular_gold.xlsx"
    frame: pd.DataFrame = pd.read_excel(path1)
    print(frame)