# File listing metadata: 227 lines, 8.7 KiB, Python
import pandas as pd
|
|
import zipfile
|
|
import lxml.etree
|
|
import lxml.cssselect
|
|
from lxml.etree import _Element
|
|
import xmltodict
|
|
#import pylightxl
|
|
import openpyxl
|
|
from openpyxl import Workbook
|
|
from openpyxl.worksheet.worksheet import Worksheet
|
|
from openpyxl.chart._chart import ChartBase
|
|
|
|
from typing import Dict, List
|
|
from typing import Any
|
|
|
|
def compare_table(actual, expected):
    """
    Check whether two spreadsheets load into equal DataFrames.

    Args:
        actual: spreadsheet to check; handed to pd.read_excel as is
        expected: reference spreadsheet; handed to pd.read_excel as is

    Returns:
        int: 1 if DataFrame.equals reports the two loaded frames as equal,
            otherwise 0
    """

    # The reference file is loaded first, then the file under test.
    expected_frame = pd.read_excel(expected)
    actual_frame = pd.read_excel(actual)

    if expected_frame.equals(actual_frame):
        return 1
    return 0
|
|
|
|
# (prefix, URI) pairs for the XML namespaces referenced when looking up
# sparkline elements in a worksheet part.
_xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI; used by the CSS selector below.
_xlsx_ns_mapping = {prefix: uri for prefix, uri in _xlsx_namespaces}
# URI -> prefix; passed to xmltodict.parse in _load_sparklines so that parsed
# keys come out as "x14:..." / "xm:...".
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
# Selects every <x14:sparkline> element.
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
#print(_sparklines_selector.css)
|
|
def _load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """
    Collect the sparkline definitions found in the archive member
    "xl/worksheets/sheet1.xml" of an xlsx file.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, str]: sparkline definitions, mapping the "xm:sqref" text of
            each sparkline to its "xm:f" text, in form of
            {
                "F3": "Sheet1!C3:E3"
            }
    """

    # An xlsx file is a zip archive; only the sheet1.xml part is inspected.
    with zipfile.ZipFile(xlsx_file, "r") as archive, \
            archive.open("xl/worksheets/sheet1.xml") as sheet_file:
        sheet_root: _Element = lxml.etree.fromstring(sheet_file.read())
        sparkline_nodes: List[_Element] = _sparklines_selector(sheet_root)

    definitions: Dict[str, str] = {}
    for node in sparkline_nodes:
        # Each element is serialised and re-parsed with xmltodict so that its
        # children can be read through prefixed keys.
        parsed: Dict[str, Dict[str, str]] = xmltodict.parse(
            lxml.etree.tostring(node, encoding="unicode"),
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        entry = parsed["x14:sparkline"]
        definitions[entry["xm:sqref"]] = entry["xm:f"]
    return definitions
|
|
|
|
def compare_with_sparklines(actual: str, expected: str) -> float:
    """
    Compare two xlsx files by cell contents and by sparkline definitions.

    Both intermediate results are printed.

    Args:
        actual (str): path to the xlsx under test
        expected (str): path to the reference xlsx

    Returns:
        float: 1.0 if the loaded DataFrames are equal and the sparkline
            definitions are equal, otherwise 0.0
    """

    actual_frame = pd.read_excel(actual)
    expected_frame = pd.read_excel(expected)
    contents_match: bool = actual_frame.equals(expected_frame)
    print("Normal Contents Metric: {:}".format(contents_match))

    actual_sparklines = _load_sparklines(actual)
    expected_sparklines = _load_sparklines(expected)
    sparklines_match: bool = actual_sparklines == expected_sparklines
    print("Sparkline Metric: {:}".format(sparklines_match))

    if contents_match and sparklines_match:
        return 1.0
    return 0.0
|
|
|
|
def _load_charts(xlsx_file: str) -> Dict[str, Any]:
    """
    Summarise the charts of the active worksheet by the references of their
    series.

    Args:
        xlsx_file (str): path to xlsx

    Returns:
        Dict[str, Any]: information of charts. Each key describes one chart:
            its series joined by ";", every series written as
            "<val numRef f>,<val strRef f>,<cat numRef f>,<cat strRef f>"
            with "" for a reference that is not available. Each value is
            currently an empty dict.
    """

    def _ref_formula(source: Any, ref_name: str) -> Any:
        # Yields source.<ref_name>.f, or "" as soon as one of the two
        # attribute lookups is not possible.
        return getattr(getattr(source, ref_name, None), "f", "")

    workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
    active_sheet: Worksheet = workbook.active
    sheet_charts: List[ChartBase] = active_sheet._charts

    chart_set: Dict[str, Any] = {}
    for chart in sheet_charts:
        descriptions: List[str] = [
            "{:},{:},{:},{:}".format(
                _ref_formula(ser.val, "numRef"),
                _ref_formula(ser.val, "strRef"),
                _ref_formula(ser.cat, "numRef"),
                _ref_formula(ser.cat, "strRef"),
            )
            for ser in chart.series
        ]
        chart_key: str = ";".join(descriptions)

        # TODO: maybe more aspects, like chart type
        chart_info: Dict[str, Any] = {}
        chart_set[chart_key] = chart_info
    return chart_set
|
|
|
|
def compare_with_charts(actual: str, expected: str) -> float:
    """
    Compare two xlsx files by cell contents and by chart information.

    Both intermediate results are printed.

    Args:
        actual (str): path to the xlsx under test
        expected (str): path to the reference xlsx

    Returns:
        float: 1.0 if the loaded DataFrames are equal and the chart
            information from _load_charts is equal, otherwise 0.0
    """

    actual_frame = pd.read_excel(actual)
    expected_frame = pd.read_excel(expected)
    contents_match: bool = actual_frame.equals(expected_frame)
    print("Normal Contents Metric: {:}".format(contents_match))

    actual_charts = _load_charts(actual)
    expected_charts = _load_charts(expected)
    charts_match: bool = actual_charts == expected_charts
    print("Chart Metric: {:}".format(charts_match))

    if contents_match and charts_match:
        return 1.0
    return 0.0
|
|
|
|
def _read_rule_sheet(workbook: pd.ExcelFile, sheet_ref: Any) -> pd.DataFrame:
    """
    Load the sheet a "sheet_data" rule refers to.

    Args:
        workbook (pd.ExcelFile): the already opened result workbook
        sheet_ref: either an int (sheet index inside `workbook`) or a str of
            the form "<path>@<index>" (sheet index inside another file)

    Returns:
        pd.DataFrame: contents of the referenced sheet
    """

    if isinstance(sheet_ref, int):
        return pd.read_excel(workbook, sheet_ref)
    # Split on the last "@" only, so the path itself may contain "@".
    file_name, sheet_idx = sheet_ref.rsplit("@", maxsplit=1)
    return pd.read_excel(file_name, int(sheet_idx))


def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
    """
    Check a workbook against a list of sheet rules. Every assertion is
    printed; all rules are evaluated even after one has failed.

    Args:
        result (str): path to the workbook to check
        rules (List[Dict[str, Any]]): rules of one of the forms
            {"type": "sheet_name", "sheet_idx": int, "sheet_name": str}
                the sheet at sheet_idx must carry the given name
            {"type": "sheet_data", "sheet_idx0": ref, "sheet_idx1": ref}
                the two referenced sheets must load into equal DataFrames;
                a ref is an int (sheet index in `result`) or a
                "<path>@<index>" str (sheet index in another file)

    Returns:
        float: 1.0 if every rule holds, otherwise 0.0

    Raises:
        NotImplementedError: for a rule whose "type" is not listed above
    """

    #workbook: Workbook = openpyxl.load_workbook(filename=result)
    passes = True
    # The context manager closes the ExcelFile on every exit path, including
    # the NotImplementedError below.
    with pd.ExcelFile(result) as workbook:
        worksheet_names: List[str] = workbook.sheet_names

        for r in rules:
            if r["type"] == "sheet_name":
                found_name: str = worksheet_names[r["sheet_idx"]]
                wanted_name: str = r["sheet_name"]
                metric: bool = found_name == wanted_name
                print("Assertion: {:d}.{:} is {:} - {:}".format(r["sheet_idx"], wanted_name, found_name, metric))
                passes = passes and metric
            elif r["type"] == "sheet_data":
                df1: pd.DataFrame = _read_rule_sheet(workbook, r["sheet_idx0"])
                df2: pd.DataFrame = _read_rule_sheet(workbook, r["sheet_idx1"])
                metric = df1.equals(df2)
                print("Assertion: {:} == {:} - {:}".format(r["sheet_idx0"], r["sheet_idx1"], metric))
                passes = passes and metric
            else:
                raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))

    return float(passes)
|
|
|
|
def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    """
    Check the freeze position of the active worksheet.

    Args:
        result (str): path to the xlsx to check
        rules (Dict[str, str]): must contain "position", the value the
            worksheet's freeze_panes is compared with

    Returns:
        float: 1.0 if freeze_panes of the active worksheet equals
            rules["position"], otherwise 0.0
    """

    workbook = openpyxl.load_workbook(filename=result)
    active_sheet: Worksheet = workbook.active
    position_matches = active_sheet.freeze_panes == rules["position"]
    return float(position_matches)
|
|
|
|
if __name__ == '__main__':
    # Manual scratch area: each commented-out section exercises one metric of
    # this module against hard-coded local files. Only the chart comparison at
    # the bottom is currently active.

    # --- compare_table ---
    #path1 = ""
    #path2 = ""
    #print(compare_table(path1, path2))

    # --- compare_with_sparklines ---
    #path1 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
    #path2 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"
    #print(compare_with_sparklines(path1, path2))

    # --- check_xlsx_freeze ---
    #path1 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column_gold.xlsx"
    #path2 = "../../../../../任务数据/LibreOffice Calc/Freeze_row_column.xlsx"
    #workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    #worksheet1: Worksheet = workbook1.active
    #print(worksheet1.freeze_panes)
    #workbook2: Workbook = openpyxl.load_workbook(filename=path2)
    #worksheet2: Worksheet = workbook2.active
    #print(worksheet2.freeze_panes)
    #rule = {"position": "C6"}
    #print(check_xlsx_freeze(path1, rule))

    # --- check_sheet_list ---
    #path1 = "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert_gold.xlsx"
    #rule = [ { "type": "sheet_name"
    #, "sheet_idx": 0
    #, "sheet_name": "Sheet1"
    #}
    #, { "type": "sheet_data"
    #, "sheet_idx0": "../../../../../任务数据/LibreOffice Calc/copy_sheet_insert.xlsx@0"
    #, "sheet_idx1": 1
    #}
    #, { "type": "sheet_name"
    #, "sheet_idx": 2
    #, "sheet_name": "Sheet2"
    #}
    #]
    #print(check_sheet_list(path1, rule))

    # --- compare_with_charts (active) ---
    # The commented-out lines between the two path assignments are earlier
    # explorations of the chart objects openpyxl exposes.
    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
    #workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    #worksheet1: Worksheet = workbook1.active
    #charts: List[ChartBase] = worksheet1._charts
    #print(len(charts))
    #print(type(charts[0]))
    #
    #print(len(charts[0].series))
    #print(type(charts[0].series[0]))
    #print(type(charts[0].series[0].val))
    ##print(charts[0].series[0].val)
    #print(charts[0].series[0].val.numRef.f)
    #
    #print(type(charts[0].series[0].cat))
    ##print(charts[0].series[0].cat)
    #print(charts[0].series[0].cat.numRef)
    #print(charts[0].series[0].cat.strRef)
    #print(charts[0].series[0].cat.strRef.f)
    #
    #df1 = pd.read_excel(path1)
    #print(df1)
    path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
    # Prints 1.0 when both files agree on cell contents and chart series.
    print(compare_with_charts(path1, path2))
|