ver Jan22ndv3

updated style metric to compare_table
This commit is contained in:
David Chang
2024-01-22 23:45:15 +08:00
parent c97f43ce95
commit 93229ce98c
8 changed files with 625 additions and 35 deletions

View File

@@ -1,6 +1,6 @@
import logging
import zipfile
from typing import Any, TypeVar, Union, Iterable, Optional
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
from typing import Dict, List, Set, Match
from urllib.parse import urlparse, urlunparse
import re
@@ -17,6 +17,12 @@ from openpyxl import Workbook
from openpyxl.chart._chart import ChartBase
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.dimensions import DimensionHolder
from openpyxl.formatting.formatting import ConditionalFormattingList
#from openpyxl.utils import get_column_letter
from openpyxl.cell.cell import Cell
from openpyxl.styles.differential import DifferentialStyle
import formulas
V = TypeVar("Value")
@@ -31,9 +37,8 @@ _xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
# function load_sparklines {{{ #
"""
This function modifies data_frame in-place
Args:
xlsx_file (str): path to xlsx
sheet_name (str): sheet name
@@ -64,6 +69,7 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
)
sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
return sparklines_dict
# }}} function load_sparklines #
# Available Chart Properties:
@@ -75,6 +81,7 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
# direction: "bar" (hori) | "col" (vert)
# xtitle, ytitle, ztitle: str
def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
# function load_charts {{{ #
"""
Args:
xlsx_file (Workbook): concerned excel book
@@ -83,7 +90,12 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
giving the concerned chart properties
Returns:
Dict[str, Any]: information of charts
Dict[str, Any]: information of charts, dict like
{
<str representing data source>: {
<str as property>: anything
}
}
"""
# workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
@@ -140,7 +152,132 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t
chart_set[series] = info
return chart_set
# }}} function load_charts #
# Supported Styles:
# number_format
# font_name - str
# font_family - float
# font_color - in aRGB, e.g., FF000000 is black
# font_bold - bool
# font_italic - bool
# fill_type - "patternFill" | "gradientFill"
# bgcolor - in aRGB, e.g., FFFF0000 is red
# fgcolor - in aRGB, e.g., FF00FFFF is yellow
def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
if style_name=="number_format":
return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode)\
if cell.value is not None and cell.data_type=="n" else None
elif style_name=="font_name":
return (diff_style or cell).font.name if cell.value is not None else None
elif style_name=="font_family":
return (diff_style or cell).font.family if cell.value is not None else None
elif style_name=="font_color":
return (diff_style or cell).font.color.rgb if cell.value is not None else None
elif style_name=="font_bold":
return (diff_style or cell).font.bold if cell.value is not None else None
elif style_name=="font_italic":
return (diff_style or cell).font.italic if cell.value is not None else None
elif style_name=="fill_type":
return (diff_style or cell).fill.tagname
elif style_name=="bgcolor":
return (diff_style or cell).fill.bgColor.rgb
elif style_name=="fgcolor":
return (diff_style or cell).fill.fgColor.rgb
else:
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, List[Any]]:
# function load_xlsx_styles {{{ #
"""
Args:
xlsx_file (Workbook): concerned excel book
sheet_name (str): sheet name
options (Dict[str, List[str]): dick like {"props": list of str} giving
the concerned styles
Returns:
Dict[str, List[Any]]: dict like
{
<str as cell coordinates>: list of anything indicating concerned
property values
}
"""
worksheet: Worksheet = xlsx_file[sheet_name]
style_dict: Dict[str, List[Any]] = {}
concerned_styles: List[str] = options.get("props", [])
# Handles Cell Styles
for col in worksheet.iter_cols():
for c in col:
style_list: List[Any] = []
for st in concerned_styles:
style_list.append(_read_cell_style(st, c))
style_dict[c.coordinate] = style_list
# Handles Conditional Formatting
conditional_formattings: ConditionalFormattingList = worksheet.conditional_formatting
formula_parser = formulas.Parser()
for fmt in conditional_formattings:
for r in fmt.rules:
active_cells: List[Cell] = []
if r.type == "expression":
condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile()
for rge in fmt.cells:
for c in rge.cells:
cell: Cell = worksheet.cell(row=c[0], column=c[1])
if condition(str(cell.value)):
active_cells.append(cell)
else:
raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))
for c in active_cells:
style_dict[c.coordinate] = [_read_cell_style(st, c, r.dxf) for st in concerned_styles]
return style_dict
# }}} function load_xlsx_styles #
# Available Row Properties:
# hidden
# collapsed
# height
#
# Available Column Properties:
# width
# auto_size
# hidden
# collapsed
# min
# max
def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\
-> Dict[Union[int, str], Dict[str, Any]]:
# function load_rows_or_cols {{{ #
"""
Args:
xlsx_file (Workbook): concerned excel book
sheet_name (str): sheet name
options (Dict[str, List[str]]): dict like
{"obj": "row" | "column", "props": list of str} giving the concerned
row/column properties
Returns:
Dict[Union[int, str], Dict[str, Any]]: row/column information
"""
worksheet: Worksheet = xlsx_file[sheet_name]
objs: DimensionHolder = getattr(worksheet, "{:}_dimensions".format(options["obj"]))
obj_set: Dict[int, Any] = {}
obj_props: Set[str] = set(options.get("props", []))
for obj_no, obj_dms in objs.items():
info_dict: Dict[str, Any] = {}
for prop in obj_props:
info_dict[prop] = getattr(obj_dms, prop)
obj_set[obj_no] = info_dict
return obj_set
# }}} function load_rows_or_cols #
def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
return all(k in item and item[k] == val for k, val in pattern.items())