ver Feb19th

updated load_charts with legend prop
updated _read_cell_style with styles font_underline, font_size, and hyperlink
added load_filters and load_pivot_tables
This commit is contained in:
David Chang
2024-02-19 12:08:36 +08:00
parent c39926fc57
commit 3fb2fd9718

View File

@@ -16,12 +16,15 @@ from lxml.etree import _Element
from openpyxl import Workbook from openpyxl import Workbook
from openpyxl.chart._chart import ChartBase from openpyxl.chart._chart import ChartBase
from openpyxl.worksheet.worksheet import Worksheet from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.worksheet.filters import AutoFilter, SortState
from openpyxl.worksheet.cell_range import MultiCellRange from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.dimensions import DimensionHolder from openpyxl.worksheet.dimensions import DimensionHolder
from openpyxl.formatting.formatting import ConditionalFormattingList from openpyxl.formatting.formatting import ConditionalFormattingList
#from openpyxl.utils import get_column_letter #from openpyxl.utils import get_column_letter
from openpyxl.cell.cell import Cell from openpyxl.cell.cell import Cell
from openpyxl.styles.differential import DifferentialStyle from openpyxl.styles.differential import DifferentialStyle
from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
from openpyxl.pivot.cache import CacheSource as PivotCacheSource
import formulas import formulas
V = TypeVar("Value") V = TypeVar("Value")
@@ -79,6 +82,7 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
# Available Chart Properties: # Available Chart Properties:
# title: str # title: str
# anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1] # anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
# legend: "b" | "tr" | "l" | "r" | "t"
# width: number # width: number
# height: number # height: number
# type: "scatterChart" | "lineChart" | "barChart" # type: "scatterChart" | "lineChart" | "barChart"
@@ -133,7 +137,12 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
info: Dict[str, Any] = {} info: Dict[str, Any] = {}
if "title" in chart_props: if "title" in chart_props:
info["title"] = ch.title.tx.rich.p[0].r[0].t try:
info["title"] = ch.title.tx.rich.p[0].r[0].t
except:
info["title"] = None
if "legend" in chart_props:
info["legend"] = ch.legend.position if ch.legend is not None else None
if "anchor" in chart_props: if "anchor" in chart_props:
info["anchor"] = [ch.anchor.editAs info["anchor"] = [ch.anchor.editAs
, ch.anchor._from.col, ch.anchor.to.row , ch.anchor._from.col, ch.anchor.to.row
@@ -149,15 +158,98 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
info["direction"] = ch.barDir info["direction"] = ch.barDir
if "xtitle" in chart_props: if "xtitle" in chart_props:
info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t try:
info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t
except:
info["xtitle"] = None
if "ytitle" in chart_props: if "ytitle" in chart_props:
info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t try:
info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t
except:
info["ytitle"] = None
if "ztitle" in chart_props: if "ztitle" in chart_props:
info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t try:
info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t
except:
info["ztitle"] = None
chart_set[series] = info chart_set[series] = info
return chart_set return chart_set
# }}} function load_charts # # }}} function load_charts #
# Available Pivot Properties:
# name: str
# show_total, show_empty_row, show_empty_col, show_headers: bool
# location: str
# selection: if the concrete item selection should be checked, a list of list of tuple like (bool, index) will be returned
# filter: if the filter fields should be checked; field indices will be returned in the `filter_fields` item
# col_fields: indices
# row_fields: indices
# data_fields: list of str representations. the str representation is like "index;name;subtotal_type;show_data_as"; name is optional and is only returned when `data_fields_name` is specified in `pivot_props`
def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    # function load_pivot_tables {{{ #
    """
    Collect the requested properties of every pivot table on a sheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like {"pivot_props": list of str}
          giving the concerned pivot properties. Recognised names are
          "name", "show_total", "show_empty_row", "show_empty_col",
          "show_headers", "location", "selection", "filter", "col_fields",
          "row_fields", "data_fields" and the modifier "data_fields_name".
          Unknown names are ignored.

    Returns:
        Dict[str, Any]: information of pivot tables, dict like
          {
            <str representing data source>: {
                <str as property>: anything
            }
          }
          The data-source key is "<cache type>;<source ref>;<source sheet>".
          Pivot tables that share the same key overwrite each other, so only
          the last one is kept.
    """

    worksheet: Worksheet = xlsx_file[sheet_name]
    pivots: List[PivotTableDefinition] = worksheet._pivots

    pivot_set: Dict[str, Any] = {}
    pivot_props: Set[str] = set(options.get("pivot_props", []))
    # Loop-invariant: whether data field names go into the str representation.
    with_data_field_name: bool = "data_fields_name" in pivot_props

    for pvt in pivots:
        cache_source: PivotCacheSource = pvt.cache.cacheSource
        # Only worksheet-backed caches carry a worksheetSource; for other
        # cache types it is None, so fall back to None for ref/sheet instead
        # of raising AttributeError.
        ws_source = cache_source.worksheetSource
        source_ref = ws_source.ref if ws_source is not None else None
        source_sheet = ws_source.sheet if ws_source is not None else None
        source: str = "{:};{:};{:}".format(cache_source.type, source_ref, source_sheet)

        info: Dict[str, Any] = {}
        if "name" in pivot_props:
            info["name"] = pvt.name
        if "show_total" in pivot_props:
            info["show_total"] = pvt.visualTotals
        if "show_empty_row" in pivot_props:
            info["show_empty_row"] = pvt.showEmptyRow
        if "show_empty_col" in pivot_props:
            info["show_empty_col"] = pvt.showEmptyCol
        if "show_headers" in pivot_props:
            info["show_headers"] = pvt.showHeaders
        if "location" in pivot_props:
            # NOTE(review): pvt.location is returned as-is; presumably an
            # openpyxl Location object rather than a plain str -- confirm.
            info["location"] = pvt.location
        if "filter" in pivot_props or "selection" in pivot_props:
            # One inner list per pivot field, holding (h, x) of each item.
            info["selection"] = [
                [(itm.h, itm.x) for itm in fld.items]
                for fld in pvt.pivotFields
            ]
        if "filter" in pivot_props:
            info["filter_fields"] = {fld.fld for fld in pvt.pageFields}
        if "col_fields" in pivot_props:
            info["col_fields"] = [fld.x for fld in pvt.colFields]
        if "row_fields" in pivot_props:
            info["row_fields"] = [fld.x for fld in pvt.rowFields]
        if "data_fields" in pivot_props:
            # "index;name;subtotal_type;show_data_as"; name is left empty
            # unless "data_fields_name" was requested.
            info["data_fields"] = [
                "{:d};{:};{:};{:}".format(
                    fld.fld,
                    fld.name if with_data_field_name else "",
                    fld.subtotal,
                    fld.showDataAs,
                )
                for fld in pvt.dataFields
            ]

        pivot_set[source] = info
    return pivot_set
    # }}} function load_pivot_tables #
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping) _shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# read_cell_value {{{ # # read_cell_value {{{ #
@@ -210,9 +302,12 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# font_color - in aRGB, e.g., FF000000 is black # font_color - in aRGB, e.g., FF000000 is black
# font_bold - bool # font_bold - bool
# font_italic - bool # font_italic - bool
# font_underline - "single" | "double" | "singleAccounting" | "doubleAccounting"
# font_size - float
# fill_type - "patternFill" | "gradientFill" # fill_type - "patternFill" | "gradientFill"
# bgcolor - in aRGB, e.g., FFFF0000 is red # bgcolor - in aRGB, e.g., FFFF0000 is red
# fgcolor - in aRGB, e.g., FF00FFFF is yellow # fgcolor - in aRGB, e.g., FF00FFFF is yellow
# hyperlink - str
def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any: def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
if style_name=="number_format": if style_name=="number_format":
return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode)\ return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode)\
@@ -227,12 +322,18 @@ def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[Different
return (diff_style or cell).font.bold if cell.value is not None else None return (diff_style or cell).font.bold if cell.value is not None else None
elif style_name=="font_italic": elif style_name=="font_italic":
return (diff_style or cell).font.italic if cell.value is not None else None return (diff_style or cell).font.italic if cell.value is not None else None
elif style_name=="font_underline":
return (diff_style or cell).font.underline if cell.value is not None else None
elif style_name=="font_size":
return (diff_style or cell).font.size if cell.value is not None else None
elif style_name=="fill_type": elif style_name=="fill_type":
return (diff_style or cell).fill.tagname return (diff_style or cell).fill.tagname
elif style_name=="bgcolor": elif style_name=="bgcolor":
return (diff_style or cell).fill.bgColor.rgb return (diff_style or cell).fill.bgColor.rgb
elif style_name=="fgcolor": elif style_name=="fgcolor":
return (diff_style or cell).fill.fgColor.rgb return (diff_style or cell).fill.fgColor.rgb
elif style_name=="hyperlink":
return cell.hyperlink or "" if cell.value is not None else None
else: else:
raise NotImplementedError("Unsupported Style: {:}".format(style_name)) raise NotImplementedError("Unsupported Style: {:}".format(style_name))
@@ -328,6 +429,61 @@ def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\
return obj_set return obj_set
# }}} function load_rows_or_cols # # }}} function load_rows_or_cols #
def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    # function load_filters {{{ #
    """
    Describe the auto filter of a sheet as plain dicts, lists and sets.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options: accepted for signature parity with the other loaders;
          not read by this function

    Returns:
        Dict[str, Any]: dict like
          {
            "ref": auto filter range,
            "filter_column": list of dicts ordered by "col_id", each with
              "col_id", "hidden_button", "show_button", plus
              "filters_blank"/"filters" (set of values) when value filters
              exist and "custom_filters_op"/"custom_filters" (set of
              (operator, val) tuples) when custom filters exist,
            "sort_state": dict with "sort", "case", "method", "ref" and
              "condition" (list of dicts); present only if the filter has
              a sort state
          }
    """

    worksheet: Worksheet = xlsx_file[sheet_name]
    filters: AutoFilter = worksheet.auto_filter
    filter_dict: Dict[str, Any] = {"ref": filters.ref}

    # filterColumn
    filter_column_set: List[Dict[str, Any]] = []
    for flt_clm in filters.filterColumn:
        filter_column: Dict[str, Any] = {
            "col_id": flt_clm.colId,
            "hidden_button": flt_clm.hiddenButton,
            "show_button": flt_clm.showButton,
        }
        if flt_clm.filters is not None:
            filter_column["filters_blank"] = flt_clm.filters.blank
            filter_column["filters"] = set(flt_clm.filters.filter)
        if flt_clm.customFilters is not None:
            filter_column["custom_filters_op"] = flt_clm.customFilters._and
            filter_column["custom_filters"] = {
                (flt.operator, flt.val)
                for flt in flt_clm.customFilters.customFilter
            }
        filter_column_set.append(filter_column)
    # Order by column id so the result does not depend on storage order.
    filter_dict["filter_column"] = sorted(filter_column_set, key=lambda d: d["col_id"])

    # sortState
    sort_state: Optional[SortState] = filters.sortState
    if sort_state is not None:
        filter_dict["sort_state"] = {
            "sort": sort_state.columnSort,
            "case": sort_state.caseSensitive,
            "method": sort_state.sortMethod,
            "ref": sort_state.ref,
            "condition": [
                {
                    "descending": cdt.descending,
                    "key": cdt.sortBy,
                    "ref": cdt.ref,
                    "custom_list": cdt.customList,
                    "dxf_id": cdt.dxfId,
                    "icon": cdt.iconSet,
                    "iconid": cdt.iconId,
                }
                for cdt in sort_state.sortCondition
            ],
        }

    return filter_dict
    # }}} function load_filters #
def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool: def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
return all(k in item and item[k] == val for k, val in pattern.items()) return all(k in item and item[k] == val for k, val in pattern.items())
@@ -431,10 +587,12 @@ def compare_urls(url1, url2):
if __name__ == "__main__": if __name__ == "__main__":
path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx" path1 = "test.xlsx"
#path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
path1 = "../../任务集/SheetCopilot/dataset/task_sheet_answers_v2/BoomerangSales/2_BoomerangSales/2_BoomerangSales_gt1.xlsx"
workbook1: Workbook = openpyxl.load_workbook(filename=path1) workbook1: Workbook = openpyxl.load_workbook(filename=path1)
worksheet1: Worksheet = workbook1.active worksheet1: Worksheet = workbook1.active
charts: List[ChartBase] = worksheet1._charts #charts: List[ChartBase] = worksheet1._charts
# print(len(charts)) # print(len(charts))
# print(type(charts[0])) # print(type(charts[0]))
# #
@@ -466,4 +624,74 @@ if __name__ == "__main__":
# df1 = pd.read_excel(path1) # df1 = pd.read_excel(path1)
# print(df1) # print(df1)
print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"])) #print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))
#print(type(worksheet1["A1"].hyperlink))
#print(worksheet1["A1"].hyperlink)
#print(worksheet1._charts[0].legend)
#print(worksheet1._charts[0].legend.position)
#for entr in worksheet1._charts[0].legend.legendEntry:
#print("Entr", entr.txPr.p[0].r[0].t)
#print(load_filters(workbook1, "工作表1"))
#print(worksheet1.auto_filter)
#for pvt in worksheet1._pivots:
##print(type(pvt))
##print(pvt)
#print(type(pvt.cache))
##print(pvt.cache)
#print(pvt.cache.cacheSource.type)
#print(pvt.cache.cacheSource.worksheetSource.ref)
#print(pvt.cache.cacheSource.worksheetSource.sheet)
#
#print(type(pvt.location))
#print(pvt.location)
#for f in pvt.pivotFields:
#print(type(f))
#print([(itm.h, itm.x) for itm in f.items])
##for f_itm in f.items:
##print(f_itm.n)
##print(f_itm.t)
##print(f_itm.h)
##print(f_itm.s)
##print(f_itm.sd)
##print(f_itm.f)
##print(f_itm.m)
##print(f_itm.c)
##print(f_itm.x)
##print(f_itm.d)
##print(f_itm.e)
##print(f.countASubtotal)
##print(f.countSubtotal)
##for f in pvt.dataFields:
##print(f.name)
##print(f.fld)
###print(f.baseField)
##print(f.subtotal)
##print(f.showDataAs)
##for f in pvt.rowFields:
##print(1, f.x)
##for f in pvt.rowItems:
##print(2, f.t, f.r, f.i, f.x)
##for f in pvt.colFields:
##print(3, f.x)
##for f in pvt.colItems:
##print(4, f.t, f.r, f.i, f.x)
#for f in pvt.pageFields:
#print(5, f.fld)
#for flt in pvt.filters:
#print(5, flt.fld)
#print(6, flt.mpFld)
#print(7, flt.type)
#print(8, flt.evalOrder)
#print(9, flt.id)
#print(10, flt.stringValue1)
#print(11, flt.stringValue2)
#print(load_charts(workbook1, "Sheet2", chart_props=["title", "type", "legend"]))
#print(load_filters(workbook1, "透视表_工作表1_1"))
#workbook1.save("test2.xlsx")
print( load_pivot_tables( workbook1, "Sheet2", pivot_props=[ "col_fields"
, "filter"
, "row_fields"
, "data_fields"
]
)
)