import zipfile
from typing import Any, Dict, List, Optional, Set

import lxml.cssselect
import lxml.etree
import openpyxl
import xmltodict
from lxml.etree import _Element
from openpyxl import Workbook
from openpyxl.chart._chart import ChartBase
from openpyxl.worksheet.worksheet import Worksheet

# (prefix, namespace URI) pairs used by the sparkline extension elements.
_xlsx_namespaces = [
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI, consumed by the CSS selector.
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# URI -> prefix, consumed by xmltodict so parsed keys read "x14:sparkline".
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
_sparklines_selector = lxml.cssselect.CSSSelector(
    "x14|sparkline", namespaces=_xlsx_ns_mapping
)


def load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """Read the sparkline definitions from ``xl/worksheets/sheet1.xml``.

    Args:
        xlsx_file (str): path to the xlsx file

    Returns:
        Dict[str, str]: mapping from the ``xm:sqref`` text of every
          ``x14:sparkline`` element to its ``xm:f`` text, e.g.
          {"F3": "Sheet1!C3:E3"}

    Raises:
        KeyError: if the archive has no ``xl/worksheets/sheet1.xml`` member,
          or a sparkline element lacks ``xm:sqref`` / ``xm:f``.
    """

    # An xlsx file is a zip archive; only the sheet1 part is inspected.
    with zipfile.ZipFile(xlsx_file, "r") as z_f:
        with z_f.open("xl/worksheets/sheet1.xml") as f:
            sheet1: _Element = lxml.etree.fromstring(f.read())

    sparklines: List[_Element] = _sparklines_selector(sheet1)

    sparklines_dict: Dict[str, str] = {}
    for sp_l in sparklines:
        # Round-trip through text so xmltodict can turn the element into a
        # plain dict with prefixed keys.
        sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
        sparkline: Dict[str, Dict[str, str]] = xmltodict.parse(
            sparkline_xml,
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        definition = sparkline["x14:sparkline"]
        sparklines_dict[definition["xm:sqref"]] = definition["xm:f"]
    return sparklines_dict


def _ref_formula(data_source: Any, ref_name: str) -> Any:
    """Return ``data_source.<ref_name>.f``, or "" if either attribute is absent."""
    ref = getattr(data_source, ref_name, None)
    return getattr(ref, "f", "")


def _title_text(titled: Any) -> Optional[str]:
    """Return the first text run of ``titled.title``, or None if there is none."""
    try:
        return titled.title.tx.rich.p[0].r[0].t
    except (AttributeError, IndexError, TypeError):
        # No title object, a non-rich title, or an empty paragraph/run list.
        return None


# Available Chart Properties:
# title: str
# anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
# width: number
# height: number
# type: "scatterChart" | "lineChart" | "barChart"
# direction: "bar" (hori) | "col" (vert)
# xtitle, ytitle, ztitle: str
def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
    """Collect the requested properties of every chart on the active sheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
          giving the concerned chart properties (see the list above)

    Returns:
        Dict[str, Any]: information of charts. Each key identifies a chart by
          its series: one "value_num,value_str,categ_num,categ_str" entry per
          series (reference formulas, "" when absent), joined with ";". Each
          value is a dict holding the requested properties. Title-like
          properties and "direction" are None when the chart does not define
          them. Charts with identical series keys overwrite one another.
    """

    worksheet: Worksheet = xlsx_file.active
    # openpyxl keeps the charts of a sheet in the private `_charts` list.
    charts: List[ChartBase] = worksheet._charts

    chart_props: Set[str] = set(options.get("chart_props", ()))

    chart_set: Dict[str, Any] = {}
    for ch in charts:
        series_refs: List[str] = []
        for ser in ch.series:
            series_refs.append(
                "{:},{:},{:},{:}".format(
                    _ref_formula(ser.val, "numRef"),
                    _ref_formula(ser.val, "strRef"),
                    _ref_formula(ser.cat, "numRef"),
                    _ref_formula(ser.cat, "strRef"),
                )
            )
        series_key: str = ";".join(series_refs)

        # TODO: maybe more aspects, like chart type
        info: Dict[str, Any] = {}

        if "title" in chart_props:
            info["title"] = _title_text(ch)
        if "anchor" in chart_props:
            # Start cell comes from `_from`, end cell from `to`.
            info["anchor"] = [
                ch.anchor.editAs,
                ch.anchor._from.col,
                ch.anchor._from.row,
                ch.anchor.to.col,
                ch.anchor.to.row,
            ]
        if "width" in chart_props:
            info["width"] = ch.width
        if "height" in chart_props:
            info["height"] = ch.height
        if "type" in chart_props:
            info["type"] = ch.tagname
        if "direction" in chart_props:
            # Only bar charts carry `barDir`.
            info["direction"] = getattr(ch, "barDir", None)

        # Not every chart type has all three axes, hence the getattr.
        for prop, axis_name in (
            ("xtitle", "x_axis"),
            ("ytitle", "y_axis"),
            ("ztitle", "z_axis"),
        ):
            if prop in chart_props:
                info[prop] = _title_text(getattr(ch, axis_name, None))

        chart_set[series_key] = info
    return chart_set


if __name__ == "__main__":
    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx"
    workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    # load_charts expects the loaded workbook, not the file path.
    print(load_charts(workbook1, chart_props=["title", "xtitle", "ytitle", "type"]))