From 3fb2fd971848efeeba93d705eabc6a713f6af753 Mon Sep 17 00:00:00 2001 From: David Chang Date: Mon, 19 Feb 2024 12:08:36 +0800 Subject: [PATCH] ver Feb19th updated load_charts with legend prop updated _read_cell_style with styles font_underline, font_size, and hyperlink added load_filters and load_pivot_tables --- desktop_env/evaluators/metrics/utils.py | 242 +++++++++++++++++++++++- 1 file changed, 235 insertions(+), 7 deletions(-) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 4f35b65..ac8b879 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -16,12 +16,15 @@ from lxml.etree import _Element from openpyxl import Workbook from openpyxl.chart._chart import ChartBase from openpyxl.worksheet.worksheet import Worksheet +from openpyxl.worksheet.filters import AutoFilter, SortState from openpyxl.worksheet.cell_range import MultiCellRange from openpyxl.worksheet.dimensions import DimensionHolder from openpyxl.formatting.formatting import ConditionalFormattingList #from openpyxl.utils import get_column_letter from openpyxl.cell.cell import Cell from openpyxl.styles.differential import DifferentialStyle +from openpyxl.pivot.table import TableDefinition as PivotTableDefinition +from openpyxl.pivot.cache import CacheSource as PivotCacheSource import formulas V = TypeVar("Value") @@ -79,6 +82,7 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]: # Available Chart Properties: # title: str # anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1] +# legend: "b" | "tr" | "l" | "r" | "t" # width: number # height: number # type: "scatterChart" | "lineChart" | "barChart" @@ -133,7 +137,12 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An info: Dict[str, Any] = {} if "title" in chart_props: - info["title"] = ch.title.tx.rich.p[0].r[0].t + try: + info["title"] = ch.title.tx.rich.p[0].r[0].t + except: + 
info["title"] = None + if "legend" in chart_props: + info["legend"] = ch.legend.position if ch.legend is not None else None if "anchor" in chart_props: info["anchor"] = [ch.anchor.editAs , ch.anchor._from.col, ch.anchor.to.row @@ -149,15 +158,98 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An info["direction"] = ch.barDir if "xtitle" in chart_props: - info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t + try: + info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t + except: + info["xtitle"] = None if "ytitle" in chart_props: - info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t + try: + info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t + except: + info["ytitle"] = None if "ztitle" in chart_props: - info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t + try: + info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t + except: + info["ztitle"] = None chart_set[series] = info return chart_set # }}} function load_charts # +# Available Pivot Properties: +# name: str +# show_total, show_empty_row, show_empty_col, show_headers: bool +# location: str +# selection: if the concrete item selection should be checked, a list of list of tuple like (bool, index) will be returned +# filter: if the filter fields should be checked; fields indices will be return in `filter_fields` item +# col_fields: indices +# row_fields: indices +# data_fields: list of str representations. 
def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    #  function load_pivot_tables {{{ # 
    """
    Collect the requested properties of every pivot table on one sheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like {"pivot_props": list of str}
          giving the concerned pivot properties. Recognized names:
            name: stores the pivot table's `name` under "name"
            show_total: stores `visualTotals` under "show_total"
            show_empty_row: stores `showEmptyRow` under "show_empty_row"
            show_empty_col: stores `showEmptyCol` under "show_empty_col"
            show_headers: stores `showHeaders` under "show_headers"
            location: stores the `location` attribute as is under
              "location"
            selection: stores, under "selection", one list per pivot field
              holding an `(h, x)` tuple for each of the field's items
            filter: stores "selection" as above and additionally the set
              of `fld` values of the page fields under "filter_fields"
            col_fields: stores the `x` of every column field
            row_fields: stores the `x` of every row field
            data_fields: stores one str per data field formatted as
              "fld;name;subtotal;showDataAs"
            data_fields_name: makes "data_fields" include the field name;
              without it the name slot is left empty

    Returns:
        Dict[str, Any]: information of pivot tables, dict like
          {
            "<cache type>;<source ref>;<source sheet>": {
                <property name>: anything
            }
          }
          Pivot tables that share the same source string share one key, so
          the later one replaces the earlier one.

    Raises:
        KeyError: if `xlsx_file` has no sheet named `sheet_name` (raised
          by the workbook lookup itself)
    """

    worksheet: Worksheet = xlsx_file[sheet_name]
    pivots: List[PivotTableDefinition] = worksheet._pivots

    # `or []` tolerates an explicit `pivot_props=None`
    pivot_props: Set[str] = set(options.get("pivot_props") or [])
    # loop-invariant, so decided once instead of once per data field
    with_field_name: bool = "data_fields_name" in pivot_props

    pivot_set: Dict[str, Any] = {}
    for pvt in pivots:
        cache_source: PivotCacheSource = pvt.cache.cacheSource
        # A cache that is not built from a worksheet range carries no
        # worksheetSource; fall back to None instead of raising
        # AttributeError so such pivot tables can still be reported.
        ws_source = cache_source.worksheetSource
        source: str = "{:};{:};{:}".format(
            cache_source.type,
            ws_source.ref if ws_source is not None else None,
            ws_source.sheet if ws_source is not None else None,
        )

        info: Dict[str, Any] = {}
        if "name" in pivot_props:
            info["name"] = pvt.name

        if "show_total" in pivot_props:
            info["show_total"] = pvt.visualTotals
        if "show_empty_row" in pivot_props:
            info["show_empty_row"] = pvt.showEmptyRow
        if "show_empty_col" in pivot_props:
            info["show_empty_col"] = pvt.showEmptyCol
        if "show_headers" in pivot_props:
            info["show_headers"] = pvt.showHeaders

        if "location" in pivot_props:
            info["location"] = pvt.location
        # "filter" implies "selection": both report the per-field items
        if "filter" in pivot_props or "selection" in pivot_props:
            info["selection"] = [
                [(itm.h, itm.x) for itm in f.items]
                for f in pvt.pivotFields
            ]
        if "filter" in pivot_props:
            info["filter_fields"] = set(f.fld for f in pvt.pageFields)
        if "col_fields" in pivot_props:
            info["col_fields"] = [f.x for f in pvt.colFields]
        if "row_fields" in pivot_props:
            info["row_fields"] = [f.x for f in pvt.rowFields]
        if "data_fields" in pivot_props:
            info["data_fields"] = [
                "{:d};{:};{:};{:}".format(
                    f.fld,
                    f.name if with_field_name else "",
                    f.subtotal,
                    f.showDataAs,
                )
                for f in pvt.dataFields
            ]

        pivot_set[source] = info
    return pivot_set
    #  }}} function load_pivot_tables # 
def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    #  function load_filters {{{ # 
    """
    Describe the auto filter of one sheet as plain Python containers.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, Any]): accepted for signature parity with the
          other loaders; not read by this function

    Returns:
        Dict[str, Any]: dict like
          {
            "ref": the auto filter's `ref`
            "filter_column": list of dict, ordered by "col_id", each like
              {
                "col_id": `colId`
                "hidden_button": `hiddenButton`
                "show_button": `showButton`
                "filters_blank": `filters.blank`
                "filters": set of the `filters.filter` entries
                "custom_filters_op": `customFilters._and`
                "custom_filters": set of (operator, val) tuples
              }
              where the "filters*" items are present only if the column
              has `filters` and the "custom_filters*" items only if it
              has `customFilters`
            "sort_state": present only if the auto filter has a
              `sortState`; dict with "sort", "case", "method", "ref" and
              "condition" (a list with one dict per sort condition)
          }

    Raises:
        KeyError: if `xlsx_file` has no sheet named `sheet_name` (raised
          by the workbook lookup itself)
    """

    worksheet: Worksheet = xlsx_file[sheet_name]
    filters: AutoFilter = worksheet.auto_filter

    filter_dict: Dict[str, Any] = {"ref": filters.ref}

    #  filterColumn
    filter_column_set: List[Dict[str, Any]] = []
    for flt_clm in filters.filterColumn:
        filter_column: Dict[str, Any] = {
            "col_id": flt_clm.colId,
            "hidden_button": flt_clm.hiddenButton,
            "show_button": flt_clm.showButton,
        }
        if flt_clm.filters is not None:
            filter_column["filters_blank"] = flt_clm.filters.blank
            filter_column["filters"] = set(flt_clm.filters.filter)
        if flt_clm.customFilters is not None:
            filter_column["custom_filters_op"] = flt_clm.customFilters._and
            filter_column["custom_filters"] = set(
                (flt.operator, flt.val)
                for flt in flt_clm.customFilters.customFilter
            )
        filter_column_set.append(filter_column)
    # order by column id so the result does not depend on storage order;
    # list.sort is stable, like the sorted() call it replaces
    filter_column_set.sort(key=lambda d: d["col_id"])
    filter_dict["filter_column"] = filter_column_set

    #  sortState
    sort_state: Optional[SortState] = filters.sortState
    if sort_state is not None:
        filter_dict["sort_state"] = {
            "sort": sort_state.columnSort,
            "case": sort_state.caseSensitive,
            "method": sort_state.sortMethod,
            "ref": sort_state.ref,
            "condition": [
                {
                    "descending": cdt.descending,
                    "key": cdt.sortBy,
                    "ref": cdt.ref,
                    "custom_list": cdt.customList,
                    "dxf_id": cdt.dxfId,
                    "icon": cdt.iconSet,
                    "iconid": cdt.iconId,
                }
                for cdt in sort_state.sortCondition
            ],
        }

    return filter_dict
    #  }}} function load_filters # 
##print(f_itm.c) + ##print(f_itm.x) + ##print(f_itm.d) + ##print(f_itm.e) + ##print(f.countASubtotal) + ##print(f.countSubtotal) + ##for f in pvt.dataFields: + ##print(f.name) + ##print(f.fld) + ###print(f.baseField) + ##print(f.subtotal) + ##print(f.showDataAs) + ##for f in pvt.rowFields: + ##print(1, f.x) + ##for f in pvt.rowItems: + ##print(2, f.t, f.r, f.i, f.x) + ##for f in pvt.colFields: + ##print(3, f.x) + ##for f in pvt.colItems: + ##print(4, f.t, f.r, f.i, f.x) + #for f in pvt.pageFields: + #print(5, f.fld) + #for flt in pvt.filters: + #print(5, flt.fld) + #print(6, flt.mpFld) + #print(7, flt.type) + #print(8, flt.evalOrder) + #print(9, flt.id) + #print(10, flt.stringValue1) + #print(11, flt.stringValue2) + #print(load_charts(workbook1, "Sheet2", chart_props=["title", "type", "legend"])) + #print(load_filters(workbook1, "透视表_工作表1_1")) + #workbook1.save("test2.xlsx") + print( load_pivot_tables( workbook1, "Sheet2", pivot_props=[ "col_fields" + , "filter" + , "row_fields" + , "data_fields" + ] + ) + )